Revert "[OpenMP] Add support for Intel's umonitor/umwait"

This reverts commit 9cfad5f9c5.
AndreyChurbanov 2020-11-20 12:15:00 +03:00
parent 0c101c9cbc
commit 5644f734d6
14 changed files with 172 additions and 542 deletions


@@ -417,8 +417,6 @@ AffUsingHwloc "%1$s: Affinity capable, using hwloc."
 AffIgnoringHwloc "%1$s: Ignoring hwloc mechanism."
 AffHwlocErrorOccurred "%1$s: Hwloc failed in %2$s. Relying on internal affinity mechanisms."
 EnvSerialWarn "%1$s must be set prior to OpenMP runtime library initialization; ignored."
-EnvMwaitWarn "You have enabled the use of umonitor/umwait. If the CPU doesn't have that enabled "
-             "you'll get an illegal instruction exception."
 EnvVarDeprecated "%1$s variable deprecated, please use %2$s instead."
 RedMethodNotSupported "KMP_FORCE_REDUCTION: %1$s method is not supported; using critical."
 AffHWSubsetNoHWLOC "KMP_HW_SUBSET ignored: unsupported item requested for non-HWLOC topology method (KMP_TOPOLOGY_METHOD)"


@@ -255,10 +255,6 @@ typedef union kmp_team kmp_team_p;
 typedef union kmp_info kmp_info_p;
 typedef union kmp_root kmp_root_p;
-template <bool C = false, bool S = true> class kmp_flag_32;
-template <bool C = false, bool S = true> class kmp_flag_64;
-class kmp_flag_oncore;
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -1322,96 +1318,6 @@ static inline void __kmp_x86_pause(void) { _mm_pause(); }
   }                                                                            \
   }
-// User-level Monitor/Mwait
-#if KMP_HAVE_UMWAIT
-// We always try for UMWAIT first
-#if (KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300) ||                          \
-    (KMP_COMPILER_MSVC && _MSC_VER >= 1700) ||                                 \
-    (KMP_COMPILER_CLANG && (KMP_MSVC_COMPAT || __MINGW32__)) ||                \
-    (KMP_COMPILER_GCC && __MINGW32__)
-#if KMP_OS_UNIX
-#include <immintrin.h>
-#else
-#include <intrin.h>
-#endif // KMP_OS_UNIX
-#else
-#define USE_MWAIT_ASM                                                          \
-  KMP_OS_UNIX && (!KMP_COMPILER_ICC || __INTEL_COMPILER < 1900)
-#endif // KMP_COMPILER_ICC etc.
-#if KMP_OS_UNIX && 0 // "waitpkg" not recognized yet
-__attribute__((target("waitpkg")))
-#endif
-static inline int
-__kmp_tpause(uint32_t hint, uint64_t counter) {
-#if (USE_MWAIT_ASM)
-  uint32_t timeHi = uint32_t(counter >> 32);
-  uint32_t timeLo = uint32_t(counter & 0xffffffff);
-  char flag;
-  __asm__ volatile("#tpause\n.byte 0x66, 0x0F, 0xAE, 0xF1\n"
-                   "setb %0"
-                   : "=r"(flag)
-                   : "a"(timeLo), "d"(timeHi), "c"(hint)
-                   :);
-  return flag;
-#else
-  return _tpause(hint, counter);
-#endif
-}
-#if KMP_OS_UNIX && 0 // "waitpkg" not recognized on our build machine
-__attribute__((target("waitpkg")))
-#endif
-static inline void
-__kmp_umonitor(void *cacheline) {
-#if (USE_MWAIT_ASM)
-  __asm__ volatile("# umonitor\n.byte 0xF3, 0x0F, 0xAE, 0x01 "
-                   :
-                   : "a"(cacheline)
-                   :);
-#else
-  _umonitor(cacheline);
-#endif
-}
-#if KMP_OS_UNIX && 0 // "waitpkg" not recognized on our build machine
-__attribute__((target("waitpkg")))
-#endif
-static inline int
-__kmp_umwait(uint32_t hint, uint64_t counter) {
-#if (USE_MWAIT_ASM)
-  uint32_t timeHi = uint32_t(counter >> 32);
-  uint32_t timeLo = uint32_t(counter & 0xffffffff);
-  char flag;
-  __asm__ volatile("#umwait\n.byte 0xF2, 0x0F, 0xAE, 0xF1\n"
-                   "setb %0"
-                   : "=r"(flag)
-                   : "a"(timeLo), "d"(timeHi), "c"(hint)
-                   :);
-  return flag;
-#else
-  return _umwait(hint, counter);
-#endif
-}
-#elif KMP_HAVE_MWAIT
-#if KMP_OS_UNIX
-#include <pmmintrin.h>
-#else
-#include <intrin.h>
-#endif
-#if KMP_OS_UNIX
-__attribute__((target("sse3")))
-#endif
-static inline void
-__kmp_mm_monitor(void *cacheline, unsigned extensions, unsigned hints) {
-  _mm_monitor(cacheline, extensions, hints);
-}
-#if KMP_OS_UNIX
-__attribute__((target("sse3")))
-#endif
-static inline void
-__kmp_mm_mwait(unsigned extensions, unsigned hints) {
-  _mm_mwait(extensions, hints);
-}
-#endif // KMP_HAVE_UMWAIT
 /* ------------------------------------------------------------------------ */
 /* Support datatypes for the orphaned construct nesting checks.             */
 /* ------------------------------------------------------------------------ */
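The wrappers removed above encode tpause/umwait/umonitor as raw opcode bytes so they assemble even where the toolchain lacks the waitpkg mnemonics; on newer compilers the same operations are plain intrinsics. Below is a minimal standalone sketch of the intrinsic path, assuming GCC or Clang with waitpkg support (compile with -mwaitpkg); the deadline arithmetic and hint value are illustrative, not the runtime's:

#include <immintrin.h> // _umonitor, _umwait
#include <stdint.h>
#include <x86intrin.h> // __rdtsc

// Wait until *flag becomes nonzero, sleeping lightly between checks.
static void wait_on_flag(volatile uint32_t *flag) {
  while (*flag == 0) {
    _umonitor((void *)flag); // arm address monitoring on the flag's cache line
    if (*flag != 0)          // re-check: a write may have raced with umonitor
      break;
    // hint 1 = C0.1 (light sleep); wakes on a write or when TSC passes deadline
    _umwait(1, __rdtsc() + 100000);
  }
}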
@@ -3188,13 +3094,6 @@ static inline void __kmp_assert_valid_gtid(kmp_int32 gtid) {
     KMP_FATAL(ThreadIdentInvalid);
 }
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-extern int __kmp_user_level_mwait; // TRUE or FALSE; from KMP_USER_LEVEL_MWAIT
-extern int __kmp_umwait_enabled; // Runtime check if user-level mwait enabled
-extern int __kmp_mwait_enabled; // Runtime check if ring3 mwait is enabled
-extern int __kmp_mwait_hints; // Hints to pass in to mwait
-#endif
 /* ------------------------------------------------------------------------- */
 extern kmp_global_t __kmp_global; /* global status */
@@ -3396,14 +3295,17 @@ extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker,
 extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
                              kmp_uint32 (*pred)(void *, kmp_uint32), void *obj);
-extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64<> *flag,
+class kmp_flag_32;
+class kmp_flag_64;
+class kmp_flag_oncore;
+extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag,
                           int final_spin
 #if USE_ITT_BUILD
                           ,
                           void *itt_sync_obj
 #endif
                           );
-extern void __kmp_release_64(kmp_flag_64<> *flag);
+extern void __kmp_release_64(kmp_flag_64 *flag);
 extern void __kmp_infinite_loop(void);
@@ -3501,6 +3403,13 @@ extern int __kmp_try_suspend_mx(kmp_info_t *th);
 extern void __kmp_lock_suspend_mx(kmp_info_t *th);
 extern void __kmp_unlock_suspend_mx(kmp_info_t *th);
+extern void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag);
+extern void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag);
+extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
+extern void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag);
+extern void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag);
+extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);
 extern void __kmp_elapsed(double *);
 extern void __kmp_elapsed_tick(double *);
@@ -3625,6 +3534,28 @@ extern kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
                                                        kmp_task_t *task);
 extern void __kmp_fulfill_event(kmp_event_t *event);
+int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,
+                           kmp_flag_32 *flag, int final_spin,
+                           int *thread_finished,
+#if USE_ITT_BUILD
+                           void *itt_sync_obj,
+#endif /* USE_ITT_BUILD */
+                           kmp_int32 is_constrained);
+int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
+                           kmp_flag_64 *flag, int final_spin,
+                           int *thread_finished,
+#if USE_ITT_BUILD
+                           void *itt_sync_obj,
+#endif /* USE_ITT_BUILD */
+                           kmp_int32 is_constrained);
+int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
+                               kmp_flag_oncore *flag, int final_spin,
+                               int *thread_finished,
+#if USE_ITT_BUILD
+                               void *itt_sync_obj,
+#endif /* USE_ITT_BUILD */
+                               kmp_int32 is_constrained);
 extern void __kmp_free_task_team(kmp_info_t *thread,
                                  kmp_task_team_t *task_team);
 extern void __kmp_reap_task_teams(void);
@@ -3988,46 +3919,4 @@ extern void __kmp_omp_display_env(int verbose);
 }
 #endif
-template <bool C, bool S>
-extern void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag);
-template <bool C, bool S>
-extern void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag);
-extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-template <bool C, bool S>
-extern void __kmp_mwait_32(int th_gtid, kmp_flag_32<C, S> *flag);
-template <bool C, bool S>
-extern void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag);
-extern void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag);
-#endif
-template <bool C, bool S>
-extern void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag);
-template <bool C, bool S>
-extern void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag);
-extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);
-template <bool C, bool S>
-int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,
-                           kmp_flag_32<C, S> *flag, int final_spin,
-                           int *thread_finished,
-#if USE_ITT_BUILD
-                           void *itt_sync_obj,
-#endif /* USE_ITT_BUILD */
-                           kmp_int32 is_constrained);
-template <bool C, bool S>
-int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
-                           kmp_flag_64<C, S> *flag, int final_spin,
-                           int *thread_finished,
-#if USE_ITT_BUILD
-                           void *itt_sync_obj,
-#endif /* USE_ITT_BUILD */
-                           kmp_int32 is_constrained);
-int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
-                               kmp_flag_oncore *flag, int final_spin,
-                               int *thread_finished,
-#if USE_ITT_BUILD
-                               void *itt_sync_obj,
-#endif /* USE_ITT_BUILD */
-                               kmp_int32 is_constrained);
 #endif /* KMP_H */


@@ -78,7 +78,7 @@ static bool __kmp_linear_barrier_gather_template(
        is valid any more - it could be deallocated by the master thread at any
        time. */
     ANNOTATE_BARRIER_BEGIN(this_thr);
-    kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[0]);
+    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]);
     flag.release();
   } else {
     kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
@@ -101,14 +101,14 @@ static bool __kmp_linear_barrier_gather_template(
                     &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));
       // Wait for worker thread to arrive
+      kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived,
+                       new_state);
       if (cancellable) {
-        kmp_flag_64<true, false> flag(
-            &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state);
-        if (flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)))
+        bool cancelled = flag.wait_cancellable_nosleep(
+            this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
+        if (cancelled)
           return true;
       } else {
-        kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived,
-                           new_state);
         flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
       }
       ANNOTATE_BARRIER_END(other_threads[i]);
@@ -203,20 +203,22 @@ static bool __kmp_linear_barrier_release_template(
                       other_threads[i]->th.th_bar[bt].bb.b_go,
                       other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
         ANNOTATE_BARRIER_BEGIN(other_threads[i]);
-        kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_go,
+        kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go,
                          other_threads[i]);
         flag.release();
       }
     }
   } else { // Wait for the MASTER thread to release us
     KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
                   gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
+    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
     if (cancellable) {
-      kmp_flag_64<true, false> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
-      if (flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)))
+      bool cancelled = flag.wait_cancellable_nosleep(
+          this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
+      if (cancelled) {
         return true;
+      }
     } else {
-      kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
       flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
     }
     ANNOTATE_BARRIER_END(this_thr);
@@ -337,7 +339,7 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
                 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                 team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
       // Wait for child to arrive
-      kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
+      kmp_flag_64 flag(&child_bar->b_arrived, new_state);
       flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
       ANNOTATE_BARRIER_END(child_thr);
 #if USE_ITT_BUILD && USE_ITT_NOTIFY
@@ -382,7 +384,7 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
        is valid any more - it could be deallocated by the master thread at any
        time. */
     ANNOTATE_BARRIER_BEGIN(this_thr);
-    kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[parent_tid]);
+    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]);
     flag.release();
   } else {
     // Need to update the team arrived pointer if we are the master thread
@@ -418,7 +420,7 @@ static void __kmp_tree_barrier_release(
     KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                   &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
     // Wait for parent thread to release us
-    kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
+    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
     flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
     ANNOTATE_BARRIER_END(this_thr);
 #if USE_ITT_BUILD && USE_ITT_NOTIFY
@@ -496,7 +498,7 @@ static void __kmp_tree_barrier_release(
                     child_bar->b_go + KMP_BARRIER_STATE_BUMP));
       // Release child from barrier
       ANNOTATE_BARRIER_BEGIN(child_thr);
-      kmp_flag_64<> flag(&child_bar->b_go, child_thr);
+      kmp_flag_64 flag(&child_bar->b_go, child_thr);
       flag.release();
       child++;
       child_tid++;
@@ -538,7 +540,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
 #endif
   /* Perform a hypercube-embedded tree gather to wait until all of the threads
      have arrived, and reduce any required data as we go. */
-  kmp_flag_64<> p_flag(&thr_bar->b_arrived);
+  kmp_flag_64 p_flag(&thr_bar->b_arrived);
   for (level = 0, offset = 1; offset < num_threads;
        level += branch_bits, offset <<= branch_bits) {
     kmp_uint32 child;
@@ -586,7 +588,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
                 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                 team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
       // Wait for child to arrive
-      kmp_flag_64<> c_flag(&child_bar->b_arrived, new_state);
+      kmp_flag_64 c_flag(&child_bar->b_arrived, new_state);
       c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
       ANNOTATE_BARRIER_END(child_thr);
       KMP_MB(); // Synchronize parent and child threads.
@@ -668,7 +670,7 @@ static void __kmp_hyper_barrier_release(
   KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                 &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
   // Wait for parent thread to release us
-  kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
+  kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
   flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
   ANNOTATE_BARRIER_END(this_thr);
 #if USE_ITT_BUILD && USE_ITT_NOTIFY
@@ -770,7 +772,7 @@ static void __kmp_hyper_barrier_release(
                     child_bar->b_go + KMP_BARRIER_STATE_BUMP));
       // Release child from barrier
       ANNOTATE_BARRIER_BEGIN(child_thr);
-      kmp_flag_64<> flag(&child_bar->b_go, child_thr);
+      kmp_flag_64 flag(&child_bar->b_go, child_thr);
       flag.release();
     }
   }
@@ -915,7 +917,7 @@ static void __kmp_hierarchical_barrier_gather(
     KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "
                   "for leaf kids\n",
                   gtid, team->t.t_id, tid));
-    kmp_flag_64<> flag(&thr_bar->b_arrived, leaf_state);
+    kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state);
     flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
     if (reduce) {
       ANNOTATE_REDUCE_AFTER(reduce);
@@ -955,7 +957,7 @@ static void __kmp_hierarchical_barrier_gather(
                       gtid, team->t.t_id, tid,
                       __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                       child_tid, &child_bar->b_arrived, new_state));
-        kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
+        kmp_flag_64 flag(&child_bar->b_arrived, new_state);
         flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
         ANNOTATE_BARRIER_END(child_thr);
         if (reduce) {
@@ -988,7 +990,7 @@ static void __kmp_hierarchical_barrier_gather(
                       gtid, team->t.t_id, tid,
                       __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                       child_tid, &child_bar->b_arrived, new_state));
-        kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
+        kmp_flag_64 flag(&child_bar->b_arrived, new_state);
         flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
         ANNOTATE_BARRIER_END(child_thr);
         if (reduce) {
@@ -1023,8 +1025,7 @@ static void __kmp_hierarchical_barrier_gather(
       !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived
                                       // flag; release it
       ANNOTATE_BARRIER_BEGIN(this_thr);
-      kmp_flag_64<> flag(&thr_bar->b_arrived,
-                         other_threads[thr_bar->parent_tid]);
+      kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]);
       flag.release();
     } else {
       // Leaf does special release on "offset" bits of parent's b_arrived flag
@@ -1068,7 +1069,7 @@ static void __kmp_hierarchical_barrier_release(
       thr_bar->team == NULL) {
     // Use traditional method of waiting on my own b_go flag
     thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
-    kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
+    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
     flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
     ANNOTATE_BARRIER_END(this_thr);
     TCW_8(thr_bar->b_go,
@@ -1217,7 +1218,7 @@ static void __kmp_hierarchical_barrier_release(
                       child_bar->b_go + KMP_BARRIER_STATE_BUMP));
         // Release child using child's b_go flag
         ANNOTATE_BARRIER_BEGIN(child_thr);
-        kmp_flag_64<> flag(&child_bar->b_go, child_thr);
+        kmp_flag_64 flag(&child_bar->b_go, child_thr);
         flag.release();
       }
     } else { // Release all children at once with leaf_state bits on my own
@@ -1243,7 +1244,7 @@ static void __kmp_hierarchical_barrier_release(
                       child_bar->b_go + KMP_BARRIER_STATE_BUMP));
         // Release child using child's b_go flag
         ANNOTATE_BARRIER_BEGIN(child_thr);
-        kmp_flag_64<> flag(&child_bar->b_go, child_thr);
+        kmp_flag_64 flag(&child_bar->b_go, child_thr);
         flag.release();
       }
     }


@@ -206,13 +206,6 @@ int __kmp_display_env = FALSE;
 int __kmp_display_env_verbose = FALSE;
 int __kmp_omp_cancellation = FALSE;
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-int __kmp_user_level_mwait = FALSE;
-int __kmp_umwait_enabled = FALSE;
-int __kmp_mwait_enabled = FALSE;
-int __kmp_mwait_hints = 0;
-#endif
 /* map OMP 3.0 schedule types with our internal schedule types */
 enum sched_type __kmp_sch_map[kmp_sched_upper - kmp_sched_lower_ext +
                               kmp_sched_upper_std - kmp_sched_lower - 2] = {


@@ -281,16 +281,6 @@ template <> struct traits_t<unsigned long long> {
 #define __forceinline __inline
 #endif
-/* Check if the OS/arch can support user-level mwait */
-// All mwait code tests for UMWAIT first, so it should only fall back to ring3
-// MWAIT for KNL.
-#define KMP_HAVE_MWAIT                                                         \
-  ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && (KMP_OS_LINUX || KMP_OS_WINDOWS) &&    \
-   !KMP_MIC2)
-#define KMP_HAVE_UMWAIT                                                        \
-  ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && (KMP_OS_LINUX || KMP_OS_WINDOWS) &&    \
-   !KMP_MIC)
 #if KMP_OS_WINDOWS
 #include <windows.h>


@@ -5458,7 +5458,7 @@ void __kmp_free_team(kmp_root_t *root,
           }
 #endif
           // first check if thread is sleeping
-          kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
+          kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
           if (fl.is_sleeping())
             fl.resume(__kmp_gtid_from_thread(th));
           KMP_CPU_PAUSE();
@@ -5885,7 +5885,7 @@ static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
     /* Need release fence here to prevent seg faults for tree forkjoin barrier
      * (GEH) */
     ANNOTATE_HAPPENS_BEFORE(thread);
-    kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
+    kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
     __kmp_release_64(&flag);
   }
@@ -6579,48 +6579,6 @@ static void __kmp_check_mic_type() {
 #endif /* KMP_MIC_SUPPORTED */
-#if KMP_HAVE_UMWAIT
-static void __kmp_user_level_mwait_init() {
-  struct kmp_cpuid buf;
-  __kmp_x86_cpuid(7, 0, &buf);
-  __kmp_umwait_enabled = ((buf.ecx >> 5) & 1) && __kmp_user_level_mwait;
-  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
-                __kmp_umwait_enabled));
-}
-#elif KMP_HAVE_MWAIT
-#ifndef AT_INTELPHIUSERMWAIT
-// Spurious, non-existent value that should always fail to return anything.
-// Will be replaced with the correct value when we know that.
-#define AT_INTELPHIUSERMWAIT 10000
-#endif
-// getauxval() function is available in RHEL7 and SLES12. If a system with an
-// earlier OS is used to build the RTL, we'll use the following internal
-// function when the entry is not found.
-unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
-unsigned long getauxval(unsigned long) { return 0; }
-static void __kmp_user_level_mwait_init() {
-  // When getauxval() and correct value of AT_INTELPHIUSERMWAIT are available
-  // use them to find if the user-level mwait is enabled. Otherwise, forcibly
-  // set __kmp_mwait_enabled=TRUE on Intel MIC if the environment variable
-  // KMP_USER_LEVEL_MWAIT was set to TRUE.
-  if (__kmp_mic_type == mic3) {
-    unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
-    if ((res & 0x1) || __kmp_user_level_mwait) {
-      __kmp_mwait_enabled = TRUE;
-      if (__kmp_user_level_mwait) {
-        KMP_INFORM(EnvMwaitWarn);
-      }
-    } else {
-      __kmp_mwait_enabled = FALSE;
-    }
-  }
-  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
-                "__kmp_mwait_enabled = %d\n",
-                __kmp_mic_type, __kmp_mwait_enabled));
-}
-#endif /* KMP_HAVE_UMWAIT */
 static void __kmp_do_serial_initialize(void) {
   int i, gtid;
   int size;
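The detection deleted above reads CPUID leaf 7 (subleaf 0) and tests ECX bit 5, the WAITPKG feature bit, before allowing user-level mwait. A self-contained sketch of the same probe, assuming GCC/Clang's <cpuid.h>; the function name is illustrative:

#include <cpuid.h>

// Returns nonzero when the CPU advertises WAITPKG (umonitor/umwait/tpause),
// i.e. CPUID.(EAX=7,ECX=0):ECX[5], the same bit the removed
// __kmp_user_level_mwait_init tested.
static int waitpkg_supported(void) {
  unsigned eax, ebx, ecx, edx;
  if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
    return 0; // CPU does not report leaf 7
  return (ecx >> 5) & 1;
}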
@@ -6795,9 +6753,6 @@ static void __kmp_do_serial_initialize(void) {
   __kmp_env_initialize(NULL);
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-  __kmp_user_level_mwait_init();
-#endif
 // Print all messages in message catalog for testing purposes.
 #ifdef KMP_DEBUG
   char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
@@ -8398,8 +8353,7 @@ void __kmp_resume_if_soft_paused() {
     for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
       kmp_info_t *thread = __kmp_threads[gtid];
       if (thread) { // Wake it if sleeping
-        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
-                         thread);
+        kmp_flag_64 fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
         if (fl.is_sleeping())
           fl.resume(gtid);
         else if (__kmp_try_suspend_mx(thread)) { // got suspend lock


@@ -4621,35 +4621,6 @@ static void __kmp_stg_print_task_throttling(kmp_str_buf_t *buffer,
   __kmp_stg_print_bool(buffer, name, __kmp_enable_task_throttling);
 } // __kmp_stg_print_task_throttling
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-// -----------------------------------------------------------------------------
-// KMP_USER_LEVEL_MWAIT
-static void __kmp_stg_parse_user_level_mwait(char const *name,
-                                             char const *value, void *data) {
-  __kmp_stg_parse_bool(name, value, &__kmp_user_level_mwait);
-} // __kmp_stg_parse_user_level_mwait
-static void __kmp_stg_print_user_level_mwait(kmp_str_buf_t *buffer,
-                                             char const *name, void *data) {
-  __kmp_stg_print_bool(buffer, name, __kmp_user_level_mwait);
-} // __kmp_stg_print_user_level_mwait
-// -----------------------------------------------------------------------------
-// KMP_MWAIT_HINTS
-static void __kmp_stg_parse_mwait_hints(char const *name, char const *value,
-                                        void *data) {
-  __kmp_stg_parse_int(name, value, 0, INT_MAX, &__kmp_mwait_hints);
-} // __kmp_stg_parse_mwait_hints
-static void __kmp_stg_print_mwait_hints(kmp_str_buf_t *buffer, char const *name,
-                                        void *data) {
-  __kmp_stg_print_int(buffer, name, __kmp_mwait_hints);
-} // __kmp_stg_print_mwait_hints
-#endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
 // -----------------------------------------------------------------------------
 // OMP_DISPLAY_ENV
@@ -4968,12 +4939,6 @@ static kmp_setting_t __kmp_stg_table[] = {
      __kmp_stg_print_omp_tool_libraries, NULL, 0, 0},
 #endif
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-    {"KMP_USER_LEVEL_MWAIT", __kmp_stg_parse_user_level_mwait,
-     __kmp_stg_print_user_level_mwait, NULL, 0, 0},
-    {"KMP_MWAIT_HINTS", __kmp_stg_parse_mwait_hints,
-     __kmp_stg_print_mwait_hints, NULL, 0, 0},
-#endif
     {"", NULL, NULL, NULL, 0, 0}}; // settings
 static int const __kmp_stg_count =


@@ -258,7 +258,6 @@ enum stats_state_e {
   macro(KMP_tree_release, 0, arg)                                              \
   macro(USER_resume, 0, arg)                                                   \
   macro(USER_suspend, 0, arg)                                                  \
-  macro(USER_mwait, 0, arg)                                                    \
   macro(KMP_allocate_team, 0, arg)                                             \
   macro(KMP_setup_icv_copy, 0, arg)                                            \
   macro(USER_icv_copy, 0, arg)                                                 \
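The timer list this hunk edits is an X-macro: each macro(name, flags, arg) row expands differently depending on the macro passed in, so one table can generate enums, name strings, and counters in lockstep. A toy version of the pattern, with illustrative names:

#define MY_TIMER_LIST(macro, arg)                                              \
  macro(USER_resume, 0, arg)                                                   \
  macro(USER_suspend, 0, arg)

#define AS_ENUM(name, flags, arg) TIMER_##name,
enum my_timer_e { MY_TIMER_LIST(AS_ENUM, 0) TIMER_count };

#define AS_NAME(name, flags, arg) #name,
static const char *my_timer_names[] = {MY_TIMER_LIST(AS_NAME, 0)};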


@@ -786,8 +786,7 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
   }
   int thread_finished = FALSE;
-  kmp_flag_32<false, false> flag(
-      (std::atomic<kmp_uint32> *)&node.dn.npredecessors, 0U);
+  kmp_flag_32 flag((std::atomic<kmp_uint32> *)&node.dn.npredecessors, 0U);
   while (node.dn.npredecessors > 0) {
     flag.execute_tasks(thread, gtid, FALSE,
                        &thread_finished USE_ITT_BUILD_ARG(NULL),
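Both versions of this code wait by working: instead of blocking until node.dn.npredecessors drains, the thread repeatedly calls flag.execute_tasks() so it keeps running other ready tasks while it waits. A single-threaded toy of the idiom, with illustrative stand-ins for the runtime's task queue:

#include <atomic>
#include <functional>
#include <queue>

std::atomic<int> npredecessors{2};
std::queue<std::function<void()>> ready; // stand-in for the task deque

void wait_for_predecessors() {
  while (npredecessors.load(std::memory_order_acquire) > 0) {
    if (ready.empty())
      continue; // the real runtime would pause, steal, or sleep here
    std::function<void()> task = std::move(ready.front());
    ready.pop();
    task(); // executing tasks is what eventually drops the count
  }
}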


@@ -1876,10 +1876,9 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
     must_wait = must_wait || (thread->th.th_task_team != NULL &&
                               thread->th.th_task_team->tt.tt_found_proxy_tasks);
     if (must_wait) {
-      kmp_flag_32<false, false> flag(
-          RCAST(std::atomic<kmp_uint32> *,
-                &(taskdata->td_incomplete_child_tasks)),
-          0U);
+      kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *,
+                             &(taskdata->td_incomplete_child_tasks)),
+                       0U);
       while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) {
         flag.execute_tasks(thread, gtid, FALSE,
                            &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
@@ -1985,7 +1984,7 @@ kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
       thread->th.ompt_thread_info.ompt_task_yielded = 1;
 #endif
       __kmp_execute_tasks_32(
-          thread, gtid, (kmp_flag_32<> *)NULL, FALSE,
+          thread, gtid, NULL, FALSE,
          &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
          __kmp_task_stealing_constraint);
 #if OMPT_SUPPORT
@@ -2513,8 +2512,8 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
     if (!taskdata->td_flags.team_serial ||
         (thread->th.th_task_team != NULL &&
          thread->th.th_task_team->tt.tt_found_proxy_tasks)) {
-      kmp_flag_32<false, false> flag(
-          RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)), 0U);
+      kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)),
+                       0U);
       while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) {
         flag.execute_tasks(thread, gtid, FALSE,
                            &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
@@ -3022,9 +3021,8 @@ static inline int __kmp_execute_tasks_template(
   }
 }
-template <bool C, bool S>
 int __kmp_execute_tasks_32(
-    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32<C, S> *flag, int final_spin,
+    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
     kmp_int32 is_constrained) {
   return __kmp_execute_tasks_template(
@@ -3032,9 +3030,8 @@ int __kmp_execute_tasks_32(
       thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
 }
-template <bool C, bool S>
 int __kmp_execute_tasks_64(
-    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64<C, S> *flag, int final_spin,
+    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
     kmp_int32 is_constrained) {
   return __kmp_execute_tasks_template(
@@ -3051,23 +3048,6 @@ int __kmp_execute_tasks_oncore(
       thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
 }
-template int
-__kmp_execute_tasks_32<false, false>(kmp_info_t *, kmp_int32,
-                                     kmp_flag_32<false, false> *, int,
-                                     int *USE_ITT_BUILD_ARG(void *), kmp_int32);
-template int __kmp_execute_tasks_64<false, true>(kmp_info_t *, kmp_int32,
-                                                 kmp_flag_64<false, true> *,
-                                                 int,
-                                                 int *USE_ITT_BUILD_ARG(void *),
-                                                 kmp_int32);
-template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
-                                                 kmp_flag_64<true, false> *,
-                                                 int,
-                                                 int *USE_ITT_BUILD_ARG(void *),
-                                                 kmp_int32);
 // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
 // next barrier so they can assist in executing enqueued tasks.
 // First thread in allocates the task team atomically.
@@ -3617,10 +3597,9 @@ void __kmp_task_team_wait(
       // Worker threads may have dropped through to release phase, but could
       // still be executing tasks. Wait here for tasks to complete. To avoid
       // memory contention, only master thread checks termination condition.
-      kmp_flag_32<false, false> flag(
-          RCAST(std::atomic<kmp_uint32> *,
-                &task_team->tt.tt_unfinished_threads),
-          0U);
+      kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *,
+                             &task_team->tt.tt_unfinished_threads),
+                       0U);
       flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
     }
     // Deactivate the old task team, so that the worker threads will stop
@@ -3642,7 +3621,7 @@ void __kmp_task_team_wait(
 }
 // __kmp_tasking_barrier:
-// This routine is called only when __kmp_tasking_mode == tskm_extra_barrier.
+// This routine may only called when __kmp_tasking_mode == tskm_extra_barrier.
 // Internal function to execute all tasks prior to a regular barrier or a join
 // barrier. It is a full barrier itself, which unfortunately turns regular
 // barriers into double barriers and join barriers into 1 1/2 barriers.
@@ -3656,7 +3635,7 @@ void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
 #if USE_ITT_BUILD
   KMP_FSYNC_SPIN_INIT(spin, NULL);
 #endif /* USE_ITT_BUILD */
-  kmp_flag_32<false, false> spin_flag(spin, 0U);
+  kmp_flag_32 spin_flag(spin, 0U);
   while (!spin_flag.execute_tasks(thread, gtid, TRUE,
                                   &flag USE_ITT_BUILD_ARG(NULL), 0)) {
 #if USE_ITT_BUILD


@@ -12,32 +12,14 @@
 #include "kmp_wait_release.h"
-void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64<> *flag,
+void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag,
                    int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
   if (final_spin)
-    __kmp_wait_template<kmp_flag_64<>, TRUE>(
+    __kmp_wait_template<kmp_flag_64, TRUE>(
         this_thr, flag USE_ITT_BUILD_ARG(itt_sync_obj));
   else
-    __kmp_wait_template<kmp_flag_64<>, FALSE>(
+    __kmp_wait_template<kmp_flag_64, FALSE>(
         this_thr, flag USE_ITT_BUILD_ARG(itt_sync_obj));
 }
-void __kmp_release_64(kmp_flag_64<> *flag) { __kmp_release_template(flag); }
+void __kmp_release_64(kmp_flag_64 *flag) { __kmp_release_template(flag); }
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-template <bool C, bool S>
-void __kmp_mwait_32(int th_gtid, kmp_flag_32<C, S> *flag) {
-  __kmp_mwait_template(th_gtid, flag);
-}
-template <bool C, bool S>
-void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag) {
-  __kmp_mwait_template(th_gtid, flag);
-}
-void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag) {
-  __kmp_mwait_template(th_gtid, flag);
-}
-template void __kmp_mwait_32<false, false>(int, kmp_flag_32<false, false> *);
-template void __kmp_mwait_64<false, true>(int, kmp_flag_64<false, true> *);
-template void __kmp_mwait_64<true, false>(int, kmp_flag_64<true, false> *);
-#endif
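The deleted tail of this file used explicit template instantiation: the templated definitions live in the .cpp, and only the specializations other translation units actually link against are emitted there. A minimal illustration of the pattern (the names are made up, not the runtime's):

// header: declare the template, no body
template <bool C, bool S> struct my_flag { /* ... */ };
template <bool C, bool S> void my_wake(my_flag<C, S> *f);

// one .cpp: define it and emit exactly the specializations callers need
template <bool C, bool S> void my_wake(my_flag<C, S> *f) { /* ... */ }
template void my_wake<false, false>(my_flag<false, false> *);
template void my_wake<false, true>(my_flag<false, true> *);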


@@ -42,26 +42,20 @@ enum flag_type {
   flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
 };
-struct flag_properties {
-  unsigned int type : 16;
-  unsigned int reserved : 16;
-};
 /*!
  * Base class for wait/release volatile flag
  */
 template <typename P> class kmp_flag_native {
   volatile P *loc;
-  flag_properties t;
+  flag_type t;
 public:
   typedef P flag_t;
-  kmp_flag_native(volatile P *p, flag_type ft)
-      : loc(p), t({(unsigned int)ft, 0U}) {}
+  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
   volatile P *get() { return loc; }
   void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
   void set(volatile P *new_loc) { loc = new_loc; }
-  flag_type get_type() { return (flag_type)(t.type); }
+  flag_type get_type() { return t; }
   P load() { return *loc; }
   void store(P val) { *loc = val; }
 };
@@ -73,12 +67,10 @@ template <typename P> class kmp_flag {
   std::atomic<P>
       *loc; /**< Pointer to the flag storage that is modified by another thread
             */
-  flag_properties t; /**< "Type" of the flag in loc */
+  flag_type t; /**< "Type" of the flag in loc */
 public:
   typedef P flag_t;
-  kmp_flag(std::atomic<P> *p, flag_type ft)
-      : loc(p), t({(unsigned int)ft, 0U}) {}
+  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
   /*!
    * @result the pointer to the actual flag
    */
@@ -94,7 +86,7 @@ public:
   /*!
    * @result the flag_type
    */
-  flag_type get_type() { return (flag_type)(t.type); }
+  flag_type get_type() { return t; }
   /*!
    * @result flag value
    */
@@ -112,7 +104,6 @@ public:
   bool notdone_check();
   P internal_release();
   void suspend(int th_gtid);
-  void mwait(int th_gtid);
   void resume(int th_gtid);
   P set_sleeping();
   P unset_sleeping();
@@ -169,8 +160,8 @@ static void __ompt_implicit_task_end(kmp_info_t *this_thr,
    to wake it back up to prevent deadlocks!
    NOTE: We may not belong to a team at this point. */
-template <class C, bool final_spin, bool Cancellable = false,
-          bool Sleepable = true>
+template <class C, int final_spin, bool cancellable = false,
+          bool sleepable = true>
 static inline bool
 __kmp_wait_template(kmp_info_t *this_thr,
                     C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
@@ -194,7 +185,7 @@ __kmp_wait_template(kmp_info_t *this_thr,
     return false;
   }
   th_gtid = this_thr->th.th_info.ds.ds_gtid;
-  if (Cancellable) {
+  if (cancellable) {
     kmp_team_t *team = this_thr->th.th_team;
     if (team && team->t.t_cancel_request == cancel_parallel)
       return true;
@@ -384,7 +375,7 @@ final_spin=FALSE)
     }
 #endif
     // Check if the barrier surrounding this wait loop has been cancelled
-    if (Cancellable) {
+    if (cancellable) {
       kmp_team_t *team = this_thr->th.th_team;
       if (team && team->t.t_cancel_request == cancel_parallel)
         break;
@@ -409,31 +400,23 @@ final_spin=FALSE)
 #endif
     // Don't suspend if wait loop designated non-sleepable
     // in template parameters
-    if (!Sleepable)
+    if (!sleepable)
       continue;
     if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
         __kmp_pause_status != kmp_soft_paused)
       continue;
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-    if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
-      KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
-      flag->mwait(th_gtid);
-    } else {
-#endif
-      KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
+    KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
 #if KMP_OS_UNIX
     if (final_spin)
       KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
 #endif
     flag->suspend(th_gtid);
 #if KMP_OS_UNIX
     if (final_spin)
       KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
 #endif
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-    }
-#endif

     if (TCR_4(__kmp_global.g.g_done)) {
@@ -475,7 +458,7 @@ final_spin=FALSE)
     KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
 #endif
   KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
-  if (Cancellable) {
+  if (cancellable) {
     kmp_team_t *team = this_thr->th.th_team;
     if (team && team->t.t_cancel_request == cancel_parallel) {
       if (tasks_completed) {
@@ -492,83 +475,6 @@ final_spin=FALSE)
   return false;
 }
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-// Set up a monitor on the flag variable causing the calling thread to wait in
-// a less active state until the flag variable is modified.
-template <class C>
-static inline void __kmp_mwait_template(int th_gtid, C *flag) {
-  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
-  kmp_info_t *th = __kmp_threads[th_gtid];
-  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
-                flag->get()));
-  // User-level mwait is available
-  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);
-  __kmp_suspend_initialize_thread(th);
-  __kmp_lock_suspend_mx(th);
-  volatile void *spin = flag->get();
-  void *cacheline = (void *)(kmp_uint64(spin) & ~(CACHE_LINE - 1));
-  if (!flag->done_check()) {
-    // Mark thread as no longer active
-    th->th.th_active = FALSE;
-    if (th->th.th_active_in_pool) {
-      th->th.th_active_in_pool = FALSE;
-      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
-      KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
-    }
-    flag->set_sleeping();
-    KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
-#if KMP_HAVE_UMWAIT
-    if (__kmp_umwait_enabled) {
-      __kmp_umonitor(cacheline);
-    }
-#elif KMP_HAVE_MWAIT
-    if (__kmp_mwait_enabled) {
-      __kmp_mm_monitor(cacheline, 0, 0);
-    }
-#endif
-    // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
-    // the address could happen after the last time we checked and before
-    // monitoring started, in which case monitor can't detect the change.
-    if (flag->done_check())
-      flag->unset_sleeping();
-    else {
-      // if flag changes here, wake-up happens immediately
-      TCW_PTR(th->th.th_sleep_loc, (void *)flag);
-      __kmp_unlock_suspend_mx(th);
-      KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
-#if KMP_HAVE_UMWAIT
-      if (__kmp_umwait_enabled) {
-        __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
-      }
-#elif KMP_HAVE_MWAIT
-      if (__kmp_mwait_enabled) {
-        __kmp_mm_mwait(0, __kmp_mwait_hints);
-      }
-#endif
-      KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
-      __kmp_lock_suspend_mx(th);
-      // Clean up sleep info; doesn't matter how/why this thread stopped waiting
-      if (flag->is_sleeping())
-        flag->unset_sleeping();
-      TCW_PTR(th->th.th_sleep_loc, NULL);
-    }
-    // Mark thread as active again
-    th->th.th_active = TRUE;
-    if (TCR_4(th->th.th_in_pool)) {
-      KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
-      th->th.th_active_in_pool = TRUE;
-    }
-  } // Drop out to main wait loop to check flag, handle tasks, etc.
-  __kmp_unlock_suspend_mx(th);
-  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
-}
-#endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
 /* Release any threads specified as waiting on the flag by releasing the flag
    and resume the waiting thread if indicated by the sleep bit(s). A thread that
    calls __kmp_wait_template must call this function to wake up the potentially
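The removed __kmp_mwait_template follows the same handshake the remaining suspend path uses: publish a sleep bit, re-check the flag, and only then go to sleep, so a releaser that flips the flag and observes the sleep bit knows it must wake the waiter. A generic sketch of that protocol using std::atomic and a condition variable; all names are illustrative, not the runtime's:

#include <atomic>
#include <condition_variable>
#include <mutex>

constexpr unsigned SLEEP_BIT = 1u;
std::atomic<unsigned> flag{0}; // bit 1: payload; SLEEP_BIT: waiter asleep
std::mutex mx;
std::condition_variable cv;

void waiter() {
  std::unique_lock<std::mutex> lk(mx);
  flag.fetch_or(SLEEP_BIT);              // 1) announce intent to sleep
  if ((flag.load() & ~SLEEP_BIT) != 0) { // 2) re-check after announcing
    flag.fetch_and(~SLEEP_BIT);          //    released in the window: undo
    return;
  }
  cv.wait(lk, [] { return (flag.load() & ~SLEEP_BIT) != 0; }); // 3) sleep
  flag.fetch_and(~SLEEP_BIT);            // awake again; clear the sleep bit
}

void releaser() {
  unsigned old = flag.fetch_or(2u);      // set the payload bit
  if (old & SLEEP_BIT) {                 // waiter may be blocked: wake it
    std::lock_guard<std::mutex> lk(mx);
    cv.notify_one();
  }
}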
@@ -639,7 +545,7 @@ template <> struct flag_traits<kmp_uint64> {
 };
 // Basic flag that does not use C11 Atomics
-template <typename FlagType, bool Sleepable>
+template <typename FlagType>
 class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
   typedef flag_traits<FlagType> traits_type;
   FlagType checker; /**< Value to compare flag to to check if flag has been
@@ -682,13 +588,7 @@ public:
   /*!
    * @result true if the flag object has been released.
    */
-  bool done_check() {
-    if (Sleepable)
-      return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
-             checker;
-    else
-      return traits_type::tcr(*(this->get())) == checker;
-  }
+  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
   /*!
    * @param old_loc in old value of flag
    * @result true if the flag's old value indicates it was released.
@@ -743,8 +643,7 @@ public:
   enum barrier_type get_bt() { return bs_last_barrier; }
 };
-template <typename FlagType, bool Sleepable>
-class kmp_basic_flag : public kmp_flag<FlagType> {
+template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
   typedef flag_traits<FlagType> traits_type;
   FlagType checker; /**< Value to compare flag to to check if flag has been
                          released. */
@@ -786,12 +685,7 @@ public:
   /*!
    * @result true if the flag object has been released.
    */
-  bool done_check() {
-    if (Sleepable)
-      return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
-    else
-      return this->load() == checker;
-  }
+  bool done_check() { return this->load() == checker; }
   /*!
    * @param old_loc in old value of flag
    * @result true if the flag's old value indicates it was released.
@@ -842,19 +736,14 @@ public:
   enum barrier_type get_bt() { return bs_last_barrier; }
 };
-template <bool Cancellable, bool Sleepable>
-class kmp_flag_32 : public kmp_basic_flag<kmp_uint32, Sleepable> {
+class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
 public:
-  kmp_flag_32(std::atomic<kmp_uint32> *p)
-      : kmp_basic_flag<kmp_uint32, Sleepable>(p) {}
+  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
   kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
-      : kmp_basic_flag<kmp_uint32, Sleepable>(p, thr) {}
+      : kmp_basic_flag<kmp_uint32>(p, thr) {}
   kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
-      : kmp_basic_flag<kmp_uint32, Sleepable>(p, c) {}
+      : kmp_basic_flag<kmp_uint32>(p, c) {}
   void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
-#endif
   void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
   int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
@@ -863,32 +752,27 @@ public:
         this_thr, gtid, this, final_spin,
         thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
   }
-  bool wait(kmp_info_t *this_thr,
+  void wait(kmp_info_t *this_thr,
             int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
     if (final_spin)
-      return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
+      __kmp_wait_template<kmp_flag_32, TRUE>(
           this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
     else
-      return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
+      __kmp_wait_template<kmp_flag_32, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
   }
   void release() { __kmp_release_template(this); }
   flag_type get_ptr_type() { return flag32; }
 };
-template <bool Cancellable, bool Sleepable>
-class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64, Sleepable> {
+class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
 public:
-  kmp_flag_64(volatile kmp_uint64 *p)
-      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p) {}
+  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
   kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
-      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, thr) {}
+      : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
   kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
-      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, c) {}
+      : kmp_basic_flag_native<kmp_uint64>(p, c) {}
   void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-  void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
-#endif
   void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
   int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
@@ -897,15 +781,27 @@ public:
         this_thr, gtid, this, final_spin,
         thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
   }
-  bool wait(kmp_info_t *this_thr,
+  void wait(kmp_info_t *this_thr,
             int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
     if (final_spin)
-      return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
+      __kmp_wait_template<kmp_flag_64, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
     else
-      return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
+      __kmp_wait_template<kmp_flag_64, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
   }
+  bool wait_cancellable_nosleep(kmp_info_t *this_thr,
+                                int final_spin
+                                    USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
+    bool retval = false;
+    if (final_spin)
+      retval = __kmp_wait_template<kmp_flag_64, TRUE, true, false>(
+          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
+    else
+      retval = __kmp_wait_template<kmp_flag_64, FALSE, true, false>(
+          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
+    return retval;
+  }
   void release() { __kmp_release_template(this); }
   flag_type get_ptr_type() { return flag64; }
 };
@@ -963,8 +859,8 @@ public:
       return true;
     else if (flag_switch) {
       this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
-      kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
-                         (kmp_uint64)KMP_BARRIER_STATE_BUMP);
+      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
+                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
       __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
     }
     return false;
@@ -1000,9 +896,6 @@ public:
   }
   void release() { __kmp_release_template(this); }
   void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
-#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
-  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
-#endif
   void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
   int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
@@ -1022,15 +915,15 @@ static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
   if (!flag)
     return;
-  switch (RCAST(kmp_flag_64<> *, CCAST(void *, flag))->get_type()) {
+  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
   case flag32:
-    __kmp_resume_32(gtid, (kmp_flag_32<> *)NULL);
+    __kmp_resume_32(gtid, NULL);
     break;
   case flag64:
-    __kmp_resume_64(gtid, (kmp_flag_64<> *)NULL);
+    __kmp_resume_64(gtid, NULL);
     break;
   case flag_oncore:
-    __kmp_resume_oncore(gtid, (kmp_flag_oncore *)NULL);
+    __kmp_resume_oncore(gtid, NULL);
     break;
   }
 }
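__kmp_null_resume_wrapper receives a type-erased pointer, reads the flag's embedded type tag via get_type(), and dispatches to the matching typed resume with a NULL flag (the resume path then re-derives the flag from the sleeping thread's th_sleep_loc). A standalone sketch of that tagged-dispatch idea, with demo names in place of the runtime's flag32/flag64/flag_oncore enum:

    enum demo_flag_type { demo_flag32, demo_flag64 };
    struct demo_flag_base {
      demo_flag_type type; // tag stored in the flag object itself
    };

    // Recover the concrete type from a type-erased pointer, then dispatch.
    static void demo_null_resume(void *erased) {
      if (!erased)
        return;
      switch (static_cast<demo_flag_base *>(erased)->type) {
      case demo_flag32:
        /* typed 32-bit resume with a NULL flag in the runtime */
        break;
      case demo_flag64:
        /* typed 64-bit resume with a NULL flag in the runtime */
        break;
      }
    }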

View File

@@ -1459,7 +1459,8 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
   __kmp_suspend_initialize_thread(th);
-  __kmp_lock_suspend_mx(th);
+  status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex);
+  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
   KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n",
                 th_gtid, flag->get()));
@@ -1470,7 +1471,8 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
   if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
       __kmp_pause_status != kmp_soft_paused) {
     flag->unset_sleeping();
-    __kmp_unlock_suspend_mx(th);
+    status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
+    KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
     return;
   }
   KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x,"
@@ -1533,7 +1535,7 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
                  th_gtid));
     status = pthread_cond_wait(&th->th.th_suspend_cv.c_cond,
                                &th->th.th_suspend_mx.m_mutex);
-#endif // USE_SUSPEND_TIMEOUT
+#endif
     if ((status != 0) && (status != EINTR) && (status != ETIMEDOUT)) {
       KMP_SYSFAIL("pthread_cond_wait", status);
@@ -1573,26 +1575,21 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
   }
 #endif
-  __kmp_unlock_suspend_mx(th);
+  status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
+  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
   KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));
 }
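Throughout this file the revert swaps the __kmp_lock_suspend_mx/__kmp_unlock_suspend_mx wrappers (introduced by the umwait patch) back to direct pthread calls whose return codes are checked. A minimal self-contained sketch of that restored pattern; CHECK_SYSFAIL here stands in for the runtime's KMP_CHECK_SYSFAIL macro:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    // Abort loudly if a pthread call reports an error, as KMP_CHECK_SYSFAIL does.
    #define CHECK_SYSFAIL(func, status)                                          \
      if ((status) != 0) {                                                       \
        fprintf(stderr, "%s failed with status %d\n", (func), (status));         \
        abort();                                                                 \
      }

    static void sleep_bit_update(pthread_mutex_t *mx) {
      int status = pthread_mutex_lock(mx);
      CHECK_SYSFAIL("pthread_mutex_lock", status);
      // ... set or clear the flag's sleep bit under the mutex ...
      status = pthread_mutex_unlock(mx);
      CHECK_SYSFAIL("pthread_mutex_unlock", status);
    }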
-template <bool C, bool S>
-void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag) {
+void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) {
   __kmp_suspend_template(th_gtid, flag);
 }
-template <bool C, bool S>
-void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {
+void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) {
   __kmp_suspend_template(th_gtid, flag);
 }
 void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
   __kmp_suspend_template(th_gtid, flag);
 }
-template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);
-template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);
-template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);
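The three deleted "template void ..." lines are explicit instantiations: because the templated __kmp_suspend_32/64 bodies live in this .cpp file, every <C, S> combination used by other translation units had to be emitted here by hand. Once the revert makes them plain functions, the instantiations become unnecessary. A generic sketch of the idiom, with demo names only:

    // Translation unit A: the template body is visible only here, so the
    // specializations other files link against must be emitted explicitly.
    template <bool C, bool S> void demo_suspend(int gtid) {
      // ... suspend logic parameterized by C (cancellable) and S (sleepable) ...
    }
    template void demo_suspend<false, true>(int); // emits demo_suspend<false,true>
    template void demo_suspend<true, false>(int); // emits demo_suspend<true,false>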
 /* This routine signals the thread specified by target_gtid to wake up
    after setting the sleep bit indicated by the flag argument to FALSE.
    The target thread must already have called __kmp_suspend_template() */
@@ -1611,7 +1608,9 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
   KMP_DEBUG_ASSERT(gtid != target_gtid);
   __kmp_suspend_initialize_thread(th);
-  __kmp_lock_suspend_mx(th);
+  status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex);
+  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
   if (!flag) { // coming from __kmp_null_resume_wrapper
     flag = (C *)CCAST(void *, th->th.th_sleep_loc);
@@ -1620,11 +1619,13 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
   // First, check if the flag is null or its type has changed. If so, someone
   // else woke it up.
   if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type
-    // simply shows what flag was cast to
+    // simply shows what
+    // flag was cast to
     KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
                  "awake: flag(%p)\n",
                  gtid, target_gtid, NULL));
-    __kmp_unlock_suspend_mx(th);
+    status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
+    KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
     return;
   } else { // if multiple threads are sleeping, flag should be internally
     // referring to a specific thread here
@@ -1634,7 +1635,8 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
                  "awake: flag(%p): "
                  "%u => %u\n",
                  gtid, target_gtid, flag->get(), old_spin, flag->load()));
-    __kmp_unlock_suspend_mx(th);
+    status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
+    KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
     return;
   }
   KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset "
@@ -1654,27 +1656,23 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
 #endif
   status = pthread_cond_signal(&th->th.th_suspend_cv.c_cond);
   KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
-  __kmp_unlock_suspend_mx(th);
+  status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
+  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
   KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up"
                 " for T#%d\n",
                 gtid, target_gtid));
 }
-template <bool C, bool S>
-void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag) {
+void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) {
   __kmp_resume_template(target_gtid, flag);
 }
-template <bool C, bool S>
-void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {
+void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) {
   __kmp_resume_template(target_gtid, flag);
 }
 void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
   __kmp_resume_template(target_gtid, flag);
 }
-template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);
-template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);
 #if KMP_USE_MONITOR
 void __kmp_resume_monitor() {
   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);

View File

@@ -363,7 +363,7 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
                th_gtid, flag->get()));
   __kmp_suspend_initialize_thread(th);
-  __kmp_lock_suspend_mx(th);
+  __kmp_win32_mutex_lock(&th->th.th_suspend_mx);
   KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for flag's"
                 " loc(%p)\n",
@@ -375,7 +375,7 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
   if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
       __kmp_pause_status != kmp_soft_paused) {
     flag->unset_sleeping();
-    __kmp_unlock_suspend_mx(th);
+    __kmp_win32_mutex_unlock(&th->th.th_suspend_mx);
     return;
   }
@@ -437,26 +437,21 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
     }
   }
-  __kmp_unlock_suspend_mx(th);
+  __kmp_win32_mutex_unlock(&th->th.th_suspend_mx);
   KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));
 }
-template <bool C, bool S>
-void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag) {
+void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) {
   __kmp_suspend_template(th_gtid, flag);
 }
-template <bool C, bool S>
-void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {
+void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) {
   __kmp_suspend_template(th_gtid, flag);
 }
 void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
   __kmp_suspend_template(th_gtid, flag);
 }
-template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);
-template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);
-template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);
 /* This routine signals the thread specified by target_gtid to wake up
    after setting the sleep bit indicated by the flag argument to FALSE */
 template <class C>
@@ -472,7 +467,7 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
                gtid, target_gtid));
   __kmp_suspend_initialize_thread(th);
-  __kmp_lock_suspend_mx(th);
+  __kmp_win32_mutex_lock(&th->th.th_suspend_mx);
   if (!flag) { // coming from __kmp_null_resume_wrapper
     flag = (C *)th->th.th_sleep_loc;
@@ -486,7 +481,7 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
     KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
                  "awake: flag's loc(%p)\n",
                  gtid, target_gtid, NULL));
-    __kmp_unlock_suspend_mx(th);
+    __kmp_win32_mutex_unlock(&th->th.th_suspend_mx);
     return;
   } else {
     typename C::flag_t old_spin = flag->unset_sleeping();
@@ -494,7 +489,7 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
       KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
                    "awake: flag's loc(%p): %u => %u\n",
                    gtid, target_gtid, flag->get(), old_spin, *(flag->get())));
-      __kmp_unlock_suspend_mx(th);
+      __kmp_win32_mutex_unlock(&th->th.th_suspend_mx);
       return;
     }
   }
@@ -504,28 +499,23 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
                gtid, target_gtid, flag->get()));
   __kmp_win32_cond_signal(&th->th.th_suspend_cv);
-  __kmp_unlock_suspend_mx(th);
+  __kmp_win32_mutex_unlock(&th->th.th_suspend_mx);
   KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up"
                 " for T#%d\n",
                 gtid, target_gtid));
 }
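On Windows the same wrapper removal restores direct calls to the runtime's own __kmp_win32_mutex_lock/__kmp_win32_mutex_unlock and __kmp_win32_cond_signal helpers. A standalone analogy of the suspend/resume pairing those helpers implement, built on stock Win32 primitives; SRWLOCK and CONDITION_VARIABLE are illustrative stand-ins, since kmp_win32_mutex_t has its own layout:

    #include <windows.h>

    struct demo_suspend_state {
      SRWLOCK mx = SRWLOCK_INIT;
      CONDITION_VARIABLE cv = CONDITION_VARIABLE_INIT;
      bool sleeping = false;
    };

    void demo_suspend(demo_suspend_state *s) {
      AcquireSRWLockExclusive(&s->mx);
      s->sleeping = true;
      while (s->sleeping) // re-check the predicate: guards spurious wakeups
        SleepConditionVariableSRW(&s->cv, &s->mx, INFINITE, 0);
      ReleaseSRWLockExclusive(&s->mx);
    }

    void demo_resume(demo_suspend_state *s) {
      AcquireSRWLockExclusive(&s->mx); // clear the sleep bit under the lock,
      s->sleeping = false;             // as __kmp_resume_template does
      WakeConditionVariable(&s->cv);
      ReleaseSRWLockExclusive(&s->mx);
    }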
-template <bool C, bool S>
-void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag) {
+void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) {
   __kmp_resume_template(target_gtid, flag);
 }
-template <bool C, bool S>
-void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {
+void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) {
   __kmp_resume_template(target_gtid, flag);
 }
 void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
   __kmp_resume_template(target_gtid, flag);
 }
-template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);
-template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);
 void __kmp_yield() { Sleep(0); }
 void __kmp_gtid_set_specific(int gtid) {