[OpenMP] Fix hidden helper + affinity

When KMP_AFFINITY is set, each worker thread's gtid value is used as an
index into the place list to determine the thread's placement. With hidden
helpers enabled, this gtid value is shifted down leading to unexpected
shifted thread placement. This patch restores the previous behavior by
adjusting the mask index to take the number of hidden helper threads
into account.

Hidden helper threads are given the full initial mask and do not
participate in any of the other affinity mechanisms (place partitioning,
balanced affinity). Their affinity is only printed for debug builds.

Differential Revision: https://reviews.llvm.org/D101882
This commit is contained in:
Peyton, Jonathan L 2021-05-10 10:03:23 -05:00
parent faebc6bf10
commit c765d140fe
3 changed files with 43 additions and 7 deletions

View File

@@ -4067,11 +4067,26 @@ extern void __kmp_hidden_helper_main_thread_release();
#define KMP_HIDDEN_HELPER_WORKER_THREAD(gtid) \
((gtid) > 1 && (gtid) <= __kmp_hidden_helper_threads_num)
#define KMP_HIDDEN_HELPER_TEAM(team) \
(team->t.t_threads[0] == __kmp_hidden_helper_main_thread)
// Map a gtid to a hidden helper thread. The first hidden helper thread, a.k.a
// main thread, is skipped.
#define KMP_GTID_TO_SHADOW_GTID(gtid) \
((gtid) % (__kmp_hidden_helper_threads_num - 1) + 2)
// Return the gtid this thread would have received if hidden helper threads
// did not exist: shift gtid down by the number of hidden helper threads.
// The gtid is left unmodified when no hidden helpers are configured, when
// gtid is the initial thread (gtid 0) or invalid (negative), or when the
// shift would produce a negative id (i.e. gtid belongs to the hidden
// helper range itself).
static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) {
  const int num_hidden = __kmp_hidden_helper_threads_num;
  // Only shift regular worker gtids; leave everything else untouched.
  if (num_hidden <= 0 || gtid <= 0 || gtid < num_hidden)
    return gtid;
  return gtid - num_hidden;
}
// Support for error directive
typedef enum kmp_severity_t {
severity_warning = 1,

View File

@@ -3940,7 +3940,8 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
if (KMP_AFFINITY_NON_PROC_BIND) {
if ((__kmp_affinity_type == affinity_none) ||
(__kmp_affinity_type == affinity_balanced)) {
(__kmp_affinity_type == affinity_balanced) ||
KMP_HIDDEN_HELPER_THREAD(gtid)) {
#if KMP_GROUP_AFFINITY
if (__kmp_num_proc_groups > 1) {
return;
@@ -3950,12 +3951,13 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
i = 0;
mask = __kmp_affin_fullMask;
} else {
int mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
i = (mask_idx + __kmp_affinity_offset) % __kmp_affinity_num_masks;
mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
}
} else {
if ((!isa_root) ||
if ((!isa_root) || KMP_HIDDEN_HELPER_THREAD(gtid) ||
(__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
#if KMP_GROUP_AFFINITY
if (__kmp_num_proc_groups > 1) {
@@ -3967,15 +3969,16 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
mask = __kmp_affin_fullMask;
} else {
// int i = some hash function or just a counter that doesn't
// always start at 0. Use gtid for now.
// always start at 0. Use adjusted gtid for now.
int mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
i = (mask_idx + __kmp_affinity_offset) % __kmp_affinity_num_masks;
mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
}
}
th->th.th_current_place = i;
if (isa_root) {
if (isa_root || KMP_HIDDEN_HELPER_THREAD(gtid)) {
th->th.th_new_place = i;
th->th.th_first_place = 0;
th->th.th_last_place = __kmp_affinity_num_masks - 1;
@@ -3996,7 +3999,7 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
KMP_CPU_COPY(th->th.th_affin_mask, mask);
if (__kmp_affinity_verbose
if (__kmp_affinity_verbose && !KMP_HIDDEN_HELPER_THREAD(gtid)
/* to avoid duplicate printing (will be correctly printed on barrier) */
&& (__kmp_affinity_type == affinity_none ||
(i != KMP_PLACE_ALL && __kmp_affinity_type != affinity_balanced))) {
@@ -4007,6 +4010,17 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
__kmp_gettid(), gtid, buf);
}
#if KMP_DEBUG
// Hidden helper thread affinity only printed for debug builds
if (__kmp_affinity_verbose && KMP_HIDDEN_HELPER_THREAD(gtid)) {
char buf[KMP_AFFIN_MASK_PRINT_LEN];
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
th->th.th_affin_mask);
KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY (hidden helper thread)",
(kmp_int32)getpid(), __kmp_gettid(), gtid, buf);
}
#endif
#if KMP_OS_WINDOWS
// On Windows* OS, the process affinity mask might have changed. If the user
// didn't request affinity and this call fails, just continue silently.
@@ -4292,6 +4306,10 @@ void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
bool fine_gran = true;
int tid = th->th.th_info.ds.ds_tid;
// Do not perform balanced affinity for the hidden helper threads
if (KMP_HIDDEN_HELPER_THREAD(__kmp_gtid_from_thread(th)))
return;
switch (__kmp_affinity_gran) {
case KMP_HW_THREAD:
break;

View File

@@ -4588,6 +4588,9 @@ __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
// thread's partition, and binds each worker to a thread in their partition.
// The primary thread's partition should already include its current binding.
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
// Do not partition places for the hidden helper team
if (KMP_HIDDEN_HELPER_TEAM(team))
return;
// Copy the primary thread's place partition to the team struct
kmp_info_t *master_th = team->t.t_threads[0];
KMP_DEBUG_ASSERT(master_th != NULL);