[OpenMP] Fix hidden helper + affinity

When KMP_AFFINITY is set, each worker thread's gtid value is used as an
index into the place list to determine the thread's placement. With hidden
helpers enabled, this gtid value is shifted down leading to unexpected
shifted thread placement. This patch restores the previous behavior by
adjusting the mask index to take the number of hidden helper threads
into account.

Hidden helper threads are given the full initial mask and do not
participate in any of the other affinity mechanisms (place partitioning,
balanced affinity). Their affinity is only printed for debug builds.

Differential Revision: https://reviews.llvm.org/D101882
This commit is contained in:
Peyton, Jonathan L 2021-05-10 10:03:23 -05:00
parent faebc6bf10
commit c765d140fe
3 changed files with 43 additions and 7 deletions

View File

@@ -4067,11 +4067,26 @@ extern void __kmp_hidden_helper_main_thread_release();
#define KMP_HIDDEN_HELPER_WORKER_THREAD(gtid) \
((gtid) > 1 && (gtid) <= __kmp_hidden_helper_threads_num)
#define KMP_HIDDEN_HELPER_TEAM(team) \
(team->t.t_threads[0] == __kmp_hidden_helper_main_thread)
// Map a gtid to a hidden helper thread. The first hidden helper thread, a.k.a
// main thread, is skipped.
#define KMP_GTID_TO_SHADOW_GTID(gtid) \
((gtid) % (__kmp_hidden_helper_threads_num - 1) + 2)
// Return the gtid this thread would have received if hidden helper threads
// did not exist: shift gtid down by the number of hidden helper threads.
// The gtid is left unmodified when no hidden helpers are configured, when
// gtid is the initial thread (gtid 0) or invalid (negative), or when the
// shift would produce a negative id (i.e. gtid belongs to the hidden
// helper range itself).
static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) {
  const int num_hidden = __kmp_hidden_helper_threads_num;
  // Only shift regular worker gtids; leave everything else untouched.
  if (num_hidden <= 0 || gtid <= 0 || gtid < num_hidden)
    return gtid;
  return gtid - num_hidden;
}
// Support for error directive
typedef enum kmp_severity_t {
severity_warning = 1,

View File

@@ -3940,7 +3940,8 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
if (KMP_AFFINITY_NON_PROC_BIND) {
if ((__kmp_affinity_type == affinity_none) ||
(__kmp_affinity_type == affinity_balanced)) {
(__kmp_affinity_type == affinity_balanced) ||
KMP_HIDDEN_HELPER_THREAD(gtid)) {
#if KMP_GROUP_AFFINITY
if (__kmp_num_proc_groups > 1) {
return;
@@ -3950,12 +3951,13 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
i = 0;
mask = __kmp_affin_fullMask;
} else {
int mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
i = (mask_idx + __kmp_affinity_offset) % __kmp_affinity_num_masks;
mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
}
} else {
if ((!isa_root) ||
if ((!isa_root) || KMP_HIDDEN_HELPER_THREAD(gtid) ||
(__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
#if KMP_GROUP_AFFINITY
if (__kmp_num_proc_groups > 1) {
@@ -3967,15 +3969,16 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
mask = __kmp_affin_fullMask;
} else {
// int i = some hash function or just a counter that doesn't
// always start at 0. Use gtid for now.
// always start at 0. Use adjusted gtid for now.
int mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
i = (mask_idx + __kmp_affinity_offset) % __kmp_affinity_num_masks;
mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
}
}
th->th.th_current_place = i;
if (isa_root) {
if (isa_root || KMP_HIDDEN_HELPER_THREAD(gtid)) {
th->th.th_new_place = i;
th->th.th_first_place = 0;
th->th.th_last_place = __kmp_affinity_num_masks - 1;
@@ -3996,7 +3999,7 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
KMP_CPU_COPY(th->th.th_affin_mask, mask);
if (__kmp_affinity_verbose
if (__kmp_affinity_verbose && !KMP_HIDDEN_HELPER_THREAD(gtid)
/* to avoid duplicate printing (will be correctly printed on barrier) */
&& (__kmp_affinity_type == affinity_none ||
(i != KMP_PLACE_ALL && __kmp_affinity_type != affinity_balanced))) {
@@ -4007,6 +4010,17 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
__kmp_gettid(), gtid, buf);
}
#if KMP_DEBUG
// Hidden helper thread affinity only printed for debug builds
if (__kmp_affinity_verbose && KMP_HIDDEN_HELPER_THREAD(gtid)) {
char buf[KMP_AFFIN_MASK_PRINT_LEN];
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
th->th.th_affin_mask);
KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY (hidden helper thread)",
(kmp_int32)getpid(), __kmp_gettid(), gtid, buf);
}
#endif
#if KMP_OS_WINDOWS
// On Windows* OS, the process affinity mask might have changed. If the user
// didn't request affinity and this call fails, just continue silently.
@@ -4292,6 +4306,10 @@ void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
bool fine_gran = true;
int tid = th->th.th_info.ds.ds_tid;
// Do not perform balanced affinity for the hidden helper threads
if (KMP_HIDDEN_HELPER_THREAD(__kmp_gtid_from_thread(th)))
return;
switch (__kmp_affinity_gran) {
case KMP_HW_THREAD:
break;

View File

@@ -4588,6 +4588,9 @@ __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
// thread's partition, and binds each worker to a thread in their partition.
// The primary thread's partition should already include its current binding.
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
// Do not partition places for the hidden helper team
if (KMP_HIDDEN_HELPER_TEAM(team))
return;
// Copy the primary thread's place partition to the team struct
kmp_info_t *master_th = team->t.t_threads[0];
KMP_DEBUG_ASSERT(master_th != NULL);