Improvements to process affinity mask setting

A couple improvements:
1) Add ability to limit fullMask size when KMP_HW_SUBSET limits resources.
2) Make KMP_HW_SUBSET work for affinity_none, and only limit fullMask in this case.

Patch by Andrey Churbanov.

Differential Revision: http://reviews.llvm.org/D21528

llvm-svn: 273278
This commit is contained in:
Jonathan Peyton 2016-06-21 15:54:38 +00:00
parent aee40fc61c
commit fd7cc42fed
1 changed files with 102 additions and 51 deletions

View File

@ -249,6 +249,7 @@ static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
static int *__kmp_pu_os_idx = NULL;
//
// __kmp_affinity_uniform_topology() doesn't work when called from
@ -421,6 +422,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
// Allocate the data structure to be returned.
//
AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
__kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
//
// When affinity is off, this routine will still be called to set
@ -464,6 +466,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
addr.labels[1] = core_identifier; // core
addr.labels[2] = pu_identifier; // pu
retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
__kmp_pu_os_idx[nActiveThreads] = pu->os_index; // keep os index for each active pu
nActiveThreads++;
++num_active_threads; // count active threads per core
}
@ -668,7 +671,16 @@ __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
__kmp_nThreadsPerCore, __kmp_ncores);
}
KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
__kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
if (__kmp_affinity_type == affinity_none) {
int avail_ct = 0;
unsigned int i;
KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask))
continue;
__kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
}
return 0;
}
@ -686,7 +698,7 @@ __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
continue;
}
__kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
Address addr(1);
addr.labels[0] = i;
(*address2os)[avail_ct++] = AddrUnsPair(addr,i);
@ -742,6 +754,8 @@ __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
//
*address2os = (AddrUnsPair*)
__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
__kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
int avail_ct = 0;
int i;
KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
@ -751,7 +765,7 @@ __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
continue;
}
__kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
Address addr(2);
addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
@ -1267,7 +1281,12 @@ __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
__kmp_nThreadsPerCore, __kmp_ncores);
}
KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
__kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
for (i = 0; i < nApics; ++i) {
__kmp_pu_os_idx[i] = threadInfo[i].osId;
}
if (__kmp_affinity_type == affinity_none) {
__kmp_free(threadInfo);
KMP_CPU_FREE(oldMask);
@ -1745,7 +1764,12 @@ __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
__kmp_str_buf_free(&buf);
}
KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
__kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
for (proc = 0; (int)proc < nApics; ++proc) {
__kmp_pu_os_idx[proc] = retval[proc].second;
}
if (__kmp_affinity_type == affinity_none) {
__kmp_free(last);
__kmp_free(maxCt);
@ -2491,6 +2515,13 @@ __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
}
# endif // KMP_MIC && REDUCE_TEAM_SIZE
KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc);
__kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
for (i = 0; i < num_avail; ++i) { // fill the os indices
__kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
}
if (__kmp_affinity_type == affinity_none) {
__kmp_free(lastId);
__kmp_free(totals);
@ -3391,10 +3422,11 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
static void
__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
{
int i, j, k, n_old = 0, n_new = 0, proc_num = 0;
if (__kmp_place_num_sockets == 0 &&
__kmp_place_num_cores == 0 &&
__kmp_place_num_threads_per_core == 0 )
return; // no topology limiting actions requested, exit
goto _exit; // no topology limiting actions requested, exit
if (__kmp_place_num_sockets == 0)
__kmp_place_num_sockets = nPackages; // use all available sockets
if (__kmp_place_num_cores == 0)
@ -3405,42 +3437,66 @@ __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
if ( !__kmp_affinity_uniform_topology() ) {
KMP_WARNING( AffHWSubsetNonUniform );
return; // don't support non-uniform topology
goto _exit; // don't support non-uniform topology
}
if ( depth != 3 ) {
if ( depth > 3 ) {
KMP_WARNING( AffHWSubsetNonThreeLevel );
return; // don't support not-3-level topology
goto _exit; // don't support not-3-level topology
}
if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
KMP_WARNING(AffHWSubsetManySockets);
return;
goto _exit;
}
if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
KMP_WARNING( AffHWSubsetManyCores );
return;
goto _exit;
}
AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
__kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
AddrUnsPair *newAddr;
if (pAddr) // pAddr is NULL in case of affinity_none
newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
__kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
int i, j, k, n_old = 0, n_new = 0;
for (i = 0; i < nPackages; ++i)
for (i = 0; i < nPackages; ++i) {
if (i < __kmp_place_socket_offset ||
i >= __kmp_place_socket_offset + __kmp_place_num_sockets)
i >= __kmp_place_socket_offset + __kmp_place_num_sockets) {
n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
else
for (j = 0; j < nCoresPerPkg; ++j) // walk through requested socket
if (__kmp_pu_os_idx != NULL) {
for (j = 0; j < nCoresPerPkg; ++j) { // walk through skipped socket
for (k = 0; k < __kmp_nThreadsPerCore; ++k) {
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
++proc_num;
}
}
}
} else {
for (j = 0; j < nCoresPerPkg; ++j) { // walk through requested socket
if (j < __kmp_place_core_offset ||
j >= __kmp_place_core_offset + __kmp_place_num_cores)
j >= __kmp_place_core_offset + __kmp_place_num_cores) {
n_old += __kmp_nThreadsPerCore; // skip not-requested core
else
if (__kmp_pu_os_idx != NULL) {
for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through skipped core
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
++proc_num;
}
}
} else {
for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
if (k < __kmp_place_num_threads_per_core) {
newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
if (pAddr)
newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
n_new++;
} else {
if (__kmp_pu_os_idx != NULL)
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
}
n_old++;
++proc_num;
}
}
}
}
}
KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
__kmp_place_num_threads_per_core);
@ -3451,8 +3507,15 @@ __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
__kmp_avail_proc = n_new; // correct avail_proc
__kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
__kmp_free( *pAddr );
*pAddr = newAddr; // replace old topology with new one
if (pAddr) {
__kmp_free( *pAddr );
*pAddr = newAddr; // replace old topology with new one
}
_exit:
if (__kmp_pu_os_idx != NULL) {
__kmp_free(__kmp_pu_os_idx);
__kmp_pu_os_idx = NULL;
}
}
@ -3460,6 +3523,12 @@ static AddrUnsPair *address2os = NULL;
static int * procarr = NULL;
static int __kmp_aff_depth = 0;
#define KMP_EXIT_AFF_NONE \
KMP_ASSERT(__kmp_affinity_type == affinity_none); \
KMP_ASSERT(address2os == NULL); \
__kmp_apply_thread_places(NULL, 0); \
return;
static void
__kmp_aux_affinity_initialize(void)
{
@ -3536,9 +3605,7 @@ __kmp_aux_affinity_initialize(void)
if(!__kmp_hwloc_error) {
depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
if (depth == 0) {
KMP_ASSERT(__kmp_affinity_type == affinity_none);
KMP_ASSERT(address2os == NULL);
return;
KMP_EXIT_AFF_NONE;
} else if(depth < 0 && __kmp_affinity_verbose) {
KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
}
@ -3558,9 +3625,7 @@ __kmp_aux_affinity_initialize(void)
file_name = NULL;
depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
if (depth == 0) {
KMP_ASSERT(__kmp_affinity_type == affinity_none);
KMP_ASSERT(address2os == NULL);
return;
KMP_EXIT_AFF_NONE;
}
if (depth < 0) {
@ -3577,9 +3642,7 @@ __kmp_aux_affinity_initialize(void)
file_name = NULL;
depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
if (depth == 0) {
KMP_ASSERT(__kmp_affinity_type == affinity_none);
KMP_ASSERT(address2os == NULL);
return;
KMP_EXIT_AFF_NONE;
}
}
}
@ -3607,9 +3670,7 @@ __kmp_aux_affinity_initialize(void)
depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
fclose(f);
if (depth == 0) {
KMP_ASSERT(__kmp_affinity_type == affinity_none);
KMP_ASSERT(address2os == NULL);
return;
KMP_EXIT_AFF_NONE;
}
}
}
@ -3646,9 +3707,7 @@ __kmp_aux_affinity_initialize(void)
file_name = "";
depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
if (depth == 0) {
KMP_ASSERT(__kmp_affinity_type == affinity_none);
KMP_ASSERT(address2os == NULL);
return;
KMP_EXIT_AFF_NONE;
}
KMP_ASSERT(depth > 0);
KMP_ASSERT(address2os != NULL);
@ -3671,9 +3730,7 @@ __kmp_aux_affinity_initialize(void)
depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
if (depth == 0) {
KMP_ASSERT(__kmp_affinity_type == affinity_none);
KMP_ASSERT(address2os == NULL);
return;
KMP_EXIT_AFF_NONE;
}
if (depth < 0) {
KMP_ASSERT(msg_id != kmp_i18n_null);
@ -3688,9 +3745,7 @@ __kmp_aux_affinity_initialize(void)
depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
if (depth == 0) {
KMP_ASSERT(__kmp_affinity_type == affinity_none);
KMP_ASSERT(address2os == NULL);
return;
KMP_EXIT_AFF_NONE;
}
if (depth < 0) {
KMP_ASSERT(msg_id != kmp_i18n_null);
@ -3748,8 +3803,7 @@ __kmp_aux_affinity_initialize(void)
}
if (__kmp_affinity_type == affinity_none) {
KMP_ASSERT(depth == 0);
KMP_ASSERT(address2os == NULL);
return;
KMP_EXIT_AFF_NONE;
}
}
@ -3777,9 +3831,7 @@ __kmp_aux_affinity_initialize(void)
depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
if (depth == 0) {
KMP_ASSERT(__kmp_affinity_type == affinity_none);
KMP_ASSERT(address2os == NULL);
return;
KMP_EXIT_AFF_NONE;
}
// should not fail
KMP_ASSERT(depth > 0);
@ -3793,9 +3845,7 @@ __kmp_aux_affinity_initialize(void)
}
depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
if (depth == 0) {
KMP_ASSERT(__kmp_affinity_type == affinity_none);
KMP_ASSERT(address2os == NULL);
return;
KMP_EXIT_AFF_NONE;
}
}
# endif // KMP_USE_HWLOC
@ -4014,6 +4064,7 @@ __kmp_aux_affinity_initialize(void)
__kmp_free(osId2Mask);
machine_hierarchy.init(address2os, __kmp_avail_proc);
}
#undef KMP_EXIT_AFF_NONE
void