Improvements to process affinity mask setting
A couple of improvements:
1) Add the ability to limit the fullMask size when KMP_HW_SUBSET limits resources.
2) Make KMP_HW_SUBSET work for affinity_none, and only limit fullMask in this case.

Patch by Andrey Churbanov.

Differential Revision: http://reviews.llvm.org/D21528

llvm-svn: 273278
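To illustrate the effect, here is a minimal, self-contained C++ sketch (not part of the patch; the topology sizes, the std::bitset stand-in for the affinity mask, and the "2 cores" subset are all hypothetical): when a hardware subset is requested, the bits of the processors outside that subset are cleared from the full mask, so even under affinity "none" the process is confined to the requested resources.

// Hypothetical illustration only: a std::bitset stands in for the
// kmp_affin_mask_t full mask; the machine and subset sizes are made up.
#include <bitset>
#include <cstdio>

int main() {
    const int cores = 4, threads_per_core = 2;   // assumed machine: 1 socket, 4 cores, 2 HW threads/core
    std::bitset<64> full_mask;                   // plays the role of __kmp_affin_fullMask
    for (int p = 0; p < cores * threads_per_core; ++p)
        full_mask.set(p);                        // initially every PU is in the mask

    const int requested_cores = 2;               // e.g. a "2c"-style subset request
    for (int c = 0; c < cores; ++c) {
        if (c >= requested_cores) {              // core outside the subset:
            for (int t = 0; t < threads_per_core; ++t)
                full_mask.reset(c * threads_per_core + t);  // clear its PUs from the mask
        }
    }
    std::printf("PUs left in the mask: %zu\n", full_mask.count());  // prints 4
    return 0;
}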
@@ -249,6 +249,7 @@ static int __kmp_nThreadsPerCore;
 #ifndef KMP_DFLT_NTH_CORES
 static int __kmp_ncores;
 #endif
+static int *__kmp_pu_os_idx = NULL;
 
 //
 // __kmp_affinity_uniform_topology() doesn't work when called from
@@ -421,6 +422,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
     // Allocate the data structure to be returned.
     //
     AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
+    __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
 
     //
     // When affinity is off, this routine will still be called to set
@@ -464,6 +466,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
                 addr.labels[1] = core_identifier; // core
                 addr.labels[2] = pu_identifier; // pu
                 retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
+                __kmp_pu_os_idx[nActiveThreads] = pu->os_index; // keep os index for each active pu
                 nActiveThreads++;
                 ++num_active_threads; // count active threads per core
             }
@@ -668,7 +671,16 @@ __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
         KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
           __kmp_nThreadsPerCore, __kmp_ncores);
     }
+    KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+    __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
     if (__kmp_affinity_type == affinity_none) {
+        int avail_ct = 0;
+        unsigned int i;
+        KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
+            if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask))
+                continue;
+            __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
+        }
         return 0;
     }
 
@@ -686,7 +698,7 @@ __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
         if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
             continue;
         }
-
+        __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
         Address addr(1);
         addr.labels[0] = i;
         (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
@@ -742,6 +754,8 @@ __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
     //
     *address2os = (AddrUnsPair*)
       __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
+    KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+    __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
     int avail_ct = 0;
     int i;
     KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
@@ -751,7 +765,7 @@ __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
         if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
             continue;
         }
-
+        __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
         Address addr(2);
         addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
         addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
@@ -1267,7 +1281,12 @@ __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
           __kmp_nThreadsPerCore, __kmp_ncores);
 
     }
-
+    KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+    KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
+    __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
+    for (i = 0; i < nApics; ++i) {
+        __kmp_pu_os_idx[i] = threadInfo[i].osId;
+    }
     if (__kmp_affinity_type == affinity_none) {
         __kmp_free(threadInfo);
         KMP_CPU_FREE(oldMask);
@@ -1745,7 +1764,12 @@ __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
 
         __kmp_str_buf_free(&buf);
     }
-
+    KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+    KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
+    __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
+    for (proc = 0; (int)proc < nApics; ++proc) {
+        __kmp_pu_os_idx[proc] = retval[proc].second;
+    }
     if (__kmp_affinity_type == affinity_none) {
         __kmp_free(last);
         __kmp_free(maxCt);
@@ -2491,6 +2515,13 @@ __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
     }
 # endif // KMP_MIC && REDUCE_TEAM_SIZE
 
+    KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+    KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc);
+    __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
+    for (i = 0; i < num_avail; ++i) { // fill the os indices
+        __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
+    }
+
     if (__kmp_affinity_type == affinity_none) {
         __kmp_free(lastId);
         __kmp_free(totals);
@@ -3391,10 +3422,11 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
 static void
 __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
 {
+    int i, j, k, n_old = 0, n_new = 0, proc_num = 0;
     if (__kmp_place_num_sockets == 0 &&
         __kmp_place_num_cores == 0 &&
         __kmp_place_num_threads_per_core == 0 )
-        return; // no topology limiting actions requested, exit
+        goto _exit; // no topology limiting actions requested, exit
     if (__kmp_place_num_sockets == 0)
         __kmp_place_num_sockets = nPackages; // use all available sockets
     if (__kmp_place_num_cores == 0)
@@ -3405,42 +3437,66 @@ __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
 
     if ( !__kmp_affinity_uniform_topology() ) {
         KMP_WARNING( AffHWSubsetNonUniform );
-        return; // don't support non-uniform topology
+        goto _exit; // don't support non-uniform topology
     }
-    if ( depth != 3 ) {
+    if ( depth > 3 ) {
         KMP_WARNING( AffHWSubsetNonThreeLevel );
-        return; // don't support not-3-level topology
+        goto _exit; // don't support not-3-level topology
     }
     if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
         KMP_WARNING(AffHWSubsetManySockets);
-        return;
+        goto _exit;
     }
     if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
         KMP_WARNING( AffHWSubsetManyCores );
-        return;
+        goto _exit;
     }
 
-    AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
-        __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
+    AddrUnsPair *newAddr;
+    if (pAddr) // pAddr is NULL in case of affinity_none
+        newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
+            __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
 
-    int i, j, k, n_old = 0, n_new = 0;
-    for (i = 0; i < nPackages; ++i)
+    for (i = 0; i < nPackages; ++i) {
         if (i < __kmp_place_socket_offset ||
-            i >= __kmp_place_socket_offset + __kmp_place_num_sockets)
+            i >= __kmp_place_socket_offset + __kmp_place_num_sockets) {
             n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
-        else
-            for (j = 0; j < nCoresPerPkg; ++j) // walk through requested socket
+            if (__kmp_pu_os_idx != NULL) {
+                for (j = 0; j < nCoresPerPkg; ++j) { // walk through skipped socket
+                    for (k = 0; k < __kmp_nThreadsPerCore; ++k) {
+                        KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
+                        ++proc_num;
+                    }
+                }
+            }
+        } else {
+            for (j = 0; j < nCoresPerPkg; ++j) { // walk through requested socket
                 if (j < __kmp_place_core_offset ||
-                    j >= __kmp_place_core_offset + __kmp_place_num_cores)
+                    j >= __kmp_place_core_offset + __kmp_place_num_cores) {
                     n_old += __kmp_nThreadsPerCore; // skip not-requested core
-                else
+                    if (__kmp_pu_os_idx != NULL) {
+                        for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through skipped core
+                            KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
+                            ++proc_num;
+                        }
+                    }
+                } else {
                     for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
                         if (k < __kmp_place_num_threads_per_core) {
-                            newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
+                            if (pAddr)
+                                newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
                             n_new++;
+                        } else {
+                            if (__kmp_pu_os_idx != NULL)
+                                KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
                         }
                         n_old++;
+                        ++proc_num;
                     }
+                }
+            }
+        }
+    }
     KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
     KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
                      __kmp_place_num_threads_per_core);
@@ -3451,8 +3507,15 @@ __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
     __kmp_avail_proc = n_new; // correct avail_proc
     __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
 
-    __kmp_free( *pAddr );
-    *pAddr = newAddr; // replace old topology with new one
+    if (pAddr) {
+        __kmp_free( *pAddr );
+        *pAddr = newAddr; // replace old topology with new one
+    }
+_exit:
+    if (__kmp_pu_os_idx != NULL) {
+        __kmp_free(__kmp_pu_os_idx);
+        __kmp_pu_os_idx = NULL;
+    }
 }
 
 
@@ -3460,6 +3523,12 @@ static AddrUnsPair *address2os = NULL;
 static int * procarr = NULL;
 static int __kmp_aff_depth = 0;
 
+#define KMP_EXIT_AFF_NONE \
+    KMP_ASSERT(__kmp_affinity_type == affinity_none); \
+    KMP_ASSERT(address2os == NULL); \
+    __kmp_apply_thread_places(NULL, 0); \
+    return;
+
 static void
 __kmp_aux_affinity_initialize(void)
 {
@@ -3536,9 +3605,7 @@ __kmp_aux_affinity_initialize(void)
         if(!__kmp_hwloc_error) {
             depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
             if (depth == 0) {
-                KMP_ASSERT(__kmp_affinity_type == affinity_none);
-                KMP_ASSERT(address2os == NULL);
-                return;
+                KMP_EXIT_AFF_NONE;
             } else if(depth < 0 && __kmp_affinity_verbose) {
                 KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
             }
@@ -3558,9 +3625,7 @@ __kmp_aux_affinity_initialize(void)
             file_name = NULL;
             depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
             if (depth == 0) {
-                KMP_ASSERT(__kmp_affinity_type == affinity_none);
-                KMP_ASSERT(address2os == NULL);
-                return;
+                KMP_EXIT_AFF_NONE;
             }
 
             if (depth < 0) {
@@ -3577,9 +3642,7 @@ __kmp_aux_affinity_initialize(void)
                 file_name = NULL;
                 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
                 if (depth == 0) {
-                    KMP_ASSERT(__kmp_affinity_type == affinity_none);
-                    KMP_ASSERT(address2os == NULL);
-                    return;
+                    KMP_EXIT_AFF_NONE;
                 }
             }
         }
@@ -3607,9 +3670,7 @@ __kmp_aux_affinity_initialize(void)
             depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
             fclose(f);
             if (depth == 0) {
-                KMP_ASSERT(__kmp_affinity_type == affinity_none);
-                KMP_ASSERT(address2os == NULL);
-                return;
+                KMP_EXIT_AFF_NONE;
            }
         }
     }
@@ -3646,9 +3707,7 @@ __kmp_aux_affinity_initialize(void)
             file_name = "";
             depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
             if (depth == 0) {
-                KMP_ASSERT(__kmp_affinity_type == affinity_none);
-                KMP_ASSERT(address2os == NULL);
-                return;
+                KMP_EXIT_AFF_NONE;
             }
             KMP_ASSERT(depth > 0);
             KMP_ASSERT(address2os != NULL);
@@ -3671,9 +3730,7 @@ __kmp_aux_affinity_initialize(void)
 
         depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
         if (depth == 0) {
-            KMP_ASSERT(__kmp_affinity_type == affinity_none);
-            KMP_ASSERT(address2os == NULL);
-            return;
+            KMP_EXIT_AFF_NONE;
         }
         if (depth < 0) {
             KMP_ASSERT(msg_id != kmp_i18n_null);
@@ -3688,9 +3745,7 @@ __kmp_aux_affinity_initialize(void)
 
         depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
         if (depth == 0) {
-            KMP_ASSERT(__kmp_affinity_type == affinity_none);
-            KMP_ASSERT(address2os == NULL);
-            return;
+            KMP_EXIT_AFF_NONE;
         }
         if (depth < 0) {
             KMP_ASSERT(msg_id != kmp_i18n_null);
@@ -3748,8 +3803,7 @@ __kmp_aux_affinity_initialize(void)
         }
         if (__kmp_affinity_type == affinity_none) {
             KMP_ASSERT(depth == 0);
-            KMP_ASSERT(address2os == NULL);
-            return;
+            KMP_EXIT_AFF_NONE;
         }
     }
 
@@ -3777,9 +3831,7 @@ __kmp_aux_affinity_initialize(void)
 
         depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
         if (depth == 0) {
-            KMP_ASSERT(__kmp_affinity_type == affinity_none);
-            KMP_ASSERT(address2os == NULL);
-            return;
+            KMP_EXIT_AFF_NONE;
        }
        // should not fail
        KMP_ASSERT(depth > 0);
@@ -3793,9 +3845,7 @@ __kmp_aux_affinity_initialize(void)
         }
         depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
         if (depth == 0) {
-            KMP_ASSERT(__kmp_affinity_type == affinity_none);
-            KMP_ASSERT(address2os == NULL);
-            return;
+            KMP_EXIT_AFF_NONE;
         }
     }
 # endif // KMP_USE_HWLOC
@@ -4014,6 +4064,7 @@ __kmp_aux_affinity_initialize(void)
     __kmp_free(osId2Mask);
     machine_hierarchy.init(address2os, __kmp_avail_proc);
 }
+#undef KMP_EXIT_AFF_NONE
 
 
 void