Change hwloc discovery algorithm to print topology only for accessible resources

Change the hwloc discovery algorithm to print the topology for accessible
resources only, and to report uniformity accordingly, as the other topology
discovery algorithms do. This fixes a minor inconsistency between the total
topology reported and the resources used for thread binding when hwloc is used.

Patch by Andrey Churbanov.

Differential Revision: http://reviews.llvm.org/D21389

llvm-svn: 272952
This commit is contained in:
Jonathan Peyton 2016-06-16 20:31:19 +00:00
parent 0f3c2b921d
commit bf35771bcc
1 changed files with 29 additions and 17 deletions

View File

@ -389,9 +389,6 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
int pkgLevel = 0;
int coreLevel = 1;
int threadLevel = 2;
nPackages = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_root_obj(__kmp_hwloc_topology), HWLOC_OBJ_SOCKET);
nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE);
__kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
if (! KMP_AFFINITY_CAPABLE())
{
@ -401,6 +398,8 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
//
KMP_ASSERT(__kmp_affinity_type == affinity_none);
nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE);
__kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
if (__kmp_affinity_verbose) {
@ -423,23 +422,34 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
//
AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
//
// When affinity is off, this routine will still be called to set
// __kmp_ncores, as well as __kmp_nThreadsPerCore,
// nCoresPerPkg, & nPackages. Make sure all these vars are set
// correctly, and return if affinity is not enabled.
//
hwloc_obj_t pu;
hwloc_obj_t core;
hwloc_obj_t socket;
int nActiveThreads = 0;
int socket_identifier = 0;
// re-calculate globals to count only accessible resources
__kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
for(socket = hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0);
socket != NULL;
socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, socket),
socket_identifier++)
{
int core_identifier = 0;
int num_active_cores = 0;
for(core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type, socket->logical_index, HWLOC_OBJ_CORE, 0);
core != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type, core) == socket;
core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, core),
core_identifier++)
{
int pu_identifier = 0;
int num_active_threads = 0;
for(pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type, core->logical_index, HWLOC_OBJ_PU, 0);
pu != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type, pu) == core;
pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU, pu),
@ -447,7 +457,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
{
Address addr(3);
if(! KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
continue;
continue; // skip inactive (inaccessible) unit
KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
socket->os_index, socket->logical_index, core->os_index, core->logical_index, pu->os_index,pu->logical_index));
addr.labels[0] = socket_identifier; // package
@ -455,13 +465,26 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
addr.labels[2] = pu_identifier; // pu
retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
nActiveThreads++;
++num_active_threads; // count active threads per core
}
if (num_active_threads) { // were there any active threads on the core?
++__kmp_ncores; // count total active cores
++num_active_cores; // count active cores per socket
if (num_active_threads > __kmp_nThreadsPerCore)
__kmp_nThreadsPerCore = num_active_threads; // calc maximum
}
}
if (num_active_cores) { // were there any active cores on the socket?
++nPackages; // count total active packages
if (num_active_cores > nCoresPerPkg)
nCoresPerPkg = num_active_cores; // calc maximum
}
}
//
// If there's only one thread context to bind to, return now.
//
KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
KMP_ASSERT(nActiveThreads > 0);
if (nActiveThreads == 1) {
__kmp_ncores = nPackages = 1;
@ -513,21 +536,10 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
//
qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
//
// When affinity is off, this routine will still be called to set
// __kmp_ncores, as well as __kmp_nThreadsPerCore,
// nCoresPerPkg, & nPackages. Make sure all these vars are set
// correctly, and return if affinity is not enabled.
//
__kmp_ncores = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE);
//
// Check to see if the machine topology is uniform
//
unsigned npackages = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET);
unsigned ncores = __kmp_ncores;
unsigned nthreads = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU);
unsigned uniform = (npackages * nCoresPerPkg * __kmp_nThreadsPerCore == nthreads);
unsigned uniform = (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);
//
// Print the machine topology summary.
@ -552,7 +564,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
kmp_str_buf_t buf;
__kmp_str_buf_init(&buf);
__kmp_str_buf_print(&buf, "%d", npackages);
__kmp_str_buf_print(&buf, "%d", nPackages);
//for (level = 1; level <= pkgLevel; level++) {
// __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
// }