forked from OSchip/llvm-project
KMP_HW_SUBSET extended with NUMA support when HWLOC enabled
Differential Revision: https://reviews.llvm.org/D31600 llvm-svn: 300220
This commit is contained in:
parent
7840dc8451
commit
4a9a89241b
|
@ -351,7 +351,7 @@ kmpc_set_defaults 224
|
|||
%ifdef OMP_30
|
||||
__kmpc_omp_taskyield 235
|
||||
%endif # OMP_30
|
||||
__kmpc_place_threads 236
|
||||
# __kmpc_place_threads 236
|
||||
%endif
|
||||
|
||||
# OpenMP 4.0 entry points
|
||||
|
|
|
@ -38,7 +38,7 @@ Language "English"
|
|||
Country "USA"
|
||||
LangId "1033"
|
||||
Version "2"
|
||||
Revision "20160714"
|
||||
Revision "20161216"
|
||||
|
||||
|
||||
|
||||
|
@ -388,8 +388,8 @@ OBSOLETE "%1$s: granularity=core will be used."
|
|||
EnvLockWarn "%1$s must be set prior to first OMP lock call or critical section; ignored."
|
||||
FutexNotSupported "futex system call not supported; %1$s=%2$s ignored."
|
||||
AffGranUsing "%1$s: granularity=%2$s will be used."
|
||||
AffHWSubsetInvalid "%1$s: invalid value \"%2$s\", valid format is \"Ns[@N],Nc[@N],Nt "
|
||||
"(nSockets@offset, nCores@offset, nTthreads per core)\"."
|
||||
AffHWSubsetInvalid "%1$s: invalid value \"%2$s\", valid format is \"N<item>[@N][,...][,Nt] "
|
||||
"(<item> can be S, N, L2, C, T for Socket, NUMA Node, L2 Cache, Core, Thread)\"."
|
||||
AffHWSubsetUnsupported "KMP_HW_SUBSET ignored: unsupported architecture."
|
||||
AffHWSubsetManyCores "KMP_HW_SUBSET ignored: too many cores requested."
|
||||
SyntaxErrorUsing "%1$s: syntax error, using %2$s."
|
||||
|
@ -411,6 +411,10 @@ AffHwlocErrorOccurred "%1$s: Hwloc failed in %2$s. Relying on internal af
|
|||
EnvSerialWarn "%1$s must be set prior to OpenMP runtime library initialization; ignored."
|
||||
EnvVarDeprecated "%1$s variable deprecated, please use %2$s instead."
|
||||
RedMethodNotSupported "KMP_FORCE_REDUCTION: %1$s method is not supported; using critical."
|
||||
AffHWSubsetNoHWLOC "KMP_HW_SUBSET ignored: unsupported item requested for non-HWLOC topology method (KMP_TOPOLOGY_METHOD)"
|
||||
AffHWSubsetManyNodes "KMP_HW_SUBSET ignored: too many NUMA Nodes requested."
|
||||
AffHWSubsetManyTiles "KMP_HW_SUBSET ignored: too many L2 Caches requested."
|
||||
AffHWSubsetManyProcs "KMP_HW_SUBSET ignored: too many Procs requested."
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------------------------------
|
||||
|
|
|
@ -774,11 +774,19 @@ typedef enum kmp_cancel_kind_t {
|
|||
} kmp_cancel_kind_t;
|
||||
#endif // OMP_40_ENABLED
|
||||
|
||||
extern int __kmp_place_num_sockets;
|
||||
extern int __kmp_place_socket_offset;
|
||||
extern int __kmp_place_num_cores;
|
||||
extern int __kmp_place_core_offset;
|
||||
extern int __kmp_place_num_threads_per_core;
|
||||
// KMP_HW_SUBSET support:
|
||||
// One KMP_HW_SUBSET request for a single topology layer (socket, NUMA node,
// L2 tile, core or proc); one global instance exists per layer.
typedef struct kmp_hws_item {
|
||||
int num; // how many units of this layer are requested; 0 is replaced with "all available" when the subset is applied
|
||||
int offset; // how many leading units of this layer are skipped before the requested ones
|
||||
} kmp_hws_item_t;
|
||||
|
||||
extern kmp_hws_item_t __kmp_hws_socket;
|
||||
extern kmp_hws_item_t __kmp_hws_node;
|
||||
extern kmp_hws_item_t __kmp_hws_tile;
|
||||
extern kmp_hws_item_t __kmp_hws_core;
|
||||
extern kmp_hws_item_t __kmp_hws_proc;
|
||||
extern int __kmp_hws_requested;
|
||||
extern int __kmp_hws_abs_flag; // absolute or per-item number requested
|
||||
|
||||
/* ------------------------------------------------------------------------ */
|
||||
/* ------------------------------------------------------------------------ */
|
||||
|
@ -3494,9 +3502,6 @@ KMP_EXPORT kmp_int32 __kmp_get_reduce_method( void );
|
|||
KMP_EXPORT kmp_uint64 __kmpc_get_taskid();
|
||||
KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid();
|
||||
|
||||
// this function exported for testing of KMP_PLACE_THREADS functionality
|
||||
KMP_EXPORT void __kmpc_place_threads(int,int,int,int,int);
|
||||
|
||||
/* ------------------------------------------------------------------------ */
|
||||
/* ------------------------------------------------------------------------ */
|
||||
|
||||
|
|
|
@ -3405,102 +3405,665 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
|
|||
#undef ADD_MASK
|
||||
#undef ADD_MASK_OSID
|
||||
|
||||
#if KMP_USE_HWLOC
|
||||
static int
|
||||
__kmp_hwloc_count_children_by_type(
|
||||
hwloc_topology_t t, hwloc_obj_t o, hwloc_obj_type_t type, hwloc_obj_t* f)
|
||||
{
|
||||
if (!hwloc_compare_types(o->type, type)) {
|
||||
if (*f == NULL)
|
||||
*f = o; // output first descendant found
|
||||
return 1;
|
||||
}
|
||||
int sum = 0;
|
||||
for (unsigned i = 0; i < o->arity; i++)
|
||||
sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
|
||||
return sum; // will be 0 if no one found (as PU arity is 0)
|
||||
}
|
||||
|
||||
static int
|
||||
__kmp_hwloc_count_children_by_depth(
|
||||
hwloc_topology_t t, hwloc_obj_t o, unsigned depth, hwloc_obj_t* f)
|
||||
{
|
||||
if (o->depth == depth) {
|
||||
if (*f == NULL)
|
||||
*f = o; // output first descendant found
|
||||
return 1;
|
||||
}
|
||||
int sum = 0;
|
||||
for (unsigned i = 0; i < o->arity; i++)
|
||||
sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
|
||||
return sum; // will be 0 if no one found (as PU arity is 0)
|
||||
}
|
||||
|
||||
static int
|
||||
__kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o)
|
||||
{ // skip PUs descendants of the object o
|
||||
int skipped = 0;
|
||||
hwloc_obj_t hT = NULL;
|
||||
int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
|
||||
for (int i = 0; i < N; ++i) {
|
||||
KMP_DEBUG_ASSERT(hT);
|
||||
unsigned idx = hT->os_index;
|
||||
if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
|
||||
KMP_CPU_CLR(idx, __kmp_affin_fullMask);
|
||||
KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
|
||||
++skipped;
|
||||
}
|
||||
hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
|
||||
}
|
||||
return skipped; // count number of skipped units
|
||||
}
|
||||
|
||||
static int
|
||||
__kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o)
|
||||
{ // check if obj has PUs present in fullMask
|
||||
hwloc_obj_t hT = NULL;
|
||||
int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
|
||||
for (int i = 0; i < N; ++i) {
|
||||
KMP_DEBUG_ASSERT(hT);
|
||||
unsigned idx = hT->os_index;
|
||||
if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
|
||||
return 1; // found PU
|
||||
hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
|
||||
}
|
||||
return 0; // no PUs found
|
||||
}
|
||||
#endif // KMP_USE_HWLOC
|
||||
|
||||
static void
|
||||
__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
|
||||
{
|
||||
int i, j, k, n_old = 0, n_new = 0, proc_num = 0;
|
||||
if (__kmp_place_num_sockets == 0 &&
|
||||
__kmp_place_num_cores == 0 &&
|
||||
__kmp_place_num_threads_per_core == 0 )
|
||||
goto _exit; // no topology limiting actions requested, exit
|
||||
if (__kmp_place_num_sockets == 0)
|
||||
__kmp_place_num_sockets = nPackages; // use all available sockets
|
||||
if (__kmp_place_num_cores == 0)
|
||||
__kmp_place_num_cores = nCoresPerPkg; // use all available cores
|
||||
if (__kmp_place_num_threads_per_core == 0 ||
|
||||
__kmp_place_num_threads_per_core > __kmp_nThreadsPerCore)
|
||||
__kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
|
||||
AddrUnsPair *newAddr;
|
||||
if (__kmp_hws_requested == 0)
|
||||
goto _exit; // no topology limiting actions requested, exit
|
||||
#if KMP_USE_HWLOC
|
||||
if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
|
||||
// Number of subobjects calculated dynamically, this works fine for
|
||||
// any non-uniform topology.
|
||||
// L2 cache objects are determined by depth, other objects - by type.
|
||||
hwloc_topology_t tp = __kmp_hwloc_topology;
|
||||
int nS=0, nN=0, nL=0, nC=0, nT=0; // logical index including skipped
|
||||
int nCr=0, nTr=0; // number of requested units
|
||||
int nPkg=0, nCo=0, n_new=0, n_old = 0, nCpP=0, nTpC=0; // counters
|
||||
hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
|
||||
int L2depth, idx;
|
||||
|
||||
if ( !__kmp_affinity_uniform_topology() ) {
|
||||
// check support of extensions ----------------------------------
|
||||
int numa_support = 0, tile_support = 0;
|
||||
if (__kmp_pu_os_idx)
|
||||
hT = hwloc_get_pu_obj_by_os_index(
|
||||
tp, __kmp_pu_os_idx[__kmp_avail_proc - 1]);
|
||||
else
|
||||
hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
|
||||
if (hT == NULL) { // something's gone wrong
|
||||
KMP_WARNING(AffHWSubsetUnsupported);
|
||||
goto _exit;
|
||||
}
|
||||
// check NUMA node
|
||||
hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
|
||||
hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
|
||||
if (hN != NULL && hN->depth > hS->depth) {
|
||||
numa_support = 1; // 1 in case socket includes node(s)
|
||||
} else if (__kmp_hws_node.num > 0) {
|
||||
// don't support sockets inside NUMA node (no such HW found for testing)
|
||||
KMP_WARNING(AffHWSubsetUnsupported);
|
||||
goto _exit;
|
||||
}
|
||||
// check L2 cache; get the object by depth because there may be multiple caches
|
||||
L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
|
||||
hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
|
||||
if (hL != NULL && __kmp_hwloc_count_children_by_type(
|
||||
tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
|
||||
tile_support = 1; // no sense to count L2 if it includes single core
|
||||
} else if (__kmp_hws_tile.num > 0) {
|
||||
if (__kmp_hws_core.num == 0) {
|
||||
__kmp_hws_core = __kmp_hws_tile; // replace L2 with core
|
||||
__kmp_hws_tile.num = 0;
|
||||
} else {
|
||||
// L2 and core are both requested, but represent same object
|
||||
KMP_WARNING(AffHWSubsetInvalid);
|
||||
goto _exit;
|
||||
}
|
||||
}
|
||||
// end of check of extensions -----------------------------------
|
||||
|
||||
// fill in unset items, validate settings -----------------------
|
||||
if (__kmp_hws_socket.num == 0)
|
||||
__kmp_hws_socket.num = nPackages; // use all available sockets
|
||||
if (__kmp_hws_socket.offset >= nPackages) {
|
||||
KMP_WARNING(AffHWSubsetManySockets);
|
||||
goto _exit;
|
||||
}
|
||||
if (numa_support) {
|
||||
int NN = __kmp_hwloc_count_children_by_type(
|
||||
tp, hS, HWLOC_OBJ_NUMANODE, &hN); // num nodes in socket
|
||||
if (__kmp_hws_node.num == 0)
|
||||
__kmp_hws_node.num = NN; // use all available nodes
|
||||
if (__kmp_hws_node.offset >= NN) {
|
||||
KMP_WARNING(AffHWSubsetManyNodes);
|
||||
goto _exit;
|
||||
}
|
||||
if (tile_support) {
|
||||
// get num tiles in node
|
||||
int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
|
||||
if (__kmp_hws_tile.num == 0) {
|
||||
__kmp_hws_tile.num = NL + 1;
|
||||
} // use all available tiles, some node may have more tiles, thus +1
|
||||
if (__kmp_hws_tile.offset >= NL) {
|
||||
KMP_WARNING(AffHWSubsetManyTiles);
|
||||
goto _exit;
|
||||
}
|
||||
int NC = __kmp_hwloc_count_children_by_type(
|
||||
tp, hL, HWLOC_OBJ_CORE, &hC); // num cores in tile
|
||||
if (__kmp_hws_core.num == 0)
|
||||
__kmp_hws_core.num = NC; // use all available cores
|
||||
if (__kmp_hws_core.offset >= NC) {
|
||||
KMP_WARNING(AffHWSubsetManyCores);
|
||||
goto _exit;
|
||||
}
|
||||
} else { // tile_support
|
||||
int NC = __kmp_hwloc_count_children_by_type(
|
||||
tp, hN, HWLOC_OBJ_CORE, &hC); // num cores in node
|
||||
if (__kmp_hws_core.num == 0)
|
||||
__kmp_hws_core.num = NC; // use all available cores
|
||||
if (__kmp_hws_core.offset >= NC) {
|
||||
KMP_WARNING(AffHWSubsetManyCores);
|
||||
goto _exit;
|
||||
}
|
||||
} // tile_support
|
||||
} else { // numa_support
|
||||
if (tile_support) {
|
||||
// get num tiles in socket
|
||||
int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
|
||||
if (__kmp_hws_tile.num == 0)
|
||||
__kmp_hws_tile.num = NL; // use all available tiles
|
||||
if (__kmp_hws_tile.offset >= NL) {
|
||||
KMP_WARNING(AffHWSubsetManyTiles);
|
||||
goto _exit;
|
||||
}
|
||||
int NC = __kmp_hwloc_count_children_by_type(
|
||||
tp, hL, HWLOC_OBJ_CORE, &hC); // num cores in tile
|
||||
if (__kmp_hws_core.num == 0)
|
||||
__kmp_hws_core.num = NC; // use all available cores
|
||||
if (__kmp_hws_core.offset >= NC) {
|
||||
KMP_WARNING(AffHWSubsetManyCores);
|
||||
goto _exit;
|
||||
}
|
||||
} else { // tile_support
|
||||
int NC = __kmp_hwloc_count_children_by_type(
|
||||
tp, hS, HWLOC_OBJ_CORE, &hC); // num cores in socket
|
||||
if (__kmp_hws_core.num == 0)
|
||||
__kmp_hws_core.num = NC; // use all available cores
|
||||
if (__kmp_hws_core.offset >= NC) {
|
||||
KMP_WARNING(AffHWSubsetManyCores);
|
||||
goto _exit;
|
||||
}
|
||||
} // tile_support
|
||||
}
|
||||
if (__kmp_hws_proc.num == 0)
|
||||
__kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs
|
||||
if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
|
||||
KMP_WARNING(AffHWSubsetManyProcs);
|
||||
goto _exit;
|
||||
}
|
||||
// end of validation --------------------------------------------
|
||||
|
||||
if (pAddr) // pAddr is NULL in case of affinity_none
|
||||
newAddr = (AddrUnsPair *)__kmp_allocate(
|
||||
sizeof(AddrUnsPair) * __kmp_avail_proc); // max size
|
||||
// main loop to form HW subset ----------------------------------
|
||||
hS = NULL;
|
||||
int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
|
||||
for (int s = 0; s < NP; ++s) {
|
||||
// Check Socket -----------------------------------------------
|
||||
hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
|
||||
if (!__kmp_hwloc_obj_has_PUs(tp, hS))
|
||||
continue; // skip socket if all PUs are out of fullMask
|
||||
++nS; // only count objects those have PUs in affinity mask
|
||||
if (nS <= __kmp_hws_socket.offset ||
|
||||
nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
|
||||
n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket
|
||||
continue; // move to next socket
|
||||
}
|
||||
nCr = 0; // count number of cores per socket
|
||||
// socket requested, go down the topology tree
|
||||
// check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile)
|
||||
if (numa_support) {
|
||||
nN = 0;
|
||||
hN = NULL;
|
||||
int NN = __kmp_hwloc_count_children_by_type(
|
||||
tp, hS, HWLOC_OBJ_NUMANODE, &hN); // num nodes in current socket
|
||||
for (int n = 0; n < NN; ++n) {
|
||||
// Check NUMA Node ----------------------------------------
|
||||
if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
|
||||
hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
|
||||
continue; // skip node if all PUs are out of fullMask
|
||||
}
|
||||
++nN;
|
||||
if (nN <= __kmp_hws_node.offset ||
|
||||
nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
|
||||
// skip node as not requested
|
||||
n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node
|
||||
hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
|
||||
continue; // move to next node
|
||||
}
|
||||
// node requested, go down the topology tree
|
||||
if (tile_support) {
|
||||
nL = 0;
|
||||
hL = NULL;
|
||||
int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
|
||||
for (int l = 0; l < NL; ++l) {
|
||||
// Check L2 (tile) ------------------------------------
|
||||
if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
|
||||
hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
|
||||
continue; // skip tile if all PUs are out of fullMask
|
||||
}
|
||||
++nL;
|
||||
if (nL <= __kmp_hws_tile.offset ||
|
||||
nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
|
||||
// skip tile as not requested
|
||||
n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
|
||||
hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
|
||||
continue; // move to next tile
|
||||
}
|
||||
// tile requested, go down the topology tree
|
||||
nC = 0;
|
||||
hC = NULL;
|
||||
int NC = __kmp_hwloc_count_children_by_type(
|
||||
tp, hL, HWLOC_OBJ_CORE, &hC); // num cores in current tile
|
||||
for (int c = 0; c < NC; ++c) {
|
||||
// Check Core ---------------------------------------
|
||||
if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
|
||||
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
|
||||
continue; // skip core if all PUs are out of fullMask
|
||||
}
|
||||
++nC;
|
||||
if (nC <= __kmp_hws_core.offset ||
|
||||
nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
|
||||
// skip core as not requested
|
||||
n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
|
||||
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
|
||||
continue; // move to next node
|
||||
}
|
||||
// core requested, go down to PUs
|
||||
nT = 0;
|
||||
nTr = 0;
|
||||
hT = NULL;
|
||||
int NT = __kmp_hwloc_count_children_by_type(
|
||||
tp, hC, HWLOC_OBJ_PU, &hT); // num procs in current core
|
||||
for (int t = 0; t < NT; ++t) {
|
||||
// Check PU ---------------------------------------
|
||||
idx = hT->os_index;
|
||||
if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
|
||||
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
|
||||
continue; // skip PU if not in fullMask
|
||||
}
|
||||
++nT;
|
||||
if (nT <= __kmp_hws_proc.offset ||
|
||||
nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
|
||||
// skip PU
|
||||
KMP_CPU_CLR(idx, __kmp_affin_fullMask);
|
||||
++n_old;
|
||||
KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
|
||||
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
|
||||
continue; // move to next node
|
||||
}
|
||||
++nTr;
|
||||
if (pAddr) // collect requested thread's data
|
||||
newAddr[n_new] = (*pAddr)[n_old];
|
||||
++n_new;
|
||||
++n_old;
|
||||
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
|
||||
} // threads loop
|
||||
if (nTr > 0) {
|
||||
++nCr; // num cores per socket
|
||||
++nCo; // total num cores
|
||||
if (nTr > nTpC)
|
||||
nTpC = nTr; // calc max threads per core
|
||||
}
|
||||
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
|
||||
} // cores loop
|
||||
hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
|
||||
} // tiles loop
|
||||
} else { // tile_support
|
||||
// no tiles, check cores
|
||||
nC = 0;
|
||||
hC = NULL;
|
||||
int NC = __kmp_hwloc_count_children_by_type(
|
||||
tp, hN, HWLOC_OBJ_CORE, &hC); // num cores in current node
|
||||
for (int c = 0; c < NC; ++c) {
|
||||
// Check Core ---------------------------------------
|
||||
if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
|
||||
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
|
||||
continue; // skip core if all PUs are out of fullMask
|
||||
}
|
||||
++nC;
|
||||
if (nC <= __kmp_hws_core.offset ||
|
||||
nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
|
||||
// skip core as not requested
|
||||
n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
|
||||
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
|
||||
continue; // move to next node
|
||||
}
|
||||
// core requested, go down to PUs
|
||||
nT = 0;
|
||||
nTr = 0;
|
||||
hT = NULL;
|
||||
int NT = __kmp_hwloc_count_children_by_type(
|
||||
tp, hC, HWLOC_OBJ_PU, &hT);
|
||||
for (int t = 0; t < NT; ++t) {
|
||||
// Check PU ---------------------------------------
|
||||
idx = hT->os_index;
|
||||
if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
|
||||
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
|
||||
continue; // skip PU if not in fullMask
|
||||
}
|
||||
++nT;
|
||||
if (nT <= __kmp_hws_proc.offset ||
|
||||
nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
|
||||
// skip PU
|
||||
KMP_CPU_CLR(idx, __kmp_affin_fullMask);
|
||||
++n_old;
|
||||
KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
|
||||
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
|
||||
continue; // move to next node
|
||||
}
|
||||
++nTr;
|
||||
if (pAddr) // collect requested thread's data
|
||||
newAddr[n_new] = (*pAddr)[n_old];
|
||||
++n_new;
|
||||
++n_old;
|
||||
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
|
||||
} // threads loop
|
||||
if (nTr > 0) {
|
||||
++nCr; // num cores per socket
|
||||
++nCo; // total num cores
|
||||
if (nTr > nTpC)
|
||||
nTpC = nTr; // calc max threads per core
|
||||
}
|
||||
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
|
||||
} // cores loop
|
||||
} // tiles support
|
||||
hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
|
||||
} // nodes loop
|
||||
} else { // numa_support
|
||||
// no NUMA support
|
||||
if (tile_support) {
|
||||
nL = 0;
|
||||
hL = NULL;
|
||||
int NL = __kmp_hwloc_count_children_by_depth(
|
||||
tp, hS, L2depth, &hL); // num tiles in current socket
|
||||
for (int l = 0; l < NL; ++l) {
|
||||
// Check L2 (tile) ------------------------------------
|
||||
if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
|
||||
hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
|
||||
continue; // skip tile if all PUs are out of fullMask
|
||||
}
|
||||
++nL;
|
||||
if (nL <= __kmp_hws_tile.offset ||
|
||||
nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
|
||||
// skip tile as not requested
|
||||
n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
|
||||
hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
|
||||
continue; // move to next tile
|
||||
}
|
||||
// tile requested, go down the topology tree
|
||||
nC = 0;
|
||||
hC = NULL;
|
||||
int NC = __kmp_hwloc_count_children_by_type(
|
||||
tp, hL, HWLOC_OBJ_CORE, &hC); // num cores per tile
|
||||
for (int c = 0; c < NC; ++c) {
|
||||
// Check Core ---------------------------------------
|
||||
if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
|
||||
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
|
||||
continue; // skip core if all PUs are out of fullMask
|
||||
}
|
||||
++nC;
|
||||
if (nC <= __kmp_hws_core.offset ||
|
||||
nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
|
||||
// skip core as not requested
|
||||
n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
|
||||
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
|
||||
continue; // move to next node
|
||||
}
|
||||
// core requested, go down to PUs
|
||||
nT = 0;
|
||||
nTr = 0;
|
||||
hT = NULL;
|
||||
int NT = __kmp_hwloc_count_children_by_type(
|
||||
tp, hC, HWLOC_OBJ_PU, &hT); // num procs per core
|
||||
for (int t = 0; t < NT; ++t) {
|
||||
// Check PU ---------------------------------------
|
||||
idx = hT->os_index;
|
||||
if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
|
||||
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
|
||||
continue; // skip PU if not in fullMask
|
||||
}
|
||||
++nT;
|
||||
if (nT <= __kmp_hws_proc.offset ||
|
||||
nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
|
||||
// skip PU
|
||||
KMP_CPU_CLR(idx, __kmp_affin_fullMask);
|
||||
++n_old;
|
||||
KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
|
||||
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
|
||||
continue; // move to next node
|
||||
}
|
||||
++nTr;
|
||||
if (pAddr) // collect requested thread's data
|
||||
newAddr[n_new] = (*pAddr)[n_old];
|
||||
++n_new;
|
||||
++n_old;
|
||||
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
|
||||
} // threads loop
|
||||
if (nTr > 0) {
|
||||
++nCr; // num cores per socket
|
||||
++nCo; // total num cores
|
||||
if (nTr > nTpC)
|
||||
nTpC = nTr; // calc max threads per core
|
||||
}
|
||||
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
|
||||
} // cores loop
|
||||
hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
|
||||
} // tiles loop
|
||||
} else { // tile_support
|
||||
// no tiles, check cores
|
||||
nC = 0;
|
||||
hC = NULL;
|
||||
int NC = __kmp_hwloc_count_children_by_type(
|
||||
tp, hS, HWLOC_OBJ_CORE, &hC); // num cores in socket
|
||||
for (int c = 0; c < NC; ++c) {
|
||||
// Check Core -------------------------------------------
|
||||
if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
|
||||
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
|
||||
continue; // skip core if all PUs are out of fullMask
|
||||
}
|
||||
++nC;
|
||||
if (nC <= __kmp_hws_core.offset ||
|
||||
nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
|
||||
// skip core as not requested
|
||||
n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
|
||||
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
|
||||
continue; // move to next node
|
||||
}
|
||||
// core requested, go down to PUs
|
||||
nT = 0;
|
||||
nTr = 0;
|
||||
hT = NULL;
|
||||
int NT = __kmp_hwloc_count_children_by_type(
|
||||
tp, hC, HWLOC_OBJ_PU, &hT); // num procs per core
|
||||
for (int t = 0; t < NT; ++t) {
|
||||
// Check PU ---------------------------------------
|
||||
idx = hT->os_index;
|
||||
if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
|
||||
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
|
||||
continue; // skip PU if not in fullMask
|
||||
}
|
||||
++nT;
|
||||
if (nT <= __kmp_hws_proc.offset ||
|
||||
nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
|
||||
// skip PU
|
||||
KMP_CPU_CLR(idx, __kmp_affin_fullMask);
|
||||
++n_old;
|
||||
KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
|
||||
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
|
||||
continue; // move to next node
|
||||
}
|
||||
++nTr;
|
||||
if (pAddr) // collect requested thread's data
|
||||
newAddr[n_new] = (*pAddr)[n_old];
|
||||
++n_new;
|
||||
++n_old;
|
||||
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
|
||||
} // threads loop
|
||||
if (nTr > 0) {
|
||||
++nCr; // num cores per socket
|
||||
++nCo; // total num cores
|
||||
if (nTr > nTpC)
|
||||
nTpC = nTr; // calc max threads per core
|
||||
}
|
||||
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
|
||||
} // cores loop
|
||||
} // tiles support
|
||||
} // numa_support
|
||||
if (nCr > 0) { // found cores?
|
||||
++nPkg; // num sockets
|
||||
if (nCr > nCpP)
|
||||
nCpP = nCr; // calc max cores per socket
|
||||
}
|
||||
} // sockets loop
|
||||
|
||||
// check the subset is valid
|
||||
KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
|
||||
KMP_DEBUG_ASSERT(nPkg > 0);
|
||||
KMP_DEBUG_ASSERT(nCpP > 0);
|
||||
KMP_DEBUG_ASSERT(nTpC > 0);
|
||||
KMP_DEBUG_ASSERT(nCo > 0);
|
||||
KMP_DEBUG_ASSERT(nPkg <= nPackages);
|
||||
KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
|
||||
KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
|
||||
KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);
|
||||
|
||||
nPackages = nPkg; // correct num sockets
|
||||
nCoresPerPkg = nCpP; // correct num cores per socket
|
||||
__kmp_nThreadsPerCore = nTpC; // correct num threads per core
|
||||
__kmp_avail_proc = n_new; // correct num procs
|
||||
__kmp_ncores = nCo; // correct num cores
|
||||
// hwloc topology method end
|
||||
} else
|
||||
#endif // KMP_USE_HWLOC
|
||||
{
|
||||
int n_old = 0, n_new = 0, proc_num = 0;
|
||||
if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {
|
||||
KMP_WARNING(AffHWSubsetNoHWLOC);
|
||||
goto _exit;
|
||||
}
|
||||
if (__kmp_hws_socket.num == 0)
|
||||
__kmp_hws_socket.num = nPackages; // use all available sockets
|
||||
if (__kmp_hws_core.num == 0)
|
||||
__kmp_hws_core.num = nCoresPerPkg; // use all available cores
|
||||
if (__kmp_hws_proc.num == 0 ||
|
||||
__kmp_hws_proc.num > __kmp_nThreadsPerCore)
|
||||
__kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts
|
||||
if ( !__kmp_affinity_uniform_topology() ) {
|
||||
KMP_WARNING( AffHWSubsetNonUniform );
|
||||
goto _exit; // don't support non-uniform topology
|
||||
}
|
||||
if ( depth > 3 ) {
|
||||
}
|
||||
if ( depth > 3 ) {
|
||||
KMP_WARNING( AffHWSubsetNonThreeLevel );
|
||||
goto _exit; // don't support not-3-level topology
|
||||
}
|
||||
if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
|
||||
}
|
||||
if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
|
||||
KMP_WARNING(AffHWSubsetManySockets);
|
||||
goto _exit;
|
||||
}
|
||||
if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
|
||||
}
|
||||
if ( __kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg ) {
|
||||
KMP_WARNING( AffHWSubsetManyCores );
|
||||
goto _exit;
|
||||
}
|
||||
|
||||
AddrUnsPair *newAddr;
|
||||
if (pAddr) // pAddr is NULL in case of affinity_none
|
||||
}
|
||||
// Form the requested subset
|
||||
if (pAddr) // pAddr is NULL in case of affinity_none
|
||||
newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
|
||||
__kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
|
||||
|
||||
for (i = 0; i < nPackages; ++i) {
|
||||
if (i < __kmp_place_socket_offset ||
|
||||
i >= __kmp_place_socket_offset + __kmp_place_num_sockets) {
|
||||
n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
|
||||
if (__kmp_pu_os_idx != NULL) {
|
||||
for (j = 0; j < nCoresPerPkg; ++j) { // walk through skipped socket
|
||||
for (k = 0; k < __kmp_nThreadsPerCore; ++k) {
|
||||
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
|
||||
++proc_num;
|
||||
}
|
||||
}
|
||||
__kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc.num);
|
||||
for (int i = 0; i < nPackages; ++i) {
|
||||
if (i < __kmp_hws_socket.offset ||
|
||||
i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
|
||||
// skip not-requested socket
|
||||
n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
|
||||
if (__kmp_pu_os_idx != NULL) {
|
||||
// walk through skipped socket
|
||||
for (int j = 0; j < nCoresPerPkg; ++j) {
|
||||
for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
|
||||
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
|
||||
++proc_num;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < nCoresPerPkg; ++j) { // walk through requested socket
|
||||
if (j < __kmp_place_core_offset ||
|
||||
j >= __kmp_place_core_offset + __kmp_place_num_cores) {
|
||||
n_old += __kmp_nThreadsPerCore; // skip not-requested core
|
||||
if (__kmp_pu_os_idx != NULL) {
|
||||
for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through skipped core
|
||||
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
|
||||
++proc_num;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
|
||||
if (k < __kmp_place_num_threads_per_core) {
|
||||
if (pAddr)
|
||||
newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
|
||||
n_new++;
|
||||
} else {
|
||||
if (__kmp_pu_os_idx != NULL)
|
||||
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
|
||||
}
|
||||
n_old++;
|
||||
++proc_num;
|
||||
}
|
||||
// walk through requested socket
|
||||
for (int j = 0; j < nCoresPerPkg; ++j) {
|
||||
if (j < __kmp_hws_core.offset ||
|
||||
j >= __kmp_hws_core.offset + __kmp_hws_core.num)
|
||||
{ // skip not-requested core
|
||||
n_old += __kmp_nThreadsPerCore;
|
||||
if (__kmp_pu_os_idx != NULL) {
|
||||
for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
|
||||
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
|
||||
++proc_num;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// walk through requested core
|
||||
for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
|
||||
if (k < __kmp_hws_proc.num) {
|
||||
if (pAddr) // collect requested thread's data
|
||||
newAddr[n_new] = (*pAddr)[n_old];
|
||||
n_new++;
|
||||
} else {
|
||||
if (__kmp_pu_os_idx != NULL)
|
||||
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
|
||||
}
|
||||
n_old++;
|
||||
++proc_num;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
|
||||
KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
|
||||
__kmp_place_num_threads_per_core);
|
||||
|
||||
nPackages = __kmp_place_num_sockets; // correct nPackages
|
||||
nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
|
||||
__kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
|
||||
__kmp_avail_proc = n_new; // correct avail_proc
|
||||
__kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
|
||||
|
||||
}
|
||||
KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
|
||||
KMP_DEBUG_ASSERT(n_new == __kmp_hws_socket.num * __kmp_hws_core.num *
|
||||
__kmp_hws_proc.num);
|
||||
nPackages = __kmp_hws_socket.num; // correct nPackages
|
||||
nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg
|
||||
__kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore
|
||||
__kmp_avail_proc = n_new; // correct avail_proc
|
||||
__kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores
|
||||
} // non-hwloc topology method
|
||||
if (pAddr) {
|
||||
__kmp_free( *pAddr );
|
||||
*pAddr = newAddr; // replace old topology with new one
|
||||
__kmp_free( *pAddr );
|
||||
*pAddr = newAddr; // replace old topology with new one
|
||||
}
|
||||
if (__kmp_affinity_verbose) {
|
||||
char m[KMP_AFFIN_MASK_PRINT_LEN];
|
||||
__kmp_affinity_print_mask(m,KMP_AFFIN_MASK_PRINT_LEN,__kmp_affin_fullMask);
|
||||
if (__kmp_affinity_respect_mask) {
|
||||
KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m);
|
||||
} else {
|
||||
KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m);
|
||||
}
|
||||
KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
|
||||
kmp_str_buf_t buf;
|
||||
__kmp_str_buf_init(&buf);
|
||||
__kmp_str_buf_print(&buf, "%d", nPackages);
|
||||
KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
|
||||
__kmp_nThreadsPerCore, __kmp_ncores);
|
||||
__kmp_str_buf_free(&buf);
|
||||
}
|
||||
_exit:
|
||||
if (__kmp_pu_os_idx != NULL) {
|
||||
__kmp_free(__kmp_pu_os_idx);
|
||||
__kmp_pu_os_idx = NULL;
|
||||
__kmp_free(__kmp_pu_os_idx);
|
||||
__kmp_pu_os_idx = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -3038,18 +3038,6 @@ __kmpc_get_parent_taskid() {
|
|||
|
||||
} // __kmpc_get_parent_taskid
|
||||
|
||||
void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT)
|
||||
{
|
||||
if ( ! __kmp_init_serial ) {
|
||||
__kmp_serial_initialize();
|
||||
}
|
||||
__kmp_place_num_sockets = nS;
|
||||
__kmp_place_socket_offset = sO;
|
||||
__kmp_place_num_cores = nC;
|
||||
__kmp_place_core_offset = cO;
|
||||
__kmp_place_num_threads_per_core = nT;
|
||||
}
|
||||
|
||||
#if OMP_45_ENABLED
|
||||
/*!
|
||||
@ingroup WORK_SHARING
|
||||
|
|
|
@ -264,11 +264,13 @@ kmp_nested_proc_bind_t __kmp_nested_proc_bind = { NULL, 0, 0 };
|
|||
int __kmp_affinity_num_places = 0;
|
||||
#endif
|
||||
|
||||
int __kmp_place_num_sockets = 0;
|
||||
int __kmp_place_socket_offset = 0;
|
||||
int __kmp_place_num_cores = 0;
|
||||
int __kmp_place_core_offset = 0;
|
||||
int __kmp_place_num_threads_per_core = 0;
|
||||
kmp_hws_item_t __kmp_hws_socket = {0, 0};
|
||||
kmp_hws_item_t __kmp_hws_node = {0, 0};
|
||||
kmp_hws_item_t __kmp_hws_tile = {0, 0};
|
||||
kmp_hws_item_t __kmp_hws_core = {0, 0};
|
||||
kmp_hws_item_t __kmp_hws_proc = {0, 0};
|
||||
int __kmp_hws_requested = 0;
|
||||
int __kmp_hws_abs_flag = 0; // absolute or per-item number requested
|
||||
|
||||
#if OMP_40_ENABLED
|
||||
kmp_int32 __kmp_default_device = 0;
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "kmp_lock.h"
|
||||
#include "kmp_io.h"
|
||||
#include "kmp_affinity.h"
|
||||
#include <ctype.h> // toupper()
|
||||
|
||||
static int __kmp_env_toPrint( char const * name, int flag );
|
||||
|
||||
|
@ -3108,6 +3109,12 @@ __kmp_stg_print_topology_method( kmp_str_buf_t * buffer, char const * name,
|
|||
break;
|
||||
# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
|
||||
|
||||
# if KMP_USE_HWLOC
|
||||
case affinity_top_method_hwloc:
|
||||
value = "hwloc";
|
||||
break;
|
||||
# endif
|
||||
|
||||
case affinity_top_method_cpuinfo:
|
||||
value = "cpuinfo";
|
||||
break;
|
||||
|
@ -4297,275 +4304,152 @@ __kmp_stg_print_speculative_statsfile( kmp_str_buf_t * buffer, char const * name
|
|||
// KMP_HW_SUBSET (was KMP_PLACE_THREADS)
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
|
||||
// The longest observable sequence of items is
|
||||
// Socket-Node-Tile-Core-Thread
|
||||
// So, let's limit to 5 levels for now
|
||||
// The input string is usually short, so a 512-byte buffer is used for now
|
||||
#define MAX_T_LEVEL 5
|
||||
#define MAX_STR_LEN 512
|
||||
static void
|
||||
__kmp_stg_parse_hw_subset( char const * name, char const * value, void * data ) {
|
||||
// Value example: 5Cx2Tx15O
|
||||
// Which means "use 5 cores with offset 15, 2 threads per core"
|
||||
// AC: extended to sockets level, examples of
|
||||
// "use 2 sockets with offset 6, 2 cores with offset 2 per socket, 2 threads per core":
|
||||
// 2s,6o,2c,2o,2t; 2s,6o,2c,2t,2o; 2s@6,2c@2,2t
|
||||
// To not break legacy code core-offset can be last;
|
||||
// postfix "o" or prefix @ can be offset designator.
|
||||
// Note: not all syntax errors are analyzed, some may be skipped.
|
||||
#define CHECK_DELIM(_x) (*(_x) == ',' || *(_x) == 'x')
|
||||
static int parsed = 0;
|
||||
int num;
|
||||
int single_warning = 0;
|
||||
int flagS = 0, flagC = 0, flagT = 0, flagSO = 0, flagCO = 0;
|
||||
const char *next = value;
|
||||
const char *prev;
|
||||
|
||||
if( strcmp(name, "KMP_PLACE_THREADS") == 0 ) {
|
||||
KMP_INFORM(EnvVarDeprecated,name,"KMP_HW_SUBSET");
|
||||
if( parsed == 1 ) {
|
||||
return; // already parsed KMP_HW_SUBSET
|
||||
}
|
||||
// Value example: 1s,5c@3,2T
|
||||
// Which means "use 1 socket, 5 cores with offset 3, 2 threads per core"
|
||||
static int parsed = 0;
|
||||
if( strcmp(name, "KMP_PLACE_THREADS") == 0 ) {
|
||||
KMP_INFORM(EnvVarDeprecated,name,"KMP_HW_SUBSET");
|
||||
if( parsed == 1 ) {
|
||||
return; // already parsed KMP_HW_SUBSET
|
||||
}
|
||||
parsed = 1;
|
||||
}
|
||||
parsed = 1;
|
||||
|
||||
SKIP_WS(next); // skip white spaces
|
||||
if (*next == '\0')
|
||||
return; // no data provided, retain default values
|
||||
if( strcmp(name, "KMP_PLACE_THREADS") == 0 ) {
|
||||
KMP_INFORM(EnvVarDeprecated,name,"KMP_HW_SUBSET");
|
||||
if( parsed == 1 ) {
|
||||
return; // already parsed KMP_HW_SUBSET
|
||||
}
|
||||
char *components[MAX_T_LEVEL];
|
||||
char const *digits = "0123456789";
|
||||
char input[MAX_STR_LEN];
|
||||
size_t len = 0, mlen = MAX_STR_LEN;
|
||||
int level = 0;
|
||||
// Canonize the string (remove spaces, unify delimiters, etc.)
|
||||
char *pos = (char *)value;
|
||||
while (*pos && mlen) {
|
||||
if (*pos != ' ') { // skip spaces
|
||||
if (len == 0 && *pos == ':') {
|
||||
__kmp_hws_abs_flag = 1; // if the first symbol is ":", skip it
|
||||
} else {
|
||||
input[len] = toupper(*pos);
|
||||
if (input[len] == 'X')
|
||||
input[len] = ','; // unify delimiters of levels
|
||||
if (input[len] == 'O' && strchr(digits, *(pos + 1)))
|
||||
input[len] = '@'; // unify delimiters of offset
|
||||
len++;
|
||||
}
|
||||
}
|
||||
parsed = 1;
|
||||
|
||||
SKIP_WS(next); // skip white spaces
|
||||
if (*next == '\0')
|
||||
return; // no data provided, retain default values
|
||||
// Get num_sockets first (or whatever specified)
|
||||
if (*next >= '0' && *next <= '9') {
|
||||
prev = next;
|
||||
SKIP_DIGITS(next);
|
||||
num = __kmp_str_to_int(prev, *next);
|
||||
SKIP_WS(next);
|
||||
if (*next == 's' || *next == 'S') { // e.g. "2s"
|
||||
__kmp_place_num_sockets = num;
|
||||
flagS = 1; // got num sockets
|
||||
next++;
|
||||
if (*next == '@') { // socket offset, e.g. "2s@4"
|
||||
flagSO = 1;
|
||||
prev = ++next; // don't allow spaces for simplicity
|
||||
if (!(*next >= '0' && *next <= '9')) {
|
||||
KMP_WARNING(AffHWSubsetInvalid, name, value);
|
||||
return;
|
||||
}
|
||||
SKIP_DIGITS(next);
|
||||
num = __kmp_str_to_int(prev, *next);
|
||||
__kmp_place_socket_offset = num;
|
||||
}
|
||||
} else if (*next == 'c' || *next == 'C') {
|
||||
__kmp_place_num_cores = num;
|
||||
flagS = flagC = 1; // sockets were not specified - use default
|
||||
next++;
|
||||
if (*next == '@') { // core offset, e.g. "2c@6"
|
||||
flagCO = 1;
|
||||
prev = ++next; // don't allow spaces for simplicity
|
||||
if (!(*next >= '0' && *next <= '9')) {
|
||||
KMP_WARNING(AffHWSubsetInvalid, name, value);
|
||||
return;
|
||||
}
|
||||
SKIP_DIGITS(next);
|
||||
num = __kmp_str_to_int(prev, *next);
|
||||
__kmp_place_core_offset = num;
|
||||
}
|
||||
} else if (CHECK_DELIM(next)) {
|
||||
__kmp_place_num_cores = num; // no letter-designator - num cores
|
||||
flagS = flagC = 1; // sockets were not specified - use default
|
||||
next++;
|
||||
} else if (*next == 't' || *next == 'T') {
|
||||
__kmp_place_num_threads_per_core = num;
|
||||
// sockets, cores were not specified - use default
|
||||
return; // we ignore offset value in case all cores are used
|
||||
} else if (*next == '\0') {
|
||||
__kmp_place_num_cores = num;
|
||||
return; // the only value provided - set num cores
|
||||
mlen--;
|
||||
pos++;
|
||||
}
|
||||
if (len == 0 || mlen == 0)
|
||||
goto err; // contents is either empty or too long
|
||||
input[len] = '\0';
|
||||
__kmp_hws_requested = 1; // mark that subset requested
|
||||
// Split by delimiter
|
||||
pos = input;
|
||||
components[level++] = pos;
|
||||
while (pos = strchr(pos, ',')) {
|
||||
*pos = '\0'; // modify input and avoid more copying
|
||||
components[level++] = ++pos; // expect something after ","
|
||||
if (level > MAX_T_LEVEL)
|
||||
goto err; // too many components provided
|
||||
}
|
||||
// Check each component
|
||||
for (int i = 0; i < level; ++i) {
|
||||
int offset = 0;
|
||||
int num = atoi(components[i]); // each component should start with a number
|
||||
if ((pos = strchr(components[i], '@'))) {
|
||||
offset = atoi(pos + 1); // save offset
|
||||
*pos = '\0'; // cut the offset from the component
|
||||
}
|
||||
pos = components[i] + strspn(components[i], digits);
|
||||
if (pos == components[i])
|
||||
goto err;
|
||||
// detect the component type
|
||||
switch (*pos) {
|
||||
case 'S': // Socket
|
||||
if (__kmp_hws_socket.num > 0)
|
||||
goto err; // duplicate is not allowed
|
||||
__kmp_hws_socket.num = num;
|
||||
__kmp_hws_socket.offset = offset;
|
||||
break;
|
||||
case 'N': // NUMA Node
|
||||
if (__kmp_hws_node.num > 0)
|
||||
goto err; // duplicate is not allowed
|
||||
__kmp_hws_node.num = num;
|
||||
__kmp_hws_node.offset = offset;
|
||||
break;
|
||||
case 'L': // Cache
|
||||
if (*(pos + 1) == '2') { // L2 - Tile
|
||||
if (__kmp_hws_tile.num > 0)
|
||||
goto err; // duplicate is not allowed
|
||||
__kmp_hws_tile.num = num;
|
||||
__kmp_hws_tile.offset = offset;
|
||||
} else if (*(pos + 1) == '3') { // L3 - Socket
|
||||
if (__kmp_hws_socket.num > 0)
|
||||
goto err; // duplicate is not allowed
|
||||
__kmp_hws_socket.num = num;
|
||||
__kmp_hws_socket.offset = offset;
|
||||
} else if (*(pos + 1) == '1') { // L1 - Core
|
||||
if (__kmp_hws_core.num > 0)
|
||||
goto err; // duplicate is not allowed
|
||||
__kmp_hws_core.num = num;
|
||||
__kmp_hws_core.offset = offset;
|
||||
}
|
||||
break;
|
||||
case 'C': // Core (or Cache?)
|
||||
if (*(pos + 1) != 'A') {
|
||||
if (__kmp_hws_core.num > 0)
|
||||
goto err; // duplicate is not allowed
|
||||
__kmp_hws_core.num = num;
|
||||
__kmp_hws_core.offset = offset;
|
||||
} else { // Cache
|
||||
char *d = pos + strcspn(pos, digits); // find digit
|
||||
if (*d == '2') { // L2 - Tile
|
||||
if (__kmp_hws_tile.num > 0)
|
||||
goto err; // duplicate is not allowed
|
||||
__kmp_hws_tile.num = num;
|
||||
__kmp_hws_tile.offset = offset;
|
||||
} else if (*d == '3') { // L3 - Socket
|
||||
if (__kmp_hws_socket.num > 0)
|
||||
goto err; // duplicate is not allowed
|
||||
__kmp_hws_socket.num = num;
|
||||
__kmp_hws_socket.offset = offset;
|
||||
} else if (*d == '1') { // L1 - Core
|
||||
if (__kmp_hws_core.num > 0)
|
||||
goto err; // duplicate is not allowed
|
||||
__kmp_hws_core.num = num;
|
||||
__kmp_hws_core.offset = offset;
|
||||
} else {
|
||||
KMP_WARNING(AffHWSubsetInvalid, name, value);
|
||||
return;
|
||||
goto err;
|
||||
}
|
||||
} else {
|
||||
KMP_WARNING(AffHWSubsetInvalid, name, value);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case 'T': // Thread
|
||||
if (__kmp_hws_proc.num > 0)
|
||||
goto err; // duplicate is not allowed
|
||||
__kmp_hws_proc.num = num;
|
||||
__kmp_hws_proc.offset = offset;
|
||||
break;
|
||||
default:
|
||||
goto err;
|
||||
}
|
||||
KMP_DEBUG_ASSERT(flagS); // num sockets should already be set here
|
||||
SKIP_WS(next);
|
||||
if (*next == '\0')
|
||||
return; // " n " - something like this
|
||||
if (CHECK_DELIM(next)) {
|
||||
next++; // skip delimiter
|
||||
SKIP_WS(next);
|
||||
}
|
||||
|
||||
// Get second value (could be offset, num_cores, num_threads)
|
||||
if (*next >= '0' && *next <= '9') {
|
||||
prev = next;
|
||||
SKIP_DIGITS(next);
|
||||
num = __kmp_str_to_int(prev, *next);
|
||||
SKIP_WS(next);
|
||||
if (*next == 'c' || *next == 'C') {
|
||||
KMP_DEBUG_ASSERT(flagC == 0);
|
||||
__kmp_place_num_cores = num;
|
||||
flagC = 1;
|
||||
next++;
|
||||
if (*next == '@') { // core offset, e.g. "2c@6"
|
||||
flagCO = 1;
|
||||
prev = ++next; // don't allow spaces for simplicity
|
||||
if (!(*next >= '0' && *next <= '9')) {
|
||||
KMP_WARNING(AffHWSubsetInvalid, name, value);
|
||||
return;
|
||||
}
|
||||
SKIP_DIGITS(next);
|
||||
num = __kmp_str_to_int(prev, *next);
|
||||
__kmp_place_core_offset = num;
|
||||
}
|
||||
} else if (*next == 'o' || *next == 'O') { // offset specified
|
||||
KMP_WARNING(AffHWSubsetDeprecated);
|
||||
single_warning = 1;
|
||||
if (flagC) { // whether num_cores already specified (sockets skipped)
|
||||
KMP_DEBUG_ASSERT(!flagCO); // either "o" or @, not both
|
||||
__kmp_place_core_offset = num;
|
||||
} else {
|
||||
KMP_DEBUG_ASSERT(!flagSO); // either "o" or @, not both
|
||||
__kmp_place_socket_offset = num;
|
||||
}
|
||||
next++;
|
||||
} else if (*next == 't' || *next == 'T') {
|
||||
KMP_DEBUG_ASSERT(flagT == 0);
|
||||
__kmp_place_num_threads_per_core = num;
|
||||
flagC = 1; // num_cores could be skipped ?
|
||||
flagT = 1;
|
||||
next++; // can have core-offset specified after num threads
|
||||
} else if (*next == '\0') {
|
||||
KMP_DEBUG_ASSERT(flagC); // 4x2 means 4 cores 2 threads per core
|
||||
__kmp_place_num_threads_per_core = num;
|
||||
return; // two values provided without letter-designator
|
||||
} else {
|
||||
KMP_WARNING(AffHWSubsetInvalid, name, value);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
KMP_WARNING(AffHWSubsetInvalid, name, value);
|
||||
return;
|
||||
}
|
||||
SKIP_WS(next);
|
||||
if (*next == '\0')
|
||||
return; // " Ns,Nc " - something like this
|
||||
if (CHECK_DELIM(next)) {
|
||||
next++; // skip delimiter
|
||||
SKIP_WS(next);
|
||||
}
|
||||
|
||||
// Get third value (could be core-offset, num_cores, num_threads)
|
||||
if (*next >= '0' && *next <= '9') {
|
||||
prev = next;
|
||||
SKIP_DIGITS(next);
|
||||
num = __kmp_str_to_int(prev, *next);
|
||||
SKIP_WS(next);
|
||||
if (*next == 't' || *next == 'T') {
|
||||
KMP_DEBUG_ASSERT(flagT == 0);
|
||||
__kmp_place_num_threads_per_core = num;
|
||||
if (flagC == 0)
|
||||
return; // num_cores could be skipped (e.g. 2s,4o,2t)
|
||||
flagT = 1;
|
||||
next++; // can have core-offset specified later (e.g. 2s,1c,2t,3o)
|
||||
} else if (*next == 'c' || *next == 'C') {
|
||||
KMP_DEBUG_ASSERT(flagC == 0);
|
||||
__kmp_place_num_cores = num;
|
||||
flagC = 1;
|
||||
next++;
|
||||
//KMP_DEBUG_ASSERT(*next != '@'); // socket offset used "o" designator
|
||||
} else if (*next == 'o' || *next == 'O') {
|
||||
KMP_WARNING(AffHWSubsetDeprecated);
|
||||
single_warning = 1;
|
||||
KMP_DEBUG_ASSERT(flagC);
|
||||
//KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
|
||||
__kmp_place_core_offset = num;
|
||||
next++;
|
||||
} else {
|
||||
KMP_WARNING(AffHWSubsetInvalid, name, value);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
KMP_WARNING(AffHWSubsetInvalid, name, value);
|
||||
return;
|
||||
}
|
||||
KMP_DEBUG_ASSERT(flagC);
|
||||
SKIP_WS(next);
|
||||
if ( *next == '\0' )
|
||||
return;
|
||||
if (CHECK_DELIM(next)) {
|
||||
next++; // skip delimiter
|
||||
SKIP_WS(next);
|
||||
}
|
||||
|
||||
// Get 4-th value (could be core-offset, num_threads)
|
||||
if (*next >= '0' && *next <= '9') {
|
||||
prev = next;
|
||||
SKIP_DIGITS(next);
|
||||
num = __kmp_str_to_int(prev, *next);
|
||||
SKIP_WS(next);
|
||||
if (*next == 'o' || *next == 'O') {
|
||||
if (!single_warning) { // warn once
|
||||
KMP_WARNING(AffHWSubsetDeprecated);
|
||||
}
|
||||
KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
|
||||
__kmp_place_core_offset = num;
|
||||
next++;
|
||||
} else if (*next == 't' || *next == 'T') {
|
||||
KMP_DEBUG_ASSERT(flagT == 0);
|
||||
__kmp_place_num_threads_per_core = num;
|
||||
flagT = 1;
|
||||
next++; // can have core-offset specified after num threads
|
||||
} else {
|
||||
KMP_WARNING(AffHWSubsetInvalid, name, value);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
KMP_WARNING(AffHWSubsetInvalid, name, value);
|
||||
return;
|
||||
}
|
||||
SKIP_WS(next);
|
||||
if ( *next == '\0' )
|
||||
return;
|
||||
if (CHECK_DELIM(next)) {
|
||||
next++; // skip delimiter
|
||||
SKIP_WS(next);
|
||||
}
|
||||
|
||||
// Get 5-th value (could be core-offset, num_threads)
|
||||
if (*next >= '0' && *next <= '9') {
|
||||
prev = next;
|
||||
SKIP_DIGITS(next);
|
||||
num = __kmp_str_to_int(prev, *next);
|
||||
SKIP_WS(next);
|
||||
if (*next == 'o' || *next == 'O') {
|
||||
if (!single_warning) { // warn once
|
||||
KMP_WARNING(AffHWSubsetDeprecated);
|
||||
}
|
||||
KMP_DEBUG_ASSERT(flagT);
|
||||
KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
|
||||
__kmp_place_core_offset = num;
|
||||
} else if (*next == 't' || *next == 'T') {
|
||||
KMP_DEBUG_ASSERT(flagT == 0);
|
||||
__kmp_place_num_threads_per_core = num;
|
||||
} else {
|
||||
KMP_WARNING(AffHWSubsetInvalid, name, value);
|
||||
}
|
||||
} else {
|
||||
KMP_WARNING(AffHWSubsetInvalid, name, value);
|
||||
}
|
||||
return;
|
||||
#undef CHECK_DELIM
|
||||
}
|
||||
return;
|
||||
err:
|
||||
KMP_WARNING(AffHWSubsetInvalid, name, value);
|
||||
__kmp_hws_requested = 0; // mark that subset not requested
|
||||
return;
|
||||
}
|
||||
|
||||
static void
|
||||
__kmp_stg_print_hw_subset( kmp_str_buf_t * buffer, char const * name, void * data ) {
|
||||
if (__kmp_place_num_sockets + __kmp_place_num_cores + __kmp_place_num_threads_per_core) {
|
||||
if (__kmp_hws_requested) {
|
||||
int comma = 0;
|
||||
kmp_str_buf_t buf;
|
||||
__kmp_str_buf_init(&buf);
|
||||
|
@ -4573,26 +4457,34 @@ __kmp_stg_print_hw_subset( kmp_str_buf_t * buffer, char const * name, void * dat
|
|||
KMP_STR_BUF_PRINT_NAME_EX(name);
|
||||
else
|
||||
__kmp_str_buf_print(buffer, " %s='", name);
|
||||
if (__kmp_place_num_sockets) {
|
||||
__kmp_str_buf_print(&buf, "%ds", __kmp_place_num_sockets);
|
||||
if (__kmp_place_socket_offset)
|
||||
__kmp_str_buf_print(&buf, "@%d", __kmp_place_socket_offset);
|
||||
if (__kmp_hws_socket.num) {
|
||||
__kmp_str_buf_print(&buf, "%ds", __kmp_hws_socket.num);
|
||||
if (__kmp_hws_socket.offset)
|
||||
__kmp_str_buf_print(&buf, "@%d", __kmp_hws_socket.offset);
|
||||
comma = 1;
|
||||
}
|
||||
if (__kmp_place_num_cores) {
|
||||
__kmp_str_buf_print(&buf, "%s%dc", comma?",":"", __kmp_place_num_cores);
|
||||
if (__kmp_place_core_offset)
|
||||
__kmp_str_buf_print(&buf, "@%d", __kmp_place_core_offset);
|
||||
if (__kmp_hws_node.num) {
|
||||
__kmp_str_buf_print(&buf, "%s%dn", comma?",":"", __kmp_hws_node.num);
|
||||
if (__kmp_hws_node.offset)
|
||||
__kmp_str_buf_print(&buf, "@%d", __kmp_hws_node.offset);
|
||||
comma = 1;
|
||||
}
|
||||
if (__kmp_place_num_threads_per_core)
|
||||
__kmp_str_buf_print(&buf, "%s%dt", comma?",":"", __kmp_place_num_threads_per_core);
|
||||
if (__kmp_hws_tile.num) {
|
||||
__kmp_str_buf_print(&buf, "%s%dL2", comma?",":"", __kmp_hws_tile.num);
|
||||
if (__kmp_hws_tile.offset)
|
||||
__kmp_str_buf_print(&buf, "@%d", __kmp_hws_tile.offset);
|
||||
comma = 1;
|
||||
}
|
||||
if (__kmp_hws_core.num) {
|
||||
__kmp_str_buf_print(&buf, "%s%dc", comma?",":"", __kmp_hws_core.num);
|
||||
if (__kmp_hws_core.offset)
|
||||
__kmp_str_buf_print(&buf, "@%d", __kmp_hws_core.offset);
|
||||
comma = 1;
|
||||
}
|
||||
if (__kmp_hws_proc.num)
|
||||
__kmp_str_buf_print(&buf, "%s%dt", comma?",":"", __kmp_hws_proc.num);
|
||||
__kmp_str_buf_print(buffer, "%s'\n", buf.str );
|
||||
__kmp_str_buf_free(&buf);
|
||||
/*
|
||||
} else {
|
||||
__kmp_str_buf_print( buffer, " %s: %s \n", name, KMP_I18N_STR( NotDefined ) );
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue