KMP_HW_SUBSET extended with NUMA support when HWLOC enabled

Differential Revision: https://reviews.llvm.org/D31600

llvm-svn: 300220
Author: Andrey Churbanov
Date: 2017-04-13 17:15:07 +00:00
Commit: 4a9a89241b (parent: 7840dc8451)
7 changed files with 828 additions and 374 deletions

File: runtime/src/dllexports

@@ -351,7 +351,7 @@ kmpc_set_defaults 224
%ifdef OMP_30
__kmpc_omp_taskyield 235
%endif # OMP_30
__kmpc_place_threads 236
# __kmpc_place_threads 236
%endif
# OpenMP 4.0 entry points

File: runtime/src/i18n/en_US.txt

@@ -38,7 +38,7 @@ Language "English"
Country "USA"
LangId "1033"
Version "2"
Revision "20160714"
Revision "20161216"
@@ -388,8 +388,8 @@ OBSOLETE "%1$s: granularity=core will be used."
EnvLockWarn "%1$s must be set prior to first OMP lock call or critical section; ignored."
FutexNotSupported "futex system call not supported; %1$s=%2$s ignored."
AffGranUsing "%1$s: granularity=%2$s will be used."
AffHWSubsetInvalid "%1$s: invalid value \"%2$s\", valid format is \"Ns[@N],Nc[@N],Nt "
"(nSockets@offset, nCores@offset, nTthreads per core)\"."
AffHWSubsetInvalid "%1$s: invalid value \"%2$s\", valid format is \"N<item>[@N][,...][,Nt] "
"(<item> can be S, N, L2, C, T for Socket, NUMA Node, L2 Cache, Core, Thread)\"."
AffHWSubsetUnsupported "KMP_HW_SUBSET ignored: unsupported architecture."
AffHWSubsetManyCores "KMP_HW_SUBSET ignored: too many cores requested."
SyntaxErrorUsing "%1$s: syntax error, using %2$s."
@@ -411,6 +411,10 @@ AffHwlocErrorOccurred "%1$s: Hwloc failed in %2$s. Relying on internal af
EnvSerialWarn "%1$s must be set prior to OpenMP runtime library initialization; ignored."
EnvVarDeprecated "%1$s variable deprecated, please use %2$s instead."
RedMethodNotSupported "KMP_FORCE_REDUCTION: %1$s method is not supported; using critical."
AffHWSubsetNoHWLOC "KMP_HW_SUBSET ignored: unsupported item requested for non-HWLOC topology method (KMP_TOPOLOGY_METHOD)"
AffHWSubsetManyNodes "KMP_HW_SUBSET ignored: too many NUMA Nodes requested."
AffHWSubsetManyTiles "KMP_HW_SUBSET ignored: too many L2 Caches requested."
AffHWSubsetManyProcs "KMP_HW_SUBSET ignored: too many Procs requested."
# --------------------------------------------------------------------------------------------------
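For illustration, a few values accepted by the new format described in AffHWSubsetInvalid above (machine sizes here are hypothetical; an offset gives the number of units to skip before the requested ones):

KMP_HW_SUBSET=2s,4c,2t        # 2 sockets, 4 cores per socket, 2 threads per core
KMP_HW_SUBSET=2s@2,4c@8,2t    # 2 sockets with offset 2, 4 cores with offset 8, 2 threads per core
KMP_HW_SUBSET=1s,2n,4c,2t     # 1 socket, 2 NUMA nodes, 4 cores per node, 2 threads per core
KMP_HW_SUBSET=1s,4L2,2c,1t    # 1 socket, 4 L2 caches (tiles), 2 cores per tile, 1 thread per core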

File: runtime/src/kmp.h

@@ -774,11 +774,19 @@ typedef enum kmp_cancel_kind_t {
} kmp_cancel_kind_t;
#endif // OMP_40_ENABLED
extern int __kmp_place_num_sockets;
extern int __kmp_place_socket_offset;
extern int __kmp_place_num_cores;
extern int __kmp_place_core_offset;
extern int __kmp_place_num_threads_per_core;
// KMP_HW_SUBSET support:
typedef struct kmp_hws_item {
int num;
int offset;
} kmp_hws_item_t;
extern kmp_hws_item_t __kmp_hws_socket;
extern kmp_hws_item_t __kmp_hws_node;
extern kmp_hws_item_t __kmp_hws_tile;
extern kmp_hws_item_t __kmp_hws_core;
extern kmp_hws_item_t __kmp_hws_proc;
extern int __kmp_hws_requested;
extern int __kmp_hws_abs_flag; // absolute or per-item number requested
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
@@ -3494,9 +3502,6 @@ KMP_EXPORT kmp_int32 __kmp_get_reduce_method( void );
KMP_EXPORT kmp_uint64 __kmpc_get_taskid();
KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid();
// this function exported for testing of KMP_PLACE_THREADS functionality
KMP_EXPORT void __kmpc_place_threads(int,int,int,int,int);
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
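As a reading aid (not part of the patch), here is roughly how a value such as KMP_HW_SUBSET=2s,4c@2,2t is expected to populate the items declared above; the variable names below are illustrative only:

// Hypothetical illustration: expected contents after parsing "2s,4c@2,2t".
kmp_hws_item_t socket_item = { 2, 0 };  // 2 sockets, offset 0
kmp_hws_item_t core_item   = { 4, 2 };  // 4 cores per socket, offset 2
kmp_hws_item_t proc_item   = { 2, 0 };  // 2 threads per core, offset 0
// __kmp_hws_node and __kmp_hws_tile keep their {0, 0} defaults (level not requested),
// and __kmp_hws_requested becomes 1 so __kmp_apply_thread_places prunes the topology.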

File: runtime/src/kmp_affinity.cpp

@@ -3405,102 +3405,665 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
#undef ADD_MASK
#undef ADD_MASK_OSID
#if KMP_USE_HWLOC
static int
__kmp_hwloc_count_children_by_type(
hwloc_topology_t t, hwloc_obj_t o, hwloc_obj_type_t type, hwloc_obj_t* f)
{
if (!hwloc_compare_types(o->type, type)) {
if (*f == NULL)
*f = o; // output first descendant found
return 1;
}
int sum = 0;
for (unsigned i = 0; i < o->arity; i++)
sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
return sum; // will be 0 if none found (PU arity is 0)
}
static int
__kmp_hwloc_count_children_by_depth(
hwloc_topology_t t, hwloc_obj_t o, unsigned depth, hwloc_obj_t* f)
{
if (o->depth == depth) {
if (*f == NULL)
*f = o; // output first descendant found
return 1;
}
int sum = 0;
for (unsigned i = 0; i < o->arity; i++)
sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
return sum; // will be 0 if none found (PU arity is 0)
}
static int
__kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o)
{ // skip PU descendants of object o
int skipped = 0;
hwloc_obj_t hT = NULL;
int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
for (int i = 0; i < N; ++i) {
KMP_DEBUG_ASSERT(hT);
unsigned idx = hT->os_index;
if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
KMP_CPU_CLR(idx, __kmp_affin_fullMask);
KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
++skipped;
}
hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
}
return skipped; // count number of skipped units
}
static int
__kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o)
{ // check if obj has PUs present in fullMask
hwloc_obj_t hT = NULL;
int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
for (int i = 0; i < N; ++i) {
KMP_DEBUG_ASSERT(hT);
unsigned idx = hT->os_index;
if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
return 1; // found PU
hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
}
return 0; // no PUs found
}
#endif // KMP_USE_HWLOC
static void
__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
{
int i, j, k, n_old = 0, n_new = 0, proc_num = 0;
if (__kmp_place_num_sockets == 0 &&
__kmp_place_num_cores == 0 &&
__kmp_place_num_threads_per_core == 0 )
goto _exit; // no topology limiting actions requested, exit
if (__kmp_place_num_sockets == 0)
__kmp_place_num_sockets = nPackages; // use all available sockets
if (__kmp_place_num_cores == 0)
__kmp_place_num_cores = nCoresPerPkg; // use all available cores
if (__kmp_place_num_threads_per_core == 0 ||
__kmp_place_num_threads_per_core > __kmp_nThreadsPerCore)
__kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
AddrUnsPair *newAddr;
if (__kmp_hws_requested == 0)
goto _exit; // no topology limiting actions requested, exit
#if KMP_USE_HWLOC
if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
// The number of sub-objects is calculated dynamically, which works for
// any non-uniform topology.
// L2 cache objects are located by depth, other objects by type.
hwloc_topology_t tp = __kmp_hwloc_topology;
int nS=0, nN=0, nL=0, nC=0, nT=0; // logical index including skipped
int nCr=0, nTr=0; // number of requested units
int nPkg=0, nCo=0, n_new=0, n_old = 0, nCpP=0, nTpC=0; // counters
hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
int L2depth, idx;
if ( !__kmp_affinity_uniform_topology() ) {
// check support of extensions ----------------------------------
int numa_support = 0, tile_support = 0;
if (__kmp_pu_os_idx)
hT = hwloc_get_pu_obj_by_os_index(
tp, __kmp_pu_os_idx[__kmp_avail_proc - 1]);
else
hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
if (hT == NULL) { // something's gone wrong
KMP_WARNING(AffHWSubsetUnsupported);
goto _exit;
}
// check NUMA node
hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
if (hN != NULL && hN->depth > hS->depth) {
numa_support = 1; // 1 in case socket includes node(s)
} else if (__kmp_hws_node.num > 0) {
// don't support sockets inside NUMA node (no such HW found for testing)
KMP_WARNING(AffHWSubsetUnsupported);
goto _exit;
}
// check L2 cache; get the object by depth because there are multiple cache levels
L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
if (hL != NULL && __kmp_hwloc_count_children_by_type(
tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
tile_support = 1; // no sense to count L2 if it includes single core
} else if (__kmp_hws_tile.num > 0) {
if (__kmp_hws_core.num == 0) {
__kmp_hws_core = __kmp_hws_tile; // replace L2 with core
__kmp_hws_tile.num = 0;
} else {
// L2 and core are both requested, but represent same object
KMP_WARNING(AffHWSubsetInvalid);
goto _exit;
}
}
// end of check of extensions -----------------------------------
// fill in unset items, validate settings -----------------------
if (__kmp_hws_socket.num == 0)
__kmp_hws_socket.num = nPackages; // use all available sockets
if (__kmp_hws_socket.offset >= nPackages) {
KMP_WARNING(AffHWSubsetManySockets);
goto _exit;
}
if (numa_support) {
int NN = __kmp_hwloc_count_children_by_type(
tp, hS, HWLOC_OBJ_NUMANODE, &hN); // num nodes in socket
if (__kmp_hws_node.num == 0)
__kmp_hws_node.num = NN; // use all available nodes
if (__kmp_hws_node.offset >= NN) {
KMP_WARNING(AffHWSubsetManyNodes);
goto _exit;
}
if (tile_support) {
// get num tiles in node
int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
if (__kmp_hws_tile.num == 0) {
__kmp_hws_tile.num = NL + 1;
} // use all available tiles, some node may have more tiles, thus +1
if (__kmp_hws_tile.offset >= NL) {
KMP_WARNING(AffHWSubsetManyTiles);
goto _exit;
}
int NC = __kmp_hwloc_count_children_by_type(
tp, hL, HWLOC_OBJ_CORE, &hC); // num cores in tile
if (__kmp_hws_core.num == 0)
__kmp_hws_core.num = NC; // use all available cores
if (__kmp_hws_core.offset >= NC) {
KMP_WARNING(AffHWSubsetManyCores);
goto _exit;
}
} else { // tile_support
int NC = __kmp_hwloc_count_children_by_type(
tp, hN, HWLOC_OBJ_CORE, &hC); // num cores in node
if (__kmp_hws_core.num == 0)
__kmp_hws_core.num = NC; // use all available cores
if (__kmp_hws_core.offset >= NC) {
KMP_WARNING(AffHWSubsetManyCores);
goto _exit;
}
} // tile_support
} else { // numa_support
if (tile_support) {
// get num tiles in socket
int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
if (__kmp_hws_tile.num == 0)
__kmp_hws_tile.num = NL; // use all available tiles
if (__kmp_hws_tile.offset >= NL) {
KMP_WARNING(AffHWSubsetManyTiles);
goto _exit;
}
int NC = __kmp_hwloc_count_children_by_type(
tp, hL, HWLOC_OBJ_CORE, &hC); // num cores in tile
if (__kmp_hws_core.num == 0)
__kmp_hws_core.num = NC; // use all available cores
if (__kmp_hws_core.offset >= NC) {
KMP_WARNING(AffHWSubsetManyCores);
goto _exit;
}
} else { // tile_support
int NC = __kmp_hwloc_count_children_by_type(
tp, hS, HWLOC_OBJ_CORE, &hC); // num cores in socket
if (__kmp_hws_core.num == 0)
__kmp_hws_core.num = NC; // use all available cores
if (__kmp_hws_core.offset >= NC) {
KMP_WARNING(AffHWSubsetManyCores);
goto _exit;
}
} // tile_support
}
if (__kmp_hws_proc.num == 0)
__kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs
if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
KMP_WARNING(AffHWSubsetManyProcs);
goto _exit;
}
// end of validation --------------------------------------------
if (pAddr) // pAddr is NULL in case of affinity_none
newAddr = (AddrUnsPair *)__kmp_allocate(
sizeof(AddrUnsPair) * __kmp_avail_proc); // max size
// main loop to form HW subset ----------------------------------
hS = NULL;
int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
for (int s = 0; s < NP; ++s) {
// Check Socket -----------------------------------------------
hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
if (!__kmp_hwloc_obj_has_PUs(tp, hS))
continue; // skip socket if all PUs are out of fullMask
++nS; // only count objects that have PUs in the affinity mask
if (nS <= __kmp_hws_socket.offset ||
nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket
continue; // move to next socket
}
nCr = 0; // count number of cores per socket
// socket requested, go down the topology tree
// check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile)
if (numa_support) {
nN = 0;
hN = NULL;
int NN = __kmp_hwloc_count_children_by_type(
tp, hS, HWLOC_OBJ_NUMANODE, &hN); // num nodes in current socket
for (int n = 0; n < NN; ++n) {
// Check NUMA Node ----------------------------------------
if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
continue; // skip node if all PUs are out of fullMask
}
++nN;
if (nN <= __kmp_hws_node.offset ||
nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
// skip node as not requested
n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node
hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
continue; // move to next node
}
// node requested, go down the topology tree
if (tile_support) {
nL = 0;
hL = NULL;
int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
for (int l = 0; l < NL; ++l) {
// Check L2 (tile) ------------------------------------
if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
continue; // skip tile if all PUs are out of fullMask
}
++nL;
if (nL <= __kmp_hws_tile.offset ||
nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
// skip tile as not requested
n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
continue; // move to next tile
}
// tile requested, go down the topology tree
nC = 0;
hC = NULL;
int NC = __kmp_hwloc_count_children_by_type(
tp, hL, HWLOC_OBJ_CORE, &hC); // num cores in current tile
for (int c = 0; c < NC; ++c) {
// Check Core ---------------------------------------
if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
continue; // skip core if all PUs are out of fullMask
}
++nC;
if (nC <= __kmp_hws_core.offset ||
nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
// skip core as not requested
n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
continue; // move to next core
}
// core requested, go down to PUs
nT = 0;
nTr = 0;
hT = NULL;
int NT = __kmp_hwloc_count_children_by_type(
tp, hC, HWLOC_OBJ_PU, &hT); // num procs in current core
for (int t = 0; t < NT; ++t) {
// Check PU ---------------------------------------
idx = hT->os_index;
if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
continue; // skip PU if not in fullMask
}
++nT;
if (nT <= __kmp_hws_proc.offset ||
nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
// skip PU
KMP_CPU_CLR(idx, __kmp_affin_fullMask);
++n_old;
KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
continue; // move to next PU
}
++nTr;
if (pAddr) // collect requested thread's data
newAddr[n_new] = (*pAddr)[n_old];
++n_new;
++n_old;
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
} // threads loop
if (nTr > 0) {
++nCr; // num cores per socket
++nCo; // total num cores
if (nTr > nTpC)
nTpC = nTr; // calc max threads per core
}
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
} // cores loop
hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
} // tiles loop
} else { // tile_support
// no tiles, check cores
nC = 0;
hC = NULL;
int NC = __kmp_hwloc_count_children_by_type(
tp, hN, HWLOC_OBJ_CORE, &hC); // num cores in current node
for (int c = 0; c < NC; ++c) {
// Check Core ---------------------------------------
if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
continue; // skip core if all PUs are out of fullMask
}
++nC;
if (nC <= __kmp_hws_core.offset ||
nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
// skip core as not requested
n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
continue; // move to next core
}
// core requested, go down to PUs
nT = 0;
nTr = 0;
hT = NULL;
int NT = __kmp_hwloc_count_children_by_type(
tp, hC, HWLOC_OBJ_PU, &hT);
for (int t = 0; t < NT; ++t) {
// Check PU ---------------------------------------
idx = hT->os_index;
if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
continue; // skip PU if not in fullMask
}
++nT;
if (nT <= __kmp_hws_proc.offset ||
nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
// skip PU
KMP_CPU_CLR(idx, __kmp_affin_fullMask);
++n_old;
KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
continue; // move to next PU
}
++nTr;
if (pAddr) // collect requested thread's data
newAddr[n_new] = (*pAddr)[n_old];
++n_new;
++n_old;
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
} // threads loop
if (nTr > 0) {
++nCr; // num cores per socket
++nCo; // total num cores
if (nTr > nTpC)
nTpC = nTr; // calc max threads per core
}
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
} // cores loop
} // tiles support
hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
} // nodes loop
} else { // numa_support
// no NUMA support
if (tile_support) {
nL = 0;
hL = NULL;
int NL = __kmp_hwloc_count_children_by_depth(
tp, hS, L2depth, &hL); // num tiles in current socket
for (int l = 0; l < NL; ++l) {
// Check L2 (tile) ------------------------------------
if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
continue; // skip tile if all PUs are out of fullMask
}
++nL;
if (nL <= __kmp_hws_tile.offset ||
nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
// skip tile as not requested
n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
continue; // move to next tile
}
// tile requested, go down the topology tree
nC = 0;
hC = NULL;
int NC = __kmp_hwloc_count_children_by_type(
tp, hL, HWLOC_OBJ_CORE, &hC); // num cores per tile
for (int c = 0; c < NC; ++c) {
// Check Core ---------------------------------------
if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
continue; // skip core if all PUs are out of fullMask
}
++nC;
if (nC <= __kmp_hws_core.offset ||
nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
// skip core as not requested
n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
continue; // move to next core
}
// core requested, go down to PUs
nT = 0;
nTr = 0;
hT = NULL;
int NT = __kmp_hwloc_count_children_by_type(
tp, hC, HWLOC_OBJ_PU, &hT); // num procs per core
for (int t = 0; t < NT; ++t) {
// Check PU ---------------------------------------
idx = hT->os_index;
if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
continue; // skip PU if not in fullMask
}
++nT;
if (nT <= __kmp_hws_proc.offset ||
nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
// skip PU
KMP_CPU_CLR(idx, __kmp_affin_fullMask);
++n_old;
KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
continue; // move to next PU
}
++nTr;
if (pAddr) // collect requested thread's data
newAddr[n_new] = (*pAddr)[n_old];
++n_new;
++n_old;
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
} // threads loop
if (nTr > 0) {
++nCr; // num cores per socket
++nCo; // total num cores
if (nTr > nTpC)
nTpC = nTr; // calc max threads per core
}
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
} // cores loop
hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
} // tiles loop
} else { // tile_support
// no tiles, check cores
nC = 0;
hC = NULL;
int NC = __kmp_hwloc_count_children_by_type(
tp, hS, HWLOC_OBJ_CORE, &hC); // num cores in socket
for (int c = 0; c < NC; ++c) {
// Check Core -------------------------------------------
if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
continue; // skip core if all PUs are out of fullMask
}
++nC;
if (nC <= __kmp_hws_core.offset ||
nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
// skip core as not requested
n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
continue; // move to next core
}
// core requested, go down to PUs
nT = 0;
nTr = 0;
hT = NULL;
int NT = __kmp_hwloc_count_children_by_type(
tp, hC, HWLOC_OBJ_PU, &hT); // num procs per core
for (int t = 0; t < NT; ++t) {
// Check PU ---------------------------------------
idx = hT->os_index;
if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
continue; // skip PU if not in fullMask
}
++nT;
if (nT <= __kmp_hws_proc.offset ||
nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
// skip PU
KMP_CPU_CLR(idx, __kmp_affin_fullMask);
++n_old;
KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
continue; // move to next PU
}
++nTr;
if (pAddr) // collect requested thread's data
newAddr[n_new] = (*pAddr)[n_old];
++n_new;
++n_old;
hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
} // threads loop
if (nTr > 0) {
++nCr; // num cores per socket
++nCo; // total num cores
if (nTr > nTpC)
nTpC = nTr; // calc max threads per core
}
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
} // cores loop
} // tiles support
} // numa_support
if (nCr > 0) { // found cores?
++nPkg; // num sockets
if (nCr > nCpP)
nCpP = nCr; // calc max cores per socket
}
} // sockets loop
// check the subset is valid
KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
KMP_DEBUG_ASSERT(nPkg > 0);
KMP_DEBUG_ASSERT(nCpP > 0);
KMP_DEBUG_ASSERT(nTpC > 0);
KMP_DEBUG_ASSERT(nCo > 0);
KMP_DEBUG_ASSERT(nPkg <= nPackages);
KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);
nPackages = nPkg; // correct num sockets
nCoresPerPkg = nCpP; // correct num cores per socket
__kmp_nThreadsPerCore = nTpC; // correct num threads per core
__kmp_avail_proc = n_new; // correct num procs
__kmp_ncores = nCo; // correct num cores
// hwloc topology method end
} else
#endif // KMP_USE_HWLOC
{
int n_old = 0, n_new = 0, proc_num = 0;
if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {
KMP_WARNING(AffHWSubsetNoHWLOC);
goto _exit;
}
if (__kmp_hws_socket.num == 0)
__kmp_hws_socket.num = nPackages; // use all available sockets
if (__kmp_hws_core.num == 0)
__kmp_hws_core.num = nCoresPerPkg; // use all available cores
if (__kmp_hws_proc.num == 0 ||
__kmp_hws_proc.num > __kmp_nThreadsPerCore)
__kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts
if ( !__kmp_affinity_uniform_topology() ) {
KMP_WARNING( AffHWSubsetNonUniform );
goto _exit; // don't support non-uniform topology
}
if ( depth > 3 ) {
}
if ( depth > 3 ) {
KMP_WARNING( AffHWSubsetNonThreeLevel );
goto _exit; // don't support not-3-level topology
}
if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
}
if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
KMP_WARNING(AffHWSubsetManySockets);
goto _exit;
}
if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
}
if ( __kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg ) {
KMP_WARNING( AffHWSubsetManyCores );
goto _exit;
}
AddrUnsPair *newAddr;
if (pAddr) // pAddr is NULL in case of affinity_none
}
// Form the requested subset
if (pAddr) // pAddr is NULL in case of affinity_none
newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
__kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
for (i = 0; i < nPackages; ++i) {
if (i < __kmp_place_socket_offset ||
i >= __kmp_place_socket_offset + __kmp_place_num_sockets) {
n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
if (__kmp_pu_os_idx != NULL) {
for (j = 0; j < nCoresPerPkg; ++j) { // walk through skipped socket
for (k = 0; k < __kmp_nThreadsPerCore; ++k) {
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
++proc_num;
}
}
__kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc.num);
for (int i = 0; i < nPackages; ++i) {
if (i < __kmp_hws_socket.offset ||
i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
// skip not-requested socket
n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
if (__kmp_pu_os_idx != NULL) {
// walk through skipped socket
for (int j = 0; j < nCoresPerPkg; ++j) {
for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
++proc_num;
}
}
}
} else {
for (j = 0; j < nCoresPerPkg; ++j) { // walk through requested socket
if (j < __kmp_place_core_offset ||
j >= __kmp_place_core_offset + __kmp_place_num_cores) {
n_old += __kmp_nThreadsPerCore; // skip not-requested core
if (__kmp_pu_os_idx != NULL) {
for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through skipped core
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
++proc_num;
}
}
} else {
for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
if (k < __kmp_place_num_threads_per_core) {
if (pAddr)
newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
n_new++;
} else {
if (__kmp_pu_os_idx != NULL)
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
}
n_old++;
++proc_num;
}
// walk through requested socket
for (int j = 0; j < nCoresPerPkg; ++j) {
if (j < __kmp_hws_core.offset ||
j >= __kmp_hws_core.offset + __kmp_hws_core.num)
{ // skip not-requested core
n_old += __kmp_nThreadsPerCore;
if (__kmp_pu_os_idx != NULL) {
for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
++proc_num;
}
}
} else {
// walk through requested core
for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
if (k < __kmp_hws_proc.num) {
if (pAddr) // collect requested thread's data
newAddr[n_new] = (*pAddr)[n_old];
n_new++;
} else {
if (__kmp_pu_os_idx != NULL)
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
}
n_old++;
++proc_num;
}
}
}
}
}
KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
__kmp_place_num_threads_per_core);
nPackages = __kmp_place_num_sockets; // correct nPackages
nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
__kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
__kmp_avail_proc = n_new; // correct avail_proc
__kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
}
KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
KMP_DEBUG_ASSERT(n_new == __kmp_hws_socket.num * __kmp_hws_core.num *
__kmp_hws_proc.num);
nPackages = __kmp_hws_socket.num; // correct nPackages
nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg
__kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore
__kmp_avail_proc = n_new; // correct avail_proc
__kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores
} // non-hwloc topology method
if (pAddr) {
__kmp_free( *pAddr );
*pAddr = newAddr; // replace old topology with new one
}
if (__kmp_affinity_verbose) {
char m[KMP_AFFIN_MASK_PRINT_LEN];
__kmp_affinity_print_mask(m,KMP_AFFIN_MASK_PRINT_LEN,__kmp_affin_fullMask);
if (__kmp_affinity_respect_mask) {
KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m);
} else {
KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m);
}
KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
kmp_str_buf_t buf;
__kmp_str_buf_init(&buf);
__kmp_str_buf_print(&buf, "%d", nPackages);
KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
__kmp_nThreadsPerCore, __kmp_ncores);
__kmp_str_buf_free(&buf);
}
_exit:
if (__kmp_pu_os_idx != NULL) {
__kmp_free(__kmp_pu_os_idx);
__kmp_pu_os_idx = NULL;
}
}
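The helpers above count sub-objects by type, except L2, which is located by depth because hwloc 1.x represents every cache level with the same object type. A minimal standalone sketch of the same queries, assuming hwloc 1.11 headers and linking against -lhwloc, might look like this:

#include <hwloc.h>
#include <stdio.h>

int main() {
    hwloc_topology_t tp;
    hwloc_topology_init(&tp);
    hwloc_topology_load(tp);
    // Objects with a dedicated hwloc type are counted directly by type.
    int n_packages = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
    int n_cores    = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_CORE);
    int n_pus      = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PU);
    // L2 caches are located by depth, as in the patch above, because all cache
    // levels share one hwloc object type in hwloc 1.x.
    int l2_depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
    int n_l2 = (l2_depth >= 0) ? (int)hwloc_get_nbobjs_by_depth(tp, l2_depth) : 0;
    printf("packages=%d cores=%d pus=%d l2=%d\n", n_packages, n_cores, n_pus, n_l2);
    hwloc_topology_destroy(tp);
    return 0;
}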

File: runtime/src/kmp_csupport.c

@@ -3038,18 +3038,6 @@ __kmpc_get_parent_taskid() {
} // __kmpc_get_parent_taskid
void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT)
{
if ( ! __kmp_init_serial ) {
__kmp_serial_initialize();
}
__kmp_place_num_sockets = nS;
__kmp_place_socket_offset = sO;
__kmp_place_num_cores = nC;
__kmp_place_core_offset = cO;
__kmp_place_num_threads_per_core = nT;
}
#if OMP_45_ENABLED
/*!
@ingroup WORK_SHARING

File: runtime/src/kmp_global.c

@@ -264,11 +264,13 @@ kmp_nested_proc_bind_t __kmp_nested_proc_bind = { NULL, 0, 0 };
int __kmp_affinity_num_places = 0;
#endif
int __kmp_place_num_sockets = 0;
int __kmp_place_socket_offset = 0;
int __kmp_place_num_cores = 0;
int __kmp_place_core_offset = 0;
int __kmp_place_num_threads_per_core = 0;
kmp_hws_item_t __kmp_hws_socket = {0, 0};
kmp_hws_item_t __kmp_hws_node = {0, 0};
kmp_hws_item_t __kmp_hws_tile = {0, 0};
kmp_hws_item_t __kmp_hws_core = {0, 0};
kmp_hws_item_t __kmp_hws_proc = {0, 0};
int __kmp_hws_requested = 0;
int __kmp_hws_abs_flag = 0; // absolute or per-item number requested
#if OMP_40_ENABLED
kmp_int32 __kmp_default_device = 0;

File: runtime/src/kmp_settings.c

@@ -24,6 +24,7 @@
#include "kmp_lock.h"
#include "kmp_io.h"
#include "kmp_affinity.h"
#include <ctype.h> // toupper()
static int __kmp_env_toPrint( char const * name, int flag );
@@ -3108,6 +3109,12 @@ __kmp_stg_print_topology_method( kmp_str_buf_t * buffer, char const * name,
break;
# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
# if KMP_USE_HWLOC
case affinity_top_method_hwloc:
value = "hwloc";
break;
# endif
case affinity_top_method_cpuinfo:
value = "cpuinfo";
break;
@@ -4297,275 +4304,152 @@ __kmp_stg_print_speculative_statsfile( kmp_str_buf_t * buffer, char const * name
// KMP_HW_SUBSET (was KMP_PLACE_THREADS)
// -------------------------------------------------------------------------------------------------
// The longest observable sequence of items is
// Socket-Node-Tile-Core-Thread
// So, let's limit to 5 levels for now
// The input string is usually short enough, let's use 512 limit for now
#define MAX_T_LEVEL 5
#define MAX_STR_LEN 512
static void
__kmp_stg_parse_hw_subset( char const * name, char const * value, void * data ) {
// Value example: 5Cx2Tx15O
// Which means "use 5 cores with offset 15, 2 threads per core"
// AC: extended to sockets level, examples of
// "use 2 sockets with offset 6, 2 cores with offset 2 per socket, 2 threads per core":
// 2s,6o,2c,2o,2t; 2s,6o,2c,2t,2o; 2s@6,2c@2,2t
// To not break legacy code core-offset can be last;
// postfix "o" or prefix @ can be offset designator.
// Note: not all syntax errors are analyzed, some may be skipped.
#define CHECK_DELIM(_x) (*(_x) == ',' || *(_x) == 'x')
static int parsed = 0;
int num;
int single_warning = 0;
int flagS = 0, flagC = 0, flagT = 0, flagSO = 0, flagCO = 0;
const char *next = value;
const char *prev;
if( strcmp(name, "KMP_PLACE_THREADS") == 0 ) {
KMP_INFORM(EnvVarDeprecated,name,"KMP_HW_SUBSET");
if( parsed == 1 ) {
return; // already parsed KMP_HW_SUBSET
}
// Value example: 1s,5c@3,2T
// Which means "use 1 socket, 5 cores with offset 3, 2 threads per core"
static int parsed = 0;
if( strcmp(name, "KMP_PLACE_THREADS") == 0 ) {
KMP_INFORM(EnvVarDeprecated,name,"KMP_HW_SUBSET");
if( parsed == 1 ) {
return; // already parsed KMP_HW_SUBSET
}
parsed = 1;
}
parsed = 1;
SKIP_WS(next); // skip white spaces
if (*next == '\0')
return; // no data provided, retain default values
if( strcmp(name, "KMP_PLACE_THREADS") == 0 ) {
KMP_INFORM(EnvVarDeprecated,name,"KMP_HW_SUBSET");
if( parsed == 1 ) {
return; // already parsed KMP_HW_SUBSET
}
char *components[MAX_T_LEVEL];
char const *digits = "0123456789";
char input[MAX_STR_LEN];
size_t len = 0, mlen = MAX_STR_LEN;
int level = 0;
// Canonicalize the string (remove spaces, unify delimiters, etc.)
char *pos = (char *)value;
while (*pos && mlen) {
if (*pos != ' ') { // skip spaces
if (len == 0 && *pos == ':') {
__kmp_hws_abs_flag = 1; // if the first symbol is ":", skip it
} else {
input[len] = toupper(*pos);
if (input[len] == 'X')
input[len] = ','; // unify delimiters of levels
if (input[len] == 'O' && strchr(digits, *(pos + 1)))
input[len] = '@'; // unify delimiters of offset
len++;
}
}
parsed = 1;
SKIP_WS(next); // skip white spaces
if (*next == '\0')
return; // no data provided, retain default values
// Get num_sockets first (or whatever specified)
if (*next >= '0' && *next <= '9') {
prev = next;
SKIP_DIGITS(next);
num = __kmp_str_to_int(prev, *next);
SKIP_WS(next);
if (*next == 's' || *next == 'S') { // e.g. "2s"
__kmp_place_num_sockets = num;
flagS = 1; // got num sockets
next++;
if (*next == '@') { // socket offset, e.g. "2s@4"
flagSO = 1;
prev = ++next; // don't allow spaces for simplicity
if (!(*next >= '0' && *next <= '9')) {
KMP_WARNING(AffHWSubsetInvalid, name, value);
return;
}
SKIP_DIGITS(next);
num = __kmp_str_to_int(prev, *next);
__kmp_place_socket_offset = num;
}
} else if (*next == 'c' || *next == 'C') {
__kmp_place_num_cores = num;
flagS = flagC = 1; // sockets were not specified - use default
next++;
if (*next == '@') { // core offset, e.g. "2c@6"
flagCO = 1;
prev = ++next; // don't allow spaces for simplicity
if (!(*next >= '0' && *next <= '9')) {
KMP_WARNING(AffHWSubsetInvalid, name, value);
return;
}
SKIP_DIGITS(next);
num = __kmp_str_to_int(prev, *next);
__kmp_place_core_offset = num;
}
} else if (CHECK_DELIM(next)) {
__kmp_place_num_cores = num; // no letter-designator - num cores
flagS = flagC = 1; // sockets were not specified - use default
next++;
} else if (*next == 't' || *next == 'T') {
__kmp_place_num_threads_per_core = num;
// sockets, cores were not specified - use default
return; // we ignore offset value in case all cores are used
} else if (*next == '\0') {
__kmp_place_num_cores = num;
return; // the only value provided - set num cores
mlen--;
pos++;
}
if (len == 0 || mlen == 0)
goto err; // content is either empty or too long
input[len] = '\0';
__kmp_hws_requested = 1; // mark that subset requested
// Split by delimiter
pos = input;
components[level++] = pos;
while (pos = strchr(pos, ',')) {
*pos = '\0'; // modify input and avoid more copying
components[level++] = ++pos; // expect something after ","
if (level > MAX_T_LEVEL)
goto err; // too many components provided
}
// Check each component
for (int i = 0; i < level; ++i) {
int offset = 0;
int num = atoi(components[i]); // each component should start with a number
if ((pos = strchr(components[i], '@'))) {
offset = atoi(pos + 1); // save offset
*pos = '\0'; // cut the offset from the component
}
pos = components[i] + strspn(components[i], digits);
if (pos == components[i])
goto err;
// detect the component type
switch (*pos) {
case 'S': // Socket
if (__kmp_hws_socket.num > 0)
goto err; // duplicate is not allowed
__kmp_hws_socket.num = num;
__kmp_hws_socket.offset = offset;
break;
case 'N': // NUMA Node
if (__kmp_hws_node.num > 0)
goto err; // duplicate is not allowed
__kmp_hws_node.num = num;
__kmp_hws_node.offset = offset;
break;
case 'L': // Cache
if (*(pos + 1) == '2') { // L2 - Tile
if (__kmp_hws_tile.num > 0)
goto err; // duplicate is not allowed
__kmp_hws_tile.num = num;
__kmp_hws_tile.offset = offset;
} else if (*(pos + 1) == '3') { // L3 - Socket
if (__kmp_hws_socket.num > 0)
goto err; // duplicate is not allowed
__kmp_hws_socket.num = num;
__kmp_hws_socket.offset = offset;
} else if (*(pos + 1) == '1') { // L1 - Core
if (__kmp_hws_core.num > 0)
goto err; // duplicate is not allowed
__kmp_hws_core.num = num;
__kmp_hws_core.offset = offset;
}
break;
case 'C': // Core (or Cache?)
if (*(pos + 1) != 'A') {
if (__kmp_hws_core.num > 0)
goto err; // duplicate is not allowed
__kmp_hws_core.num = num;
__kmp_hws_core.offset = offset;
} else { // Cache
char *d = pos + strcspn(pos, digits); // find digit
if (*d == '2') { // L2 - Tile
if (__kmp_hws_tile.num > 0)
goto err; // duplicate is not allowed
__kmp_hws_tile.num = num;
__kmp_hws_tile.offset = offset;
} else if (*d == '3') { // L3 - Socket
if (__kmp_hws_socket.num > 0)
goto err; // duplicate is not allowed
__kmp_hws_socket.num = num;
__kmp_hws_socket.offset = offset;
} else if (*d == '1') { // L1 - Core
if (__kmp_hws_core.num > 0)
goto err; // duplicate is not allowed
__kmp_hws_core.num = num;
__kmp_hws_core.offset = offset;
} else {
KMP_WARNING(AffHWSubsetInvalid, name, value);
return;
goto err;
}
} else {
KMP_WARNING(AffHWSubsetInvalid, name, value);
return;
}
break;
case 'T': // Thread
if (__kmp_hws_proc.num > 0)
goto err; // duplicate is not allowed
__kmp_hws_proc.num = num;
__kmp_hws_proc.offset = offset;
break;
default:
goto err;
}
KMP_DEBUG_ASSERT(flagS); // num sockets should already be set here
SKIP_WS(next);
if (*next == '\0')
return; // " n " - something like this
if (CHECK_DELIM(next)) {
next++; // skip delimiter
SKIP_WS(next);
}
// Get second value (could be offset, num_cores, num_threads)
if (*next >= '0' && *next <= '9') {
prev = next;
SKIP_DIGITS(next);
num = __kmp_str_to_int(prev, *next);
SKIP_WS(next);
if (*next == 'c' || *next == 'C') {
KMP_DEBUG_ASSERT(flagC == 0);
__kmp_place_num_cores = num;
flagC = 1;
next++;
if (*next == '@') { // core offset, e.g. "2c@6"
flagCO = 1;
prev = ++next; // don't allow spaces for simplicity
if (!(*next >= '0' && *next <= '9')) {
KMP_WARNING(AffHWSubsetInvalid, name, value);
return;
}
SKIP_DIGITS(next);
num = __kmp_str_to_int(prev, *next);
__kmp_place_core_offset = num;
}
} else if (*next == 'o' || *next == 'O') { // offset specified
KMP_WARNING(AffHWSubsetDeprecated);
single_warning = 1;
if (flagC) { // whether num_cores already specified (sockets skipped)
KMP_DEBUG_ASSERT(!flagCO); // either "o" or @, not both
__kmp_place_core_offset = num;
} else {
KMP_DEBUG_ASSERT(!flagSO); // either "o" or @, not both
__kmp_place_socket_offset = num;
}
next++;
} else if (*next == 't' || *next == 'T') {
KMP_DEBUG_ASSERT(flagT == 0);
__kmp_place_num_threads_per_core = num;
flagC = 1; // num_cores could be skipped ?
flagT = 1;
next++; // can have core-offset specified after num threads
} else if (*next == '\0') {
KMP_DEBUG_ASSERT(flagC); // 4x2 means 4 cores 2 threads per core
__kmp_place_num_threads_per_core = num;
return; // two values provided without letter-designator
} else {
KMP_WARNING(AffHWSubsetInvalid, name, value);
return;
}
} else {
KMP_WARNING(AffHWSubsetInvalid, name, value);
return;
}
SKIP_WS(next);
if (*next == '\0')
return; // " Ns,Nc " - something like this
if (CHECK_DELIM(next)) {
next++; // skip delimiter
SKIP_WS(next);
}
// Get third value (could be core-offset, num_cores, num_threads)
if (*next >= '0' && *next <= '9') {
prev = next;
SKIP_DIGITS(next);
num = __kmp_str_to_int(prev, *next);
SKIP_WS(next);
if (*next == 't' || *next == 'T') {
KMP_DEBUG_ASSERT(flagT == 0);
__kmp_place_num_threads_per_core = num;
if (flagC == 0)
return; // num_cores could be skipped (e.g. 2s,4o,2t)
flagT = 1;
next++; // can have core-offset specified later (e.g. 2s,1c,2t,3o)
} else if (*next == 'c' || *next == 'C') {
KMP_DEBUG_ASSERT(flagC == 0);
__kmp_place_num_cores = num;
flagC = 1;
next++;
//KMP_DEBUG_ASSERT(*next != '@'); // socket offset used "o" designator
} else if (*next == 'o' || *next == 'O') {
KMP_WARNING(AffHWSubsetDeprecated);
single_warning = 1;
KMP_DEBUG_ASSERT(flagC);
//KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
__kmp_place_core_offset = num;
next++;
} else {
KMP_WARNING(AffHWSubsetInvalid, name, value);
return;
}
} else {
KMP_WARNING(AffHWSubsetInvalid, name, value);
return;
}
KMP_DEBUG_ASSERT(flagC);
SKIP_WS(next);
if ( *next == '\0' )
return;
if (CHECK_DELIM(next)) {
next++; // skip delimiter
SKIP_WS(next);
}
// Get 4-th value (could be core-offset, num_threads)
if (*next >= '0' && *next <= '9') {
prev = next;
SKIP_DIGITS(next);
num = __kmp_str_to_int(prev, *next);
SKIP_WS(next);
if (*next == 'o' || *next == 'O') {
if (!single_warning) { // warn once
KMP_WARNING(AffHWSubsetDeprecated);
}
KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
__kmp_place_core_offset = num;
next++;
} else if (*next == 't' || *next == 'T') {
KMP_DEBUG_ASSERT(flagT == 0);
__kmp_place_num_threads_per_core = num;
flagT = 1;
next++; // can have core-offset specified after num threads
} else {
KMP_WARNING(AffHWSubsetInvalid, name, value);
return;
}
} else {
KMP_WARNING(AffHWSubsetInvalid, name, value);
return;
}
SKIP_WS(next);
if ( *next == '\0' )
return;
if (CHECK_DELIM(next)) {
next++; // skip delimiter
SKIP_WS(next);
}
// Get 5-th value (could be core-offset, num_threads)
if (*next >= '0' && *next <= '9') {
prev = next;
SKIP_DIGITS(next);
num = __kmp_str_to_int(prev, *next);
SKIP_WS(next);
if (*next == 'o' || *next == 'O') {
if (!single_warning) { // warn once
KMP_WARNING(AffHWSubsetDeprecated);
}
KMP_DEBUG_ASSERT(flagT);
KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
__kmp_place_core_offset = num;
} else if (*next == 't' || *next == 'T') {
KMP_DEBUG_ASSERT(flagT == 0);
__kmp_place_num_threads_per_core = num;
} else {
KMP_WARNING(AffHWSubsetInvalid, name, value);
}
} else {
KMP_WARNING(AffHWSubsetInvalid, name, value);
}
return;
#undef CHECK_DELIM
}
return;
err:
KMP_WARNING(AffHWSubsetInvalid, name, value);
__kmp_hws_requested = 0; // mark that subset not requested
return;
}
static void
__kmp_stg_print_hw_subset( kmp_str_buf_t * buffer, char const * name, void * data ) {
if (__kmp_place_num_sockets + __kmp_place_num_cores + __kmp_place_num_threads_per_core) {
if (__kmp_hws_requested) {
int comma = 0;
kmp_str_buf_t buf;
__kmp_str_buf_init(&buf);
@@ -4573,26 +4457,34 @@ __kmp_stg_print_hw_subset( kmp_str_buf_t * buffer, char const * name, void * dat
KMP_STR_BUF_PRINT_NAME_EX(name);
else
__kmp_str_buf_print(buffer, " %s='", name);
if (__kmp_place_num_sockets) {
__kmp_str_buf_print(&buf, "%ds", __kmp_place_num_sockets);
if (__kmp_place_socket_offset)
__kmp_str_buf_print(&buf, "@%d", __kmp_place_socket_offset);
if (__kmp_hws_socket.num) {
__kmp_str_buf_print(&buf, "%ds", __kmp_hws_socket.num);
if (__kmp_hws_socket.offset)
__kmp_str_buf_print(&buf, "@%d", __kmp_hws_socket.offset);
comma = 1;
}
if (__kmp_place_num_cores) {
__kmp_str_buf_print(&buf, "%s%dc", comma?",":"", __kmp_place_num_cores);
if (__kmp_place_core_offset)
__kmp_str_buf_print(&buf, "@%d", __kmp_place_core_offset);
if (__kmp_hws_node.num) {
__kmp_str_buf_print(&buf, "%s%dn", comma?",":"", __kmp_hws_node.num);
if (__kmp_hws_node.offset)
__kmp_str_buf_print(&buf, "@%d", __kmp_hws_node.offset);
comma = 1;
}
if (__kmp_place_num_threads_per_core)
__kmp_str_buf_print(&buf, "%s%dt", comma?",":"", __kmp_place_num_threads_per_core);
if (__kmp_hws_tile.num) {
__kmp_str_buf_print(&buf, "%s%dL2", comma?",":"", __kmp_hws_tile.num);
if (__kmp_hws_tile.offset)
__kmp_str_buf_print(&buf, "@%d", __kmp_hws_tile.offset);
comma = 1;
}
if (__kmp_hws_core.num) {
__kmp_str_buf_print(&buf, "%s%dc", comma?",":"", __kmp_hws_core.num);
if (__kmp_hws_core.offset)
__kmp_str_buf_print(&buf, "@%d", __kmp_hws_core.offset);
comma = 1;
}
if (__kmp_hws_proc.num)
__kmp_str_buf_print(&buf, "%s%dt", comma?",":"", __kmp_hws_proc.num);
__kmp_str_buf_print(buffer, "%s'\n", buf.str );
__kmp_str_buf_free(&buf);
/*
} else {
__kmp_str_buf_print( buffer, " %s: %s \n", name, KMP_I18N_STR( NotDefined ) );
*/
}
}
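For reference only, a self-contained sketch of the canonicalization step the new parser performs before splitting the value into components; the helper name is hypothetical and the leading ':' (absolute-number flag) handling is omitted:

#include <ctype.h>
#include <stdio.h>

// Mirrors the canonicalization in __kmp_stg_parse_hw_subset: drop spaces,
// upper-case letters, map 'x' to the level delimiter ',', and map an 'o'
// followed by a digit to the offset designator '@'.
static void canonicalize_hw_subset(const char *value, char *out, size_t out_size) {
    size_t len = 0;
    for (const char *pos = value; *pos && len + 1 < out_size; ++pos) {
        if (*pos == ' ')
            continue;                                  // skip spaces
        char c = (char)toupper((unsigned char)*pos);
        if (c == 'X')
            c = ',';                                   // unify delimiters of levels
        if (c == 'O' && isdigit((unsigned char)pos[1]))
            c = '@';                                   // unify delimiters of offset
        out[len++] = c;
    }
    out[len] = '\0';
}

int main() {
    char buf[64];
    canonicalize_hw_subset("1s, 5c x 2t", buf, sizeof(buf));
    printf("%s\n", buf);  // prints "1S,5C,2T", ready to be split at ','
    return 0;
}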