Added sockets to the syntax of KMP_PLACE_THREADS environment variable.

Added (optional) sockets to the syntax of the KMP_PLACE_THREADS environment variable.
Some limitations:
* The number of sockets, optionally followed by a socket offset, should be specified first (before any other parameters).
* The letter designation is mandatory for sockets and, once sockets are specified, for all parameters that follow.
* If the number of cores is specified first, the number of sockets defaults to all sockets on the machine; the old syntax is also partially supported when sockets are omitted.
* If the number of threads per core is specified first, the number of sockets and the number of cores per socket default to all sockets and all cores per socket, respectively.
* The number of cores per socket cannot be specified before sockets or after threads per core.
* The number of threads per core can be specified before or after the core offset (the old syntax required it to come before the core offset).
* The parameter delimiter can be empty, a comma, or a lower-case x (the parser also accepts '@' as a delimiter).
* Spaces are allowed around numbers, letters, and delimiters.
Approximate shorthand specification:
KMP_PLACE_THREADS="[num_sockets(S|s)[[delim]offset(O|o)][delim]][num_cores_per_socket(C|c)[[delim]offset(O|o)][delim]][num_threads_per_core(T|t)]"

Differential Revision: http://reviews.llvm.org/D13175

llvm-svn: 249708
This commit is contained in:
Jonathan Peyton 2015-10-08 17:55:54 +00:00
parent 68a39a6565
commit dd4aa9b6b5
6 changed files with 224 additions and 107 deletions

View File

@ -388,7 +388,8 @@ OBSOLETE "%1$s: granularity=core will be used."
EnvLockWarn "%1$s must be set prior to first OMP lock call or critical section; ignored."
FutexNotSupported "futex system call not supported; %1$s=%2$s ignored."
AffGranUsing "%1$s: granularity=%2$s will be used."
AffThrPlaceInvalid "%1$s: invalid value \"%2$s\", valid format is \"nC,mT[,kO]\"."
AffThrPlaceInvalid "%1$s: invalid value \"%2$s\", valid format is \"nS[,nO],nC[,nO],nT "
"(nSockets@offset, nCores@offset, nTthreads per core)\"."
AffThrPlaceUnsupported "KMP_PLACE_THREADS ignored: unsupported architecture."
AffThrPlaceManyCores "KMP_PLACE_THREADS ignored: too many cores requested."
SyntaxErrorUsing "%1$s: syntax error, using %2$s."
@ -402,6 +403,7 @@ AffThrPlaceNonUniform "KMP_PLACE_THREADS ignored: non-uniform topology."
AffThrPlaceNonThreeLevel "KMP_PLACE_THREADS ignored: only three-level topology is supported."
AffGranTopGroup "%1$s: granularity=%2$s is not supported with KMP_TOPOLOGY_METHOD=group. Using \"granularity=fine\"."
AffGranGroupType "%1$s: granularity=group is not supported with KMP_AFFINITY=%2$s. Using \"granularity=core\"."
AffThrPlaceManySockets "KMP_PLACE_THREADS ignored: too many sockets requested."
# --------------------------------------------------------------------------------------------------

View File

@ -788,9 +788,11 @@ typedef enum kmp_cancel_kind_t {
} kmp_cancel_kind_t;
#endif // OMP_40_ENABLED
extern int __kmp_place_num_sockets;
extern int __kmp_place_socket_offset;
extern int __kmp_place_num_cores;
extern int __kmp_place_num_threads_per_core;
extern int __kmp_place_core_offset;
extern int __kmp_place_num_threads_per_core;
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
@ -3388,7 +3390,8 @@ KMP_EXPORT kmp_int32 __kmp_get_reduce_method( void );
KMP_EXPORT kmp_uint64 __kmpc_get_taskid();
KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid();
KMP_EXPORT void __kmpc_place_threads(int,int,int);
// this function exported for testing of KMP_PLACE_THREADS functionality
KMP_EXPORT void __kmpc_place_threads(int,int,int,int,int);
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

View File

@ -3055,12 +3055,18 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
static void
__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
{
if ( __kmp_place_num_cores == 0 ) {
if ( __kmp_place_num_threads_per_core == 0 ) {
return; // no cores limiting actions requested, exit
}
if (__kmp_place_num_sockets == 0 &&
__kmp_place_num_cores == 0 &&
__kmp_place_num_threads_per_core == 0 )
return; // no topology limiting actions requested, exit
if (__kmp_place_num_sockets == 0)
__kmp_place_num_sockets = nPackages; // use all available sockets
if (__kmp_place_num_cores == 0)
__kmp_place_num_cores = nCoresPerPkg; // use all available cores
}
if (__kmp_place_num_threads_per_core == 0 ||
__kmp_place_num_threads_per_core > __kmp_nThreadsPerCore)
__kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
if ( !__kmp_affinity_uniform_topology() ) {
KMP_WARNING( AffThrPlaceNonUniform );
return; // don't support non-uniform topology
@ -3069,8 +3075,9 @@ __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
KMP_WARNING( AffThrPlaceNonThreeLevel );
return; // don't support not-3-level topology
}
if ( __kmp_place_num_threads_per_core == 0 ) {
__kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
KMP_WARNING(AffThrPlaceManySockets);
return;
}
if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
KMP_WARNING( AffThrPlaceManyCores );
@ -3078,23 +3085,31 @@ __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
}
AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
__kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
int i, j, k, n_old = 0, n_new = 0;
for ( i = 0; i < nPackages; ++i ) {
for ( j = 0; j < nCoresPerPkg; ++j ) {
if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
n_old += __kmp_nThreadsPerCore; // skip not-requested core
} else {
for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
if ( k < __kmp_place_num_threads_per_core ) {
newAddr[n_new] = (*pAddr)[n_old]; // copy requested core' data to new location
n_new++;
for (i = 0; i < nPackages; ++i)
if (i < __kmp_place_socket_offset ||
i >= __kmp_place_socket_offset + __kmp_place_num_sockets)
n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
else
for (j = 0; j < nCoresPerPkg; ++j) // walk through requested socket
if (j < __kmp_place_core_offset ||
j >= __kmp_place_core_offset + __kmp_place_num_cores)
n_old += __kmp_nThreadsPerCore; // skip not-requested core
else
for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
if (k < __kmp_place_num_threads_per_core) {
newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
n_new++;
}
n_old++;
}
n_old++;
}
}
}
}
KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
__kmp_place_num_threads_per_core);
nPackages = __kmp_place_num_sockets; // correct nPackages
nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
__kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
__kmp_avail_proc = n_new; // correct avail_proc

View File

@ -2855,14 +2855,16 @@ __kmpc_get_parent_taskid() {
} // __kmpc_get_parent_taskid
void __kmpc_place_threads(int nC, int nT, int nO)
void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT)
{
if ( ! __kmp_init_serial ) {
__kmp_serial_initialize();
}
__kmp_place_num_sockets = nS;
__kmp_place_socket_offset = sO;
__kmp_place_num_cores = nC;
__kmp_place_core_offset = cO;
__kmp_place_num_threads_per_core = nT;
__kmp_place_core_offset = nO;
}
// end of file //

View File

@ -249,9 +249,11 @@ kmp_nested_proc_bind_t __kmp_nested_proc_bind = { NULL, 0, 0 };
int __kmp_affinity_num_places = 0;
#endif
int __kmp_place_num_sockets = 0;
int __kmp_place_socket_offset = 0;
int __kmp_place_num_cores = 0;
int __kmp_place_num_threads_per_core = 0;
int __kmp_place_core_offset = 0;
int __kmp_place_num_threads_per_core = 0;
kmp_tasking_mode_t __kmp_tasking_mode = tskm_task_teams;

View File

@ -4117,127 +4117,220 @@ static void
__kmp_stg_parse_place_threads( char const * name, char const * value, void * data ) {
// Value example: 5Cx2Tx15O
// Which means "use 5 cores with offset 15, 2 threads per core"
// AC: extended to sockets level:
// 2s,6o,2c,2o,2t or 2s,6o,2c,2t,2o
// (to not break legacy code core-offset can be last).
// Note: not all syntax errors are analyzed, some may be skipped.
#define CHECK_DELIM(_x) (*(_x) == ',' || *(_x) == '@' || *(_x) == 'x')
int num;
int prev_delim = 0;
int flagS = 0, flagC = 0, flagT = 0;
const char *next = value;
const char *prev;
SKIP_WS( next );
if ( *next == '\0' ) {
return; // leave default values
}
// Get num_cores first
if ( *next >= '0' && *next <= '9' ) {
SKIP_WS(next); // skip white spaces
if (*next == '\0')
return; // no data provided, retain default values
// Get num_sockets first (or whatever specified)
if (*next >= '0' && *next <= '9') {
prev = next;
SKIP_DIGITS( next );
num = __kmp_str_to_int( prev, *next );
SKIP_WS( next );
if ( *next == 'C' || *next == 'c' ) {
__kmp_place_num_cores = num;
SKIP_DIGITS(next);
num = __kmp_str_to_int(prev, *next);
SKIP_WS(next);
if (*next == 's' || *next == 'S') {
__kmp_place_num_sockets = num;
flagS = 1; // got num sockets
next++;
} else if ( *next == ',' || *next == 'x' ) {
} else if (*next == 'c' || *next == 'C') {
__kmp_place_num_cores = num;
prev_delim = 1;
flagS = flagC = 1; // sockets were not specified - use default
next++;
} else if ( *next == 'T' || *next == 't' ) {
} else if (CHECK_DELIM(next)) {
__kmp_place_num_cores = num; // no letter-designator - num cores
flagS = flagC = 1; // sockets were not specified - use default
next++;
} else if (*next == 't' || *next == 'T') {
__kmp_place_num_threads_per_core = num;
// sockets, cores were not specified - use default
return; // we ignore offset value in case all cores are used
} else if ( *next == '\0' ) {
} else if (*next == '\0') {
__kmp_place_num_cores = num;
return; // the only value provided
return; // the only value provided - set num cores
} else {
KMP_WARNING( AffThrPlaceInvalid, name, value );
KMP_WARNING(AffThrPlaceInvalid, name, value);
return;
}
} else if ( *next == ',' || *next == 'x' ) {
// First character is delimiter, skip it, leave num_cores default value
prev_delim = 2;
next++;
} else {
KMP_WARNING( AffThrPlaceInvalid, name, value );
KMP_WARNING(AffThrPlaceInvalid, name, value);
return;
}
SKIP_WS( next );
if ( *next == '\0' ) {
KMP_DEBUG_ASSERT(flagS); // num sockets should already be set here
SKIP_WS(next);
if (*next == '\0')
return; // " n " - something like this
}
if ( ( *next == ',' || *next == 'x' ) && !prev_delim ) {
prev_delim = 1;
next++; // skip delimiter after num_core value
SKIP_WS( next );
if (CHECK_DELIM(next)) {
next++; // skip delimiter
SKIP_WS(next);
}
// Get threads_per_core next
if ( *next >= '0' && *next <= '9' ) {
prev_delim = 0;
// Get second value (could be offset, num_cores, num_threads)
if (*next >= '0' && *next <= '9') {
prev = next;
SKIP_DIGITS( next );
num = __kmp_str_to_int( prev, *next );
SKIP_WS( next );
if ( *next == 'T' || *next == 't' ) {
__kmp_place_num_threads_per_core = num;
SKIP_DIGITS(next);
num = __kmp_str_to_int(prev, *next);
SKIP_WS(next);
if (*next == 'o' || *next == 'O') { // offset specified
if (flagC) { // whether num_cores already specified (when sockets skipped)
__kmp_place_core_offset = num;
} else {
__kmp_place_socket_offset = num;
}
next++;
} else if ( *next == ',' || *next == 'x' ) {
__kmp_place_num_threads_per_core = num;
prev_delim = 1;
} else if (*next == 'c' || *next == 'C') {
KMP_DEBUG_ASSERT(flagC == 0);
__kmp_place_num_cores = num;
flagC = 1;
next++;
} else if ( *next == 'O' || *next == 'o' ) {
__kmp_place_core_offset = num;
return; // threads_per_core remains default
} else if ( *next == '\0' ) {
} else if (*next == 't' || *next == 'T') {
KMP_DEBUG_ASSERT(flagT == 0);
__kmp_place_num_threads_per_core = num;
return;
flagC = 1; // num_cores could be skipped ?
flagT = 1;
next++; // can have core-offset specified after num threads
} else if (*next == '\0') {
KMP_DEBUG_ASSERT(flagC); // 4x2 means 4 cores 2 threads per core
__kmp_place_num_threads_per_core = num;
return; // two values provided without letter-designator
} else {
KMP_WARNING( AffThrPlaceInvalid, name, value );
KMP_WARNING(AffThrPlaceInvalid, name, value);
return;
}
} else if ( *next == ',' || *next == 'x' ) {
if ( prev_delim == 2 ) {
return; // no sense in the only offset value, thus skip the rest
}
KMP_DEBUG_ASSERT( prev_delim == 1 );
next++; // no value for threads_per_core provided
} else {
KMP_WARNING( AffThrPlaceInvalid, name, value );
KMP_WARNING(AffThrPlaceInvalid, name, value);
return;
}
SKIP_WS( next );
if ( *next == '\0' ) {
return; // " nC,mT " - something like this
}
if ( ( *next == ',' || *next == 'x' ) && !prev_delim ) {
prev_delim = 1;
next++; // skip delimiter after threads_per_core value
SKIP_WS( next );
SKIP_WS(next);
if (*next == '\0')
return; // " Ns,Nc " - something like this
if (CHECK_DELIM(next)) {
next++; // skip delimiter
SKIP_WS(next);
}
// Get core offset last if any,
// don't bother checking syntax after all data obtained
if ( *next >= '0' && *next <= '9' ) {
// Get third value (could be core-offset, num_cores, num_threads)
if (*next >= '0' && *next <= '9') {
prev = next;
SKIP_DIGITS( next );
num = __kmp_str_to_int( prev, *next );
__kmp_place_core_offset = num;
SKIP_DIGITS(next);
num = __kmp_str_to_int(prev, *next);
SKIP_WS(next);
if (*next == 'c' || *next == 'C') {
KMP_DEBUG_ASSERT(flagC == 0);
__kmp_place_num_cores = num;
flagC = 1;
next++;
} else if (*next == 'o' || *next == 'O') {
KMP_DEBUG_ASSERT(flagC);
__kmp_place_core_offset = num;
next++;
} else if (*next == 't' || *next == 'T') {
KMP_DEBUG_ASSERT(flagT == 0);
__kmp_place_num_threads_per_core = num;
if (flagC == 0)
return; // num_cores could be skipped (e.g. 2s,4o,2t)
flagT = 1;
next++; // can have core-offset specified later (e.g. 2s,1c,2t,3o)
} else {
KMP_WARNING(AffThrPlaceInvalid, name, value);
return;
}
} else {
KMP_WARNING(AffThrPlaceInvalid, name, value);
return;
}
KMP_DEBUG_ASSERT(flagC);
SKIP_WS(next);
if ( *next == '\0' )
return;
if (CHECK_DELIM(next)) {
next++; // skip delimiter
SKIP_WS(next);
}
// Get 4-th value (could be core-offset, num_threads)
if (*next >= '0' && *next <= '9') {
prev = next;
SKIP_DIGITS(next);
num = __kmp_str_to_int(prev, *next);
SKIP_WS(next);
if (*next == 'o' || *next == 'O') {
__kmp_place_core_offset = num;
next++;
} else if (*next == 't' || *next == 'T') {
KMP_DEBUG_ASSERT(flagT == 0);
__kmp_place_num_threads_per_core = num;
flagT = 1;
next++; // can have core-offset specified after num threads
} else {
KMP_WARNING(AffThrPlaceInvalid, name, value);
return;
}
} else {
KMP_WARNING(AffThrPlaceInvalid, name, value);
return;
}
SKIP_WS(next);
if ( *next == '\0' )
return;
if (CHECK_DELIM(next)) {
next++; // skip delimiter
SKIP_WS(next);
}
// Get 5-th value (could be core-offset, num_threads)
if (*next >= '0' && *next <= '9') {
prev = next;
SKIP_DIGITS(next);
num = __kmp_str_to_int(prev, *next);
SKIP_WS(next);
if (*next == 'o' || *next == 'O') {
KMP_DEBUG_ASSERT(flagT);
__kmp_place_core_offset = num;
} else if (*next == 't' || *next == 'T') {
KMP_DEBUG_ASSERT(flagT == 0);
__kmp_place_num_threads_per_core = num;
} else {
KMP_WARNING(AffThrPlaceInvalid, name, value);
}
} else {
KMP_WARNING(AffThrPlaceInvalid, name, value);
}
return;
#undef CHECK_DELIM
}
static void
__kmp_stg_print_place_threads( kmp_str_buf_t * buffer, char const * name, void * data ) {
if ( __kmp_place_num_cores + __kmp_place_num_threads_per_core ) {
if (__kmp_place_num_sockets + __kmp_place_num_cores + __kmp_place_num_threads_per_core) {
int comma = 0;
kmp_str_buf_t buf;
__kmp_str_buf_init( &buf );
if( __kmp_env_format ) {
__kmp_str_buf_init(&buf);
if(__kmp_env_format)
KMP_STR_BUF_PRINT_NAME_EX(name);
} else {
__kmp_str_buf_print( buffer, " %s='", name );
else
__kmp_str_buf_print(buffer, " %s='", name);
if (__kmp_place_num_sockets) {
__kmp_str_buf_print(&buf, "%ds", __kmp_place_num_sockets);
if (__kmp_place_socket_offset)
__kmp_str_buf_print(&buf, "@%do", __kmp_place_socket_offset);
comma = 1;
}
__kmp_str_buf_print( &buf, "%dC", __kmp_place_num_cores );
__kmp_str_buf_print( &buf, "x%dT", __kmp_place_num_threads_per_core );
if ( __kmp_place_core_offset ) {
__kmp_str_buf_print( &buf, ",%dO", __kmp_place_core_offset );
if (__kmp_place_num_cores) {
__kmp_str_buf_print(&buf, "%s%dc", comma?",":"", __kmp_place_num_cores);
if (__kmp_place_core_offset)
__kmp_str_buf_print(&buf, "@%do", __kmp_place_core_offset);
comma = 1;
}
if (__kmp_place_num_threads_per_core)
__kmp_str_buf_print(&buf, "%s%dt", comma?",":"", __kmp_place_num_threads_per_core);
__kmp_str_buf_print(buffer, "%s'\n", buf.str );
__kmp_str_buf_free(&buf);
/*