[OpenMP][libomp] Add use-all syntax to KMP_HW_SUBSET

This patch allows the user to request all resources of a particular
layer (or core-attribute). The syntax of KMP_HW_SUBSET is modified
so the number of units requested is optional or can be replaced with an
'*' character.

e.g., KMP_HW_SUBSET=c:intel_atom@3 will use all the cores after offset 3
e.g., KMP_HW_SUBSET=*c:intel_core will use all the big cores
e.g., KMP_HW_SUBSET=*s,*c,1t will use all the sockets, all the cores on
      each socket, and 1 thread per core.

Differential Revision: https://reviews.llvm.org/D115826
This commit is contained in:
Jonathan Peyton 2021-12-15 14:36:44 -06:00
parent b600215e80
commit 6a556ecaf4
4 changed files with 33 additions and 13 deletions

View File

@ -468,7 +468,7 @@ An extended syntax is available when ``KMP_TOPOLOGY_METHOD=hwloc``. Depending on
resources are detected, you may be able to specify additional resources, such as
NUMA domains and groups of hardware resources that share certain cache levels.
**Basic syntax:** ``num_unitsID[@offset][:attribute] [,num_unitsID[@offset][:attribute]...]``
**Basic syntax:** ``[num_units|*]ID[@offset][:attribute] [,[num_units|*]ID[@offset][:attribute]...]``
Supported unit IDs are not case-sensitive.
@ -484,6 +484,11 @@ Supported unit IDs are not case-insensitive.
| ``T`` - thread
| ``num_units`` specifies the requested number of HW threads per core.
.. note::
``num_units`` can be left out or explicitly specified as ``*`` instead of a positive integer,
meaning that all available resources at that level are used.
e.g., ``1s,*c`` means use 1 socket and all the cores on that socket
``offset`` - (Optional) The number of units to skip.
``attribute`` - (Optional) An attribute differentiating resources at a particular level. The attributes available to users are:
@ -554,6 +559,8 @@ available hardware resources.
architecture, depending on ``KMP_TOPOLOGY_METHOD`` specified, as hwloc can
often detect more topology layers than the default method used by the OpenMP
run-time library.
* ``*c:eff1@3``: Use all available sockets, skip the first three cores of
efficiency 1, and then use the rest of the available cores of efficiency 1.
To see the result of the setting, you can specify ``verbose`` modifier in
``KMP_AFFINITY`` environment variable. The OpenMP run-time library will output

View File

@ -982,7 +982,8 @@ bool kmp_topology_t::filter_hw_subset() {
// Check to see if each layer's num & offset parameters are valid
max_count = get_ratio(level);
if (max_count < 0 || num + offset > max_count) {
if (max_count < 0 ||
(num != kmp_hw_subset_t::USE_ALL && num + offset > max_count)) {
bool plural = (num > 1);
KMP_WARNING(AffHWSubsetManyGeneric,
__kmp_hw_get_catalog_string(type, plural));
@ -1053,7 +1054,8 @@ bool kmp_topology_t::filter_hw_subset() {
int level_above = core_level - 1;
if (level_above >= 0) {
max_count = get_ncores_with_attr_per(item.attr[j], level_above);
if (max_count <= 0 || num + offset > max_count) {
if (max_count <= 0 ||
(num != kmp_hw_subset_t::USE_ALL && num + offset > max_count)) {
kmp_str_buf_t buf;
__kmp_hw_get_catalog_core_string(item.attr[j], &buf, num > 0);
KMP_WARNING(AffHWSubsetManyGeneric, buf.str);
@ -1175,7 +1177,8 @@ bool kmp_topology_t::filter_hw_subset() {
sub_id = core_type_sub_ids.get_sub_id(hw_thread);
else
sub_id = core_eff_sub_ids.get_sub_id(hw_thread);
if (sub_id < offset || sub_id >= offset + num) {
if (sub_id < offset ||
(num != kmp_hw_subset_t::USE_ALL && sub_id >= offset + num)) {
should_be_filtered = true;
break;
}
@ -1183,7 +1186,8 @@ bool kmp_topology_t::filter_hw_subset() {
int num = hw_subset_item.num[0];
int offset = hw_subset_item.offset[0];
if (hw_thread.sub_ids[level] < offset ||
hw_thread.sub_ids[level] >= offset + num) {
(num != kmp_hw_subset_t::USE_ALL &&
hw_thread.sub_ids[level] >= offset + num)) {
should_be_filtered = true;
break;
}

View File

@ -15,6 +15,7 @@
#include "kmp.h"
#include "kmp_os.h"
#include <limits>
#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
@ -879,6 +880,8 @@ public:
int offset[MAX_ATTRS];
kmp_hw_attr_t attr[MAX_ATTRS];
};
// Put parentheses around max to avoid accidental use of Windows max macro.
const static int USE_ALL = (std::numeric_limits<int>::max)();
private:
int depth;

View File

@ -4978,10 +4978,20 @@ static void __kmp_stg_parse_hw_subset(char const *name, char const *value,
char *attr_ptr;
int offset = 0;
kmp_hw_attr_t attr;
int num =
atoi(core_components[j]); // each component should start with a number
if (num <= 0) {
goto err; // only positive integers are valid for count
int num;
// components may begin with an optional count of the number of resources
if (isdigit(*core_components[j])) {
num = atoi(core_components[j]);
if (num <= 0) {
goto err; // only positive integers are valid for count
}
pos = core_components[j] + strspn(core_components[j], digits);
} else if (*core_components[j] == '*') {
num = kmp_hw_subset_t::USE_ALL;
pos = core_components[j] + 1;
} else {
num = kmp_hw_subset_t::USE_ALL;
pos = core_components[j];
}
offset_ptr = strchr(core_components[j], '@');
@ -5016,10 +5026,6 @@ static void __kmp_stg_parse_hw_subset(char const *name, char const *value,
}
*attr_ptr = '\0'; // cut the attribute from the component
}
pos = core_components[j] + strspn(core_components[j], digits);
if (pos == core_components[j]) {
goto err;
}
// detect the component type
kmp_hw_t type = __kmp_stg_parse_hw_subset_name(pos);
if (type == KMP_HW_UNKNOWN) {