forked from OSchip/llvm-project
[OpenMP] Refactor/Rework topology discovery code
This patch does the following: 1) Introduce kmp_topology_t as the runtime-friendly structure (the corresponding global variable is __kmp_topology) to determine the exact machine topology, which can vary widely among current and future architectures. The current design is not easy to expand beyond the assumed three-layer topology (sockets, cores, and threads), so a rework capable of using the existing KMP_AFFINITY mechanisms is required. This new topology structure has: * The depth and types of the topology * Ratio count for each consecutive level (e.g., number of cores per socket, number of threads per core) * Absolute count for each level (e.g., 2 sockets, 16 cores, 32 threads) * Equivalent topology layer map (e.g., NUMA domain is equivalent to socket, L1/L2 cache equivalent to core) * Whether it is uniform or not. The hardware threads are represented with the kmp_hw_thread_t structure. This structure contains the ids (e.g., socket 0, core 1, thread 0) and other information grabbed from the previous Address structure. The kmp_topology_t structure contains an array of these. 2) Generalize the KMP_HW_SUBSET environment variable for the new kmp_topology_t structure. The algorithm doesn't assume any order among tiles, NUMA domains, sockets, cores, and threads. Instead, it just parses the environment variable, makes sure it is consistent with the detected topology (including taking into account equivalent layers), and then trims away the unneeded subset of hardware threads. To enable this, a new kmp_hw_subset_t structure is introduced which contains a vector of items (hardware type, number user wants, offset). Any keyword within __kmp_hw_get_keyword() can be used as a name and can be shortened as well; e.g., KMP_HW_SUBSET=1s,2numa,4tile,2c,3t can be used on the KNL SNC-4 machine. 3) Simplify topology detection functions so they only do the singular task of detecting the machine's topology. Printing and all canonicalizing functionality is now done afterwards. 
As a result, many lines of duplicated code are eliminated. 4) Add new ll_caches and numa_domains to OMP_PLACES, and consequently, KMP_AFFINITY's granularity setting. All the names within __kmp_hw_get_keyword() are available for use in OMP_PLACES or KMP_AFFINITY's granularity setting. 5) Simplify and future-proof code where explicit lists of allowed affinity-setting keywords appear inside if() conditions. 6) Add x86 CPUID leaf 4 cache detection to the existing x2apic id method so equivalent caches can be detected (in particular for the ll_caches place). Differential Revision: https://reviews.llvm.org/D100997
This commit is contained in:
parent
32b500431c
commit
9982f33e2c
|
@ -124,6 +124,9 @@ ProcGroup "processor group"
|
||||||
ProcGroups "processor groups"
|
ProcGroups "processor groups"
|
||||||
Unknown "unknown"
|
Unknown "unknown"
|
||||||
NoLeaf31Support "cpuid leaf 31 not supported"
|
NoLeaf31Support "cpuid leaf 31 not supported"
|
||||||
|
HwlocFailed "Hwloc api failure"
|
||||||
|
LLCache "LL cache"
|
||||||
|
LLCaches "LL caches"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -355,6 +358,7 @@ OmptOutdatedWorkshare "OMPT: Cannot determine workshare type; using the d
|
||||||
"This issue is fixed in an up-to-date compiler."
|
"This issue is fixed in an up-to-date compiler."
|
||||||
OmpNoAllocator "Allocator %1$s is not available, will use default allocator."
|
OmpNoAllocator "Allocator %1$s is not available, will use default allocator."
|
||||||
TopologyGeneric "%1$s: %2$s (%3$d total cores)"
|
TopologyGeneric "%1$s: %2$s (%3$d total cores)"
|
||||||
|
AffGranularityBad "%1$s: granularity setting: %2$s does not exist in topology. Using granularity=%3$s instead."
|
||||||
|
|
||||||
# --- OpenMP errors detected at runtime ---
|
# --- OpenMP errors detected at runtime ---
|
||||||
#
|
#
|
||||||
|
@ -458,6 +462,11 @@ AffNotUsingHwloc "%1$s: Affinity not capable, using hwloc."
|
||||||
UserDirectedError "%1$s: Encountered user-directed error: %2$s."
|
UserDirectedError "%1$s: Encountered user-directed error: %2$s."
|
||||||
UserDirectedWarning "%1$s: Encountered user-directed warning: %2$s."
|
UserDirectedWarning "%1$s: Encountered user-directed warning: %2$s."
|
||||||
FailedToCreateTeam "Failed to create teams between lower bound (%1$d) and upper bound (%2$d)."
|
FailedToCreateTeam "Failed to create teams between lower bound (%1$d) and upper bound (%2$d)."
|
||||||
|
AffHWSubsetManyGeneric "KMP_HW_SUBSET ignored: %1$s: too many requested."
|
||||||
|
AffHWSubsetNotExistGeneric "KMP_HW_SUBSET ignored: %1$s: level not detected in machine topology."
|
||||||
|
AffHWSubsetEqvLayers "KMP_HW_SUBSET ignored: %1$s, %2$s: layers are equivalent, please only specify one."
|
||||||
|
AffHWSubsetOutOfOrder "KMP_HW_SUBSET ignored: %1$s layer should come after %2$s."
|
||||||
|
AffEqualTopologyTypes "%1$s: topology layer \"%2$s\" is equivalent to \"%3$s\"."
|
||||||
|
|
||||||
# --------------------------------------------------------------------------------------------------
|
# --------------------------------------------------------------------------------------------------
|
||||||
-*- HINTS -*-
|
-*- HINTS -*-
|
||||||
|
|
|
@ -597,11 +597,11 @@ typedef int PACKED_REDUCTION_METHOD_T;
|
||||||
|
|
||||||
enum kmp_hw_t : int {
|
enum kmp_hw_t : int {
|
||||||
KMP_HW_UNKNOWN = -1,
|
KMP_HW_UNKNOWN = -1,
|
||||||
KMP_HW_MACHINE = 0,
|
KMP_HW_SOCKET = 0,
|
||||||
KMP_HW_SOCKET,
|
|
||||||
KMP_HW_PROC_GROUP,
|
KMP_HW_PROC_GROUP,
|
||||||
KMP_HW_NUMA,
|
KMP_HW_NUMA,
|
||||||
KMP_HW_DIE,
|
KMP_HW_DIE,
|
||||||
|
KMP_HW_LLC,
|
||||||
KMP_HW_L3,
|
KMP_HW_L3,
|
||||||
KMP_HW_TILE,
|
KMP_HW_TILE,
|
||||||
KMP_HW_MODULE,
|
KMP_HW_MODULE,
|
||||||
|
@ -612,13 +612,16 @@ enum kmp_hw_t : int {
|
||||||
KMP_HW_LAST
|
KMP_HW_LAST
|
||||||
};
|
};
|
||||||
|
|
||||||
#define KMP_ASSERT_VALID_HW_TYPE(type) \
|
#define KMP_DEBUG_ASSERT_VALID_HW_TYPE(type) \
|
||||||
KMP_DEBUG_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)
|
KMP_DEBUG_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)
|
||||||
|
#define KMP_ASSERT_VALID_HW_TYPE(type) \
|
||||||
|
KMP_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)
|
||||||
|
|
||||||
#define KMP_FOREACH_HW_TYPE(type) \
|
#define KMP_FOREACH_HW_TYPE(type) \
|
||||||
for (kmp_hw_t type = (kmp_hw_t)0; type < KMP_HW_LAST; \
|
for (kmp_hw_t type = (kmp_hw_t)0; type < KMP_HW_LAST; \
|
||||||
type = (kmp_hw_t)((int)type + 1))
|
type = (kmp_hw_t)((int)type + 1))
|
||||||
|
|
||||||
|
const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural = false);
|
||||||
const char *__kmp_hw_get_catalog_string(kmp_hw_t type, bool plural = false);
|
const char *__kmp_hw_get_catalog_string(kmp_hw_t type, bool plural = false);
|
||||||
|
|
||||||
/* Only Linux* OS and Windows* OS support thread affinity. */
|
/* Only Linux* OS and Windows* OS support thread affinity. */
|
||||||
|
@ -655,8 +658,6 @@ extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
|
||||||
#if KMP_USE_HWLOC
|
#if KMP_USE_HWLOC
|
||||||
extern hwloc_topology_t __kmp_hwloc_topology;
|
extern hwloc_topology_t __kmp_hwloc_topology;
|
||||||
extern int __kmp_hwloc_error;
|
extern int __kmp_hwloc_error;
|
||||||
extern int __kmp_numa_detected;
|
|
||||||
extern int __kmp_tile_depth;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern size_t __kmp_affin_mask_size;
|
extern size_t __kmp_affin_mask_size;
|
||||||
|
@ -784,23 +785,6 @@ enum affinity_type {
|
||||||
affinity_default
|
affinity_default
|
||||||
};
|
};
|
||||||
|
|
||||||
enum affinity_gran {
|
|
||||||
affinity_gran_fine = 0,
|
|
||||||
affinity_gran_thread,
|
|
||||||
affinity_gran_core,
|
|
||||||
affinity_gran_tile,
|
|
||||||
affinity_gran_die,
|
|
||||||
affinity_gran_numa,
|
|
||||||
affinity_gran_package,
|
|
||||||
affinity_gran_node,
|
|
||||||
#if KMP_GROUP_AFFINITY
|
|
||||||
// The "group" granularity isn't necessarily coarser than all of the
|
|
||||||
// other levels, but we put it last in the enum.
|
|
||||||
affinity_gran_group,
|
|
||||||
#endif /* KMP_GROUP_AFFINITY */
|
|
||||||
affinity_gran_default
|
|
||||||
};
|
|
||||||
|
|
||||||
enum affinity_top_method {
|
enum affinity_top_method {
|
||||||
affinity_top_method_all = 0, // try all (supported) methods, in order
|
affinity_top_method_all = 0, // try all (supported) methods, in order
|
||||||
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
|
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
|
||||||
|
@ -822,7 +806,7 @@ enum affinity_top_method {
|
||||||
#define affinity_respect_mask_default (-1)
|
#define affinity_respect_mask_default (-1)
|
||||||
|
|
||||||
extern enum affinity_type __kmp_affinity_type; /* Affinity type */
|
extern enum affinity_type __kmp_affinity_type; /* Affinity type */
|
||||||
extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */
|
extern kmp_hw_t __kmp_affinity_gran; /* Affinity granularity */
|
||||||
extern int __kmp_affinity_gran_levels; /* corresponding int value */
|
extern int __kmp_affinity_gran_levels; /* corresponding int value */
|
||||||
extern int __kmp_affinity_dups; /* Affinity duplicate masks */
|
extern int __kmp_affinity_dups; /* Affinity duplicate masks */
|
||||||
extern enum affinity_top_method __kmp_affinity_top_method;
|
extern enum affinity_top_method __kmp_affinity_top_method;
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -598,91 +598,274 @@ class KMPNativeAffinity : public KMPAffinity {
|
||||||
#endif /* KMP_OS_WINDOWS */
|
#endif /* KMP_OS_WINDOWS */
|
||||||
#endif /* KMP_AFFINITY_SUPPORTED */
|
#endif /* KMP_AFFINITY_SUPPORTED */
|
||||||
|
|
||||||
class Address {
|
class kmp_hw_thread_t {
|
||||||
public:
|
public:
|
||||||
static const unsigned maxDepth = 32;
|
static const int UNKNOWN_ID = -1;
|
||||||
unsigned labels[maxDepth];
|
static int compare_ids(const void *a, const void *b);
|
||||||
unsigned childNums[maxDepth];
|
static int compare_compact(const void *a, const void *b);
|
||||||
unsigned depth;
|
int ids[KMP_HW_LAST];
|
||||||
unsigned leader;
|
int sub_ids[KMP_HW_LAST];
|
||||||
Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
|
bool leader;
|
||||||
Address &operator=(const Address &b) {
|
int os_id;
|
||||||
depth = b.depth;
|
void print() const;
|
||||||
for (unsigned i = 0; i < depth; i++) {
|
void clear() {
|
||||||
labels[i] = b.labels[i];
|
for (int i = 0; i < (int)KMP_HW_LAST; ++i)
|
||||||
childNums[i] = b.childNums[i];
|
ids[i] = UNKNOWN_ID;
|
||||||
}
|
leader = false;
|
||||||
leader = FALSE;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
bool operator==(const Address &b) const {
|
|
||||||
if (depth != b.depth)
|
|
||||||
return false;
|
|
||||||
for (unsigned i = 0; i < depth; i++)
|
|
||||||
if (labels[i] != b.labels[i])
|
|
||||||
return false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
bool isClose(const Address &b, int level) const {
|
|
||||||
if (depth != b.depth)
|
|
||||||
return false;
|
|
||||||
if ((unsigned)level >= depth)
|
|
||||||
return true;
|
|
||||||
for (unsigned i = 0; i < (depth - level); i++)
|
|
||||||
if (labels[i] != b.labels[i])
|
|
||||||
return false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
bool operator!=(const Address &b) const { return !operator==(b); }
|
|
||||||
void print() const {
|
|
||||||
unsigned i;
|
|
||||||
printf("Depth: %u --- ", depth);
|
|
||||||
for (i = 0; i < depth; i++) {
|
|
||||||
printf("%u ", labels[i]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class AddrUnsPair {
|
class kmp_topology_t {
|
||||||
public:
|
|
||||||
Address first;
|
struct flags_t {
|
||||||
unsigned second;
|
int uniform : 1;
|
||||||
AddrUnsPair(Address _first, unsigned _second)
|
int reserved : 31;
|
||||||
: first(_first), second(_second) {}
|
|
||||||
AddrUnsPair &operator=(const AddrUnsPair &b) {
|
|
||||||
first = b.first;
|
|
||||||
second = b.second;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
void print() const {
|
|
||||||
printf("first = ");
|
|
||||||
first.print();
|
|
||||||
printf(" --- second = %u", second);
|
|
||||||
}
|
|
||||||
bool operator==(const AddrUnsPair &b) const {
|
|
||||||
if (first != b.first)
|
|
||||||
return false;
|
|
||||||
if (second != b.second)
|
|
||||||
return false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
|
int depth;
|
||||||
const Address *aa = &(((const AddrUnsPair *)a)->first);
|
|
||||||
const Address *bb = &(((const AddrUnsPair *)b)->first);
|
// The following arrays are all 'depth' long
|
||||||
unsigned depth = aa->depth;
|
|
||||||
unsigned i;
|
// Ordered array of the types in the topology
|
||||||
KMP_DEBUG_ASSERT(depth == bb->depth);
|
kmp_hw_t *types;
|
||||||
for (i = 0; i < depth; i++) {
|
|
||||||
if (aa->labels[i] < bb->labels[i])
|
// Keep quick topology ratios, for non-uniform topologies,
|
||||||
|
// this ratio holds the max number of itemAs per itemB
|
||||||
|
// e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
|
||||||
|
int *ratio;
|
||||||
|
|
||||||
|
// Storage containing the absolute number of each topology layer
|
||||||
|
int *count;
|
||||||
|
|
||||||
|
// The hardware threads array
|
||||||
|
// hw_threads is num_hw_threads long
|
||||||
|
// Each hw_thread's ids and sub_ids are depth deep
|
||||||
|
int num_hw_threads;
|
||||||
|
kmp_hw_thread_t *hw_threads;
|
||||||
|
|
||||||
|
// Equivalence hash where the key is the hardware topology item
|
||||||
|
// and the value is the equivalent hardware topology type in the
|
||||||
|
// types[] array, if the value is KMP_HW_UNKNOWN, then there is no
|
||||||
|
// known equivalence for the topology type
|
||||||
|
kmp_hw_t equivalent[KMP_HW_LAST];
|
||||||
|
|
||||||
|
// Flags describing the topology
|
||||||
|
flags_t flags;
|
||||||
|
|
||||||
|
// Count each item & get the num x's per y
|
||||||
|
// e.g., get the number of cores and the number of threads per core
|
||||||
|
// for each (x, y) in (KMP_HW_* , KMP_HW_*)
|
||||||
|
void _gather_enumeration_information();
|
||||||
|
|
||||||
|
// Remove layers that don't add information to the topology.
|
||||||
|
// This is done by having the layer take on the id = UNKNOWN_ID (-1)
|
||||||
|
void _remove_radix1_layers();
|
||||||
|
|
||||||
|
// Find out if the topology is uniform
|
||||||
|
void _discover_uniformity();
|
||||||
|
|
||||||
|
// Set all the sub_ids for each hardware thread
|
||||||
|
void _set_sub_ids();
|
||||||
|
|
||||||
|
// Set global affinity variables describing the number of threads per
|
||||||
|
// core, the number of packages, the number of cores per package, and
|
||||||
|
// the number of cores.
|
||||||
|
void _set_globals();
|
||||||
|
|
||||||
|
// Set the last level cache equivalent type
|
||||||
|
void _set_last_level_cache();
|
||||||
|
|
||||||
|
public:
|
||||||
|
// Force use of allocate()/deallocate()
|
||||||
|
kmp_topology_t() = delete;
|
||||||
|
kmp_topology_t(const kmp_topology_t &t) = delete;
|
||||||
|
kmp_topology_t(kmp_topology_t &&t) = delete;
|
||||||
|
kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
|
||||||
|
kmp_topology_t &operator=(kmp_topology_t &&t) = delete;
|
||||||
|
|
||||||
|
static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
|
||||||
|
static void deallocate(kmp_topology_t *);
|
||||||
|
|
||||||
|
// Functions used in create_map() routines
|
||||||
|
kmp_hw_thread_t &at(int index) {
|
||||||
|
KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
|
||||||
|
return hw_threads[index];
|
||||||
|
}
|
||||||
|
const kmp_hw_thread_t &at(int index) const {
|
||||||
|
KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
|
||||||
|
return hw_threads[index];
|
||||||
|
}
|
||||||
|
int get_num_hw_threads() const { return num_hw_threads; }
|
||||||
|
void sort_ids() {
|
||||||
|
qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
|
||||||
|
kmp_hw_thread_t::compare_ids);
|
||||||
|
}
|
||||||
|
// Check if the hardware ids are unique, if they are
|
||||||
|
// return true, otherwise return false
|
||||||
|
bool check_ids() const;
|
||||||
|
|
||||||
|
// Function to call after the create_map() routine
|
||||||
|
void canonicalize();
|
||||||
|
void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);
|
||||||
|
|
||||||
|
// Functions used after canonicalize() called
|
||||||
|
bool filter_hw_subset();
|
||||||
|
bool is_close(int hwt1, int hwt2, int level) const;
|
||||||
|
bool is_uniform() const { return flags.uniform; }
|
||||||
|
// Tell whether a type is a valid type in the topology
|
||||||
|
// returns KMP_HW_UNKNOWN when there is no equivalent type
|
||||||
|
kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
|
||||||
|
// Set type1 = type2
|
||||||
|
void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
|
||||||
|
KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
|
||||||
|
KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
|
||||||
|
kmp_hw_t real_type2 = equivalent[type2];
|
||||||
|
if (real_type2 == KMP_HW_UNKNOWN)
|
||||||
|
real_type2 = type2;
|
||||||
|
equivalent[type1] = real_type2;
|
||||||
|
// This loop is required since any of the types may have been set to
|
||||||
|
// be equivalent to type1. They all must be checked and reset to type2.
|
||||||
|
KMP_FOREACH_HW_TYPE(type) {
|
||||||
|
if (equivalent[type] == type1) {
|
||||||
|
equivalent[type] = real_type2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Calculate number of types corresponding to level1
|
||||||
|
// per types corresponding to level2 (e.g., number of threads per core)
|
||||||
|
int calculate_ratio(int level1, int level2) const {
|
||||||
|
KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
|
||||||
|
KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
|
||||||
|
int r = 1;
|
||||||
|
for (int level = level1; level > level2; --level)
|
||||||
|
r *= ratio[level];
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
int get_ratio(int level) const {
|
||||||
|
KMP_DEBUG_ASSERT(level >= 0 && level < depth);
|
||||||
|
return ratio[level];
|
||||||
|
}
|
||||||
|
int get_depth() const { return depth; };
|
||||||
|
kmp_hw_t get_type(int level) const {
|
||||||
|
KMP_DEBUG_ASSERT(level >= 0 && level < depth);
|
||||||
|
return types[level];
|
||||||
|
}
|
||||||
|
int get_level(kmp_hw_t type) const {
|
||||||
|
KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
|
||||||
|
int eq_type = equivalent[type];
|
||||||
|
if (eq_type == KMP_HW_UNKNOWN)
|
||||||
|
return -1;
|
||||||
|
for (int i = 0; i < depth; ++i)
|
||||||
|
if (types[i] == eq_type)
|
||||||
|
return i;
|
||||||
return -1;
|
return -1;
|
||||||
if (aa->labels[i] > bb->labels[i])
|
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
return 0;
|
int get_count(int level) const {
|
||||||
|
KMP_DEBUG_ASSERT(level >= 0 && level < depth);
|
||||||
|
return count[level];
|
||||||
}
|
}
|
||||||
|
#if KMP_AFFINITY_SUPPORTED
|
||||||
|
void sort_compact() {
|
||||||
|
qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
|
||||||
|
kmp_hw_thread_t::compare_compact);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
void print(const char *env_var = "KMP_AFFINITY") const;
|
||||||
|
void dump() const;
|
||||||
|
};
|
||||||
|
|
||||||
|
class kmp_hw_subset_t {
|
||||||
|
public:
|
||||||
|
struct item_t {
|
||||||
|
int num;
|
||||||
|
kmp_hw_t type;
|
||||||
|
int offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
private:
|
||||||
|
int depth;
|
||||||
|
int capacity;
|
||||||
|
item_t *items;
|
||||||
|
kmp_uint64 set;
|
||||||
|
bool absolute;
|
||||||
|
// The set must be able to handle up to KMP_HW_LAST number of layers
|
||||||
|
KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
|
||||||
|
|
||||||
|
public:
|
||||||
|
// Force use of allocate()/deallocate()
|
||||||
|
kmp_hw_subset_t() = delete;
|
||||||
|
kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
|
||||||
|
kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
|
||||||
|
kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
|
||||||
|
kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;
|
||||||
|
|
||||||
|
static kmp_hw_subset_t *allocate() {
|
||||||
|
int initial_capacity = 5;
|
||||||
|
kmp_hw_subset_t *retval =
|
||||||
|
(kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
|
||||||
|
retval->depth = 0;
|
||||||
|
retval->capacity = initial_capacity;
|
||||||
|
retval->set = 0ull;
|
||||||
|
retval->absolute = false;
|
||||||
|
retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
static void deallocate(kmp_hw_subset_t *subset) {
|
||||||
|
__kmp_free(subset->items);
|
||||||
|
__kmp_free(subset);
|
||||||
|
}
|
||||||
|
void set_absolute() { absolute = true; }
|
||||||
|
bool is_absolute() const { return absolute; }
|
||||||
|
void push_back(int num, kmp_hw_t type, int offset) {
|
||||||
|
if (depth == capacity - 1) {
|
||||||
|
capacity *= 2;
|
||||||
|
item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
|
||||||
|
for (int i = 0; i < depth; ++i)
|
||||||
|
new_items[i] = items[i];
|
||||||
|
__kmp_free(items);
|
||||||
|
items = new_items;
|
||||||
|
}
|
||||||
|
items[depth].num = num;
|
||||||
|
items[depth].type = type;
|
||||||
|
items[depth].offset = offset;
|
||||||
|
depth++;
|
||||||
|
set |= (1ull << type);
|
||||||
|
}
|
||||||
|
int get_depth() const { return depth; }
|
||||||
|
const item_t &at(int index) const {
|
||||||
|
KMP_DEBUG_ASSERT(index >= 0 && index < depth);
|
||||||
|
return items[index];
|
||||||
|
}
|
||||||
|
item_t &at(int index) {
|
||||||
|
KMP_DEBUG_ASSERT(index >= 0 && index < depth);
|
||||||
|
return items[index];
|
||||||
|
}
|
||||||
|
void remove(int index) {
|
||||||
|
KMP_DEBUG_ASSERT(index >= 0 && index < depth);
|
||||||
|
set &= ~(1ull << items[index].type);
|
||||||
|
for (int j = index + 1; j < depth; ++j) {
|
||||||
|
items[j - 1] = items[j];
|
||||||
|
}
|
||||||
|
depth--;
|
||||||
|
}
|
||||||
|
bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
|
||||||
|
void dump() const {
|
||||||
|
printf("**********************\n");
|
||||||
|
printf("*** kmp_hw_subset: ***\n");
|
||||||
|
printf("* depth: %d\n", depth);
|
||||||
|
printf("* items:\n");
|
||||||
|
for (int i = 0; i < depth; ++i) {
|
||||||
|
printf("num: %d, type: %s, offset: %d\n", items[i].num,
|
||||||
|
__kmp_hw_get_keyword(items[i].type), items[i].offset);
|
||||||
|
}
|
||||||
|
printf("* set: 0x%llx\n", set);
|
||||||
|
printf("* absolute: %d\n", absolute);
|
||||||
|
printf("**********************\n");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
extern kmp_topology_t *__kmp_topology;
|
||||||
|
extern kmp_hw_subset_t *__kmp_hw_subset;
|
||||||
|
|
||||||
/* A structure for holding machine-specific hierarchy info to be computed once
|
/* A structure for holding machine-specific hierarchy info to be computed once
|
||||||
at init. This structure represents a mapping of threads to the actual machine
|
at init. This structure represents a mapping of threads to the actual machine
|
||||||
|
@ -721,18 +904,10 @@ public:
|
||||||
kmp_uint32 *numPerLevel;
|
kmp_uint32 *numPerLevel;
|
||||||
kmp_uint32 *skipPerLevel;
|
kmp_uint32 *skipPerLevel;
|
||||||
|
|
||||||
void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
|
void deriveLevels() {
|
||||||
int hier_depth = adr2os[0].first.depth;
|
int hier_depth = __kmp_topology->get_depth();
|
||||||
int level = 0;
|
for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
|
||||||
for (int i = hier_depth - 1; i >= 0; --i) {
|
numPerLevel[level] = __kmp_topology->get_ratio(i);
|
||||||
int max = -1;
|
|
||||||
for (int j = 0; j < num_addrs; ++j) {
|
|
||||||
int next = adr2os[j].first.childNums[i];
|
|
||||||
if (next > max)
|
|
||||||
max = next;
|
|
||||||
}
|
|
||||||
numPerLevel[level] = max + 1;
|
|
||||||
++level;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -747,7 +922,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void init(AddrUnsPair *adr2os, int num_addrs) {
|
void init(int num_addrs) {
|
||||||
kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
|
kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
|
||||||
&uninitialized, not_initialized, initializing);
|
&uninitialized, not_initialized, initializing);
|
||||||
if (bool_result == 0) { // Wait for initialization
|
if (bool_result == 0) { // Wait for initialization
|
||||||
|
@ -774,10 +949,8 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort table by physical ID
|
// Sort table by physical ID
|
||||||
if (adr2os) {
|
if (__kmp_topology && __kmp_topology->get_depth() > 0) {
|
||||||
qsort(adr2os, num_addrs, sizeof(*adr2os),
|
deriveLevels();
|
||||||
__kmp_affinity_cmp_Address_labels);
|
|
||||||
deriveLevels(adr2os, num_addrs);
|
|
||||||
} else {
|
} else {
|
||||||
numPerLevel[0] = maxLeaves;
|
numPerLevel[0] = maxLeaves;
|
||||||
numPerLevel[1] = num_addrs / maxLeaves;
|
numPerLevel[1] = num_addrs / maxLeaves;
|
||||||
|
|
|
@ -247,8 +247,6 @@ KMPAffinity *__kmp_affinity_dispatch = NULL;
|
||||||
#if KMP_USE_HWLOC
|
#if KMP_USE_HWLOC
|
||||||
int __kmp_hwloc_error = FALSE;
|
int __kmp_hwloc_error = FALSE;
|
||||||
hwloc_topology_t __kmp_hwloc_topology = NULL;
|
hwloc_topology_t __kmp_hwloc_topology = NULL;
|
||||||
int __kmp_numa_detected = FALSE;
|
|
||||||
int __kmp_tile_depth = 0;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if KMP_OS_WINDOWS
|
#if KMP_OS_WINDOWS
|
||||||
|
@ -263,7 +261,7 @@ kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity = NULL;
|
||||||
|
|
||||||
size_t __kmp_affin_mask_size = 0;
|
size_t __kmp_affin_mask_size = 0;
|
||||||
enum affinity_type __kmp_affinity_type = affinity_default;
|
enum affinity_type __kmp_affinity_type = affinity_default;
|
||||||
enum affinity_gran __kmp_affinity_gran = affinity_gran_default;
|
kmp_hw_t __kmp_affinity_gran = KMP_HW_UNKNOWN;
|
||||||
int __kmp_affinity_gran_levels = -1;
|
int __kmp_affinity_gran_levels = -1;
|
||||||
int __kmp_affinity_dups = TRUE;
|
int __kmp_affinity_dups = TRUE;
|
||||||
enum affinity_top_method __kmp_affinity_top_method =
|
enum affinity_top_method __kmp_affinity_top_method =
|
||||||
|
@ -286,15 +284,6 @@ int __kmp_affinity_num_places = 0;
|
||||||
int __kmp_display_affinity = FALSE;
|
int __kmp_display_affinity = FALSE;
|
||||||
char *__kmp_affinity_format = NULL;
|
char *__kmp_affinity_format = NULL;
|
||||||
|
|
||||||
kmp_hws_item_t __kmp_hws_socket = {0, 0};
|
|
||||||
kmp_hws_item_t __kmp_hws_die = {0, 0};
|
|
||||||
kmp_hws_item_t __kmp_hws_node = {0, 0};
|
|
||||||
kmp_hws_item_t __kmp_hws_tile = {0, 0};
|
|
||||||
kmp_hws_item_t __kmp_hws_core = {0, 0};
|
|
||||||
kmp_hws_item_t __kmp_hws_proc = {0, 0};
|
|
||||||
int __kmp_hws_requested = 0;
|
|
||||||
int __kmp_hws_abs_flag = 0; // absolute or per-item number requested
|
|
||||||
|
|
||||||
kmp_int32 __kmp_default_device = 0;
|
kmp_int32 __kmp_default_device = 0;
|
||||||
|
|
||||||
kmp_tasking_mode_t __kmp_tasking_mode = tskm_task_teams;
|
kmp_tasking_mode_t __kmp_tasking_mode = tskm_task_teams;
|
||||||
|
|
|
@ -2069,9 +2069,9 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
|
||||||
enum affinity_type *out_type,
|
enum affinity_type *out_type,
|
||||||
char **out_proclist, int *out_verbose,
|
char **out_proclist, int *out_verbose,
|
||||||
int *out_warn, int *out_respect,
|
int *out_warn, int *out_respect,
|
||||||
enum affinity_gran *out_gran,
|
kmp_hw_t *out_gran, int *out_gran_levels,
|
||||||
int *out_gran_levels, int *out_dups,
|
int *out_dups, int *out_compact,
|
||||||
int *out_compact, int *out_offset) {
|
int *out_offset) {
|
||||||
char *buffer = NULL; // Copy of env var value.
|
char *buffer = NULL; // Copy of env var value.
|
||||||
char *buf = NULL; // Buffer for strtok_r() function.
|
char *buf = NULL; // Buffer for strtok_r() function.
|
||||||
char *next = NULL; // end of token / start of next.
|
char *next = NULL; // end of token / start of next.
|
||||||
|
@ -2087,6 +2087,7 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
|
||||||
int respect = 0;
|
int respect = 0;
|
||||||
int gran = 0;
|
int gran = 0;
|
||||||
int dups = 0;
|
int dups = 0;
|
||||||
|
bool set = false;
|
||||||
|
|
||||||
KMP_ASSERT(value != NULL);
|
KMP_ASSERT(value != NULL);
|
||||||
|
|
||||||
|
@ -2232,33 +2233,37 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
|
||||||
SKIP_WS(next);
|
SKIP_WS(next);
|
||||||
|
|
||||||
buf = next;
|
buf = next;
|
||||||
|
|
||||||
|
// Try any hardware topology type for granularity
|
||||||
|
KMP_FOREACH_HW_TYPE(type) {
|
||||||
|
const char *name = __kmp_hw_get_keyword(type);
|
||||||
|
if (__kmp_match_str(name, buf, CCAST(const char **, &next))) {
|
||||||
|
set_gran(type, -1);
|
||||||
|
buf = next;
|
||||||
|
set = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!set) {
|
||||||
|
// Support older names for different granularity layers
|
||||||
if (__kmp_match_str("fine", buf, CCAST(const char **, &next))) {
|
if (__kmp_match_str("fine", buf, CCAST(const char **, &next))) {
|
||||||
set_gran(affinity_gran_fine, -1);
|
set_gran(KMP_HW_THREAD, -1);
|
||||||
buf = next;
|
buf = next;
|
||||||
} else if (__kmp_match_str("thread", buf, CCAST(const char **, &next))) {
|
set = true;
|
||||||
set_gran(affinity_gran_thread, -1);
|
} else if (__kmp_match_str("package", buf,
|
||||||
buf = next;
|
CCAST(const char **, &next))) {
|
||||||
} else if (__kmp_match_str("core", buf, CCAST(const char **, &next))) {
|
set_gran(KMP_HW_SOCKET, -1);
|
||||||
set_gran(affinity_gran_core, -1);
|
|
||||||
buf = next;
|
|
||||||
#if KMP_USE_HWLOC
|
|
||||||
} else if (__kmp_match_str("tile", buf, CCAST(const char **, &next))) {
|
|
||||||
set_gran(affinity_gran_tile, -1);
|
|
||||||
buf = next;
|
|
||||||
#endif
|
|
||||||
} else if (__kmp_match_str("die", buf, CCAST(const char **, &next))) {
|
|
||||||
set_gran(affinity_gran_die, -1);
|
|
||||||
buf = next;
|
|
||||||
} else if (__kmp_match_str("package", buf, CCAST(const char **, &next))) {
|
|
||||||
set_gran(affinity_gran_package, -1);
|
|
||||||
buf = next;
|
buf = next;
|
||||||
|
set = true;
|
||||||
} else if (__kmp_match_str("node", buf, CCAST(const char **, &next))) {
|
} else if (__kmp_match_str("node", buf, CCAST(const char **, &next))) {
|
||||||
set_gran(affinity_gran_node, -1);
|
set_gran(KMP_HW_NUMA, -1);
|
||||||
buf = next;
|
buf = next;
|
||||||
|
set = true;
|
||||||
#if KMP_GROUP_AFFINITY
|
#if KMP_GROUP_AFFINITY
|
||||||
} else if (__kmp_match_str("group", buf, CCAST(const char **, &next))) {
|
} else if (__kmp_match_str("group", buf, CCAST(const char **, &next))) {
|
||||||
set_gran(affinity_gran_group, -1);
|
set_gran(KMP_HW_PROC_GROUP, -1);
|
||||||
buf = next;
|
buf = next;
|
||||||
|
set = true;
|
||||||
#endif /* KMP_GROUP AFFINITY */
|
#endif /* KMP_GROUP AFFINITY */
|
||||||
} else if ((*buf >= '0') && (*buf <= '9')) {
|
} else if ((*buf >= '0') && (*buf <= '9')) {
|
||||||
int n;
|
int n;
|
||||||
|
@ -2267,11 +2272,13 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
|
||||||
n = __kmp_str_to_int(buf, *next);
|
n = __kmp_str_to_int(buf, *next);
|
||||||
KMP_ASSERT(n >= 0);
|
KMP_ASSERT(n >= 0);
|
||||||
buf = next;
|
buf = next;
|
||||||
set_gran(affinity_gran_default, n);
|
set_gran(KMP_HW_UNKNOWN, n);
|
||||||
|
set = true;
|
||||||
} else {
|
} else {
|
||||||
EMIT_WARN(TRUE, (AffInvalidParam, name, start));
|
EMIT_WARN(TRUE, (AffInvalidParam, name, start));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
} else if (__kmp_match_str("proclist", buf, CCAST(const char **, &next))) {
|
} else if (__kmp_match_str("proclist", buf, CCAST(const char **, &next))) {
|
||||||
char *temp_proclist;
|
char *temp_proclist;
|
||||||
|
|
||||||
|
@ -2377,20 +2384,20 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
|
||||||
*out_offset = number[1];
|
*out_offset = number[1];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (__kmp_affinity_gran == affinity_gran_default) {
|
if (__kmp_affinity_gran == KMP_HW_UNKNOWN) {
|
||||||
#if KMP_MIC_SUPPORTED
|
#if KMP_MIC_SUPPORTED
|
||||||
if (__kmp_mic_type != non_mic) {
|
if (__kmp_mic_type != non_mic) {
|
||||||
if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
|
if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
|
||||||
KMP_WARNING(AffGranUsing, "KMP_AFFINITY", "fine");
|
KMP_WARNING(AffGranUsing, "KMP_AFFINITY", "fine");
|
||||||
}
|
}
|
||||||
__kmp_affinity_gran = affinity_gran_fine;
|
__kmp_affinity_gran = KMP_HW_THREAD;
|
||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
|
if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
|
||||||
KMP_WARNING(AffGranUsing, "KMP_AFFINITY", "core");
|
KMP_WARNING(AffGranUsing, "KMP_AFFINITY", "core");
|
||||||
}
|
}
|
||||||
__kmp_affinity_gran = affinity_gran_core;
|
__kmp_affinity_gran = KMP_HW_CORE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
|
@ -2475,31 +2482,8 @@ static void __kmp_stg_print_affinity(kmp_str_buf_t *buffer, char const *name,
|
||||||
} else {
|
} else {
|
||||||
__kmp_str_buf_print(buffer, "%s,", "norespect");
|
__kmp_str_buf_print(buffer, "%s,", "norespect");
|
||||||
}
|
}
|
||||||
switch (__kmp_affinity_gran) {
|
__kmp_str_buf_print(buffer, "granularity=%s,",
|
||||||
case affinity_gran_default:
|
__kmp_hw_get_keyword(__kmp_affinity_gran, false));
|
||||||
__kmp_str_buf_print(buffer, "%s", "granularity=default,");
|
|
||||||
break;
|
|
||||||
case affinity_gran_fine:
|
|
||||||
__kmp_str_buf_print(buffer, "%s", "granularity=fine,");
|
|
||||||
break;
|
|
||||||
case affinity_gran_thread:
|
|
||||||
__kmp_str_buf_print(buffer, "%s", "granularity=thread,");
|
|
||||||
break;
|
|
||||||
case affinity_gran_core:
|
|
||||||
__kmp_str_buf_print(buffer, "%s", "granularity=core,");
|
|
||||||
break;
|
|
||||||
case affinity_gran_package:
|
|
||||||
__kmp_str_buf_print(buffer, "%s", "granularity=package,");
|
|
||||||
break;
|
|
||||||
case affinity_gran_node:
|
|
||||||
__kmp_str_buf_print(buffer, "%s", "granularity=node,");
|
|
||||||
break;
|
|
||||||
#if KMP_GROUP_AFFINITY
|
|
||||||
case affinity_gran_group:
|
|
||||||
__kmp_str_buf_print(buffer, "%s", "granularity=group,");
|
|
||||||
break;
|
|
||||||
#endif /* KMP_GROUP_AFFINITY */
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (!KMP_AFFINITY_CAPABLE()) {
|
if (!KMP_AFFINITY_CAPABLE()) {
|
||||||
__kmp_str_buf_print(buffer, "%s", "disabled");
|
__kmp_str_buf_print(buffer, "%s", "disabled");
|
||||||
|
@ -2571,7 +2555,7 @@ static void __kmp_stg_parse_gomp_cpu_affinity(char const *name,
|
||||||
// GOMP_CPU_AFFINITY => granularity=fine,explicit,proclist=...
|
// GOMP_CPU_AFFINITY => granularity=fine,explicit,proclist=...
|
||||||
__kmp_affinity_proclist = temp_proclist;
|
__kmp_affinity_proclist = temp_proclist;
|
||||||
__kmp_affinity_type = affinity_explicit;
|
__kmp_affinity_type = affinity_explicit;
|
||||||
__kmp_affinity_gran = affinity_gran_fine;
|
__kmp_affinity_gran = KMP_HW_THREAD;
|
||||||
__kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
|
__kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
|
||||||
} else {
|
} else {
|
||||||
KMP_WARNING(AffSyntaxError, name);
|
KMP_WARNING(AffSyntaxError, name);
|
||||||
|
@ -2856,10 +2840,20 @@ static int __kmp_parse_place_list(const char *var, const char *env,
|
||||||
|
|
||||||
static void __kmp_stg_parse_places(char const *name, char const *value,
|
static void __kmp_stg_parse_places(char const *name, char const *value,
|
||||||
void *data) {
|
void *data) {
|
||||||
|
struct kmp_place_t {
|
||||||
|
const char *name;
|
||||||
|
kmp_hw_t type;
|
||||||
|
};
|
||||||
int count;
|
int count;
|
||||||
|
bool set = false;
|
||||||
const char *scan = value;
|
const char *scan = value;
|
||||||
const char *next = scan;
|
const char *next = scan;
|
||||||
const char *kind = "\"threads\"";
|
const char *kind = "\"threads\"";
|
||||||
|
kmp_place_t std_places[] = {{"threads", KMP_HW_THREAD},
|
||||||
|
{"cores", KMP_HW_CORE},
|
||||||
|
{"numa_domains", KMP_HW_NUMA},
|
||||||
|
{"ll_caches", KMP_HW_LLC},
|
||||||
|
{"sockets", KMP_HW_SOCKET}};
|
||||||
kmp_setting_t **rivals = (kmp_setting_t **)data;
|
kmp_setting_t **rivals = (kmp_setting_t **)data;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
|
@ -2868,52 +2862,47 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (__kmp_match_str("threads", scan, &next)) {
|
// Standard choices
|
||||||
|
for (size_t i = 0; i < sizeof(std_places) / sizeof(std_places[0]); ++i) {
|
||||||
|
const kmp_place_t &place = std_places[i];
|
||||||
|
if (__kmp_match_str(place.name, scan, &next)) {
|
||||||
scan = next;
|
scan = next;
|
||||||
__kmp_affinity_type = affinity_compact;
|
__kmp_affinity_type = affinity_compact;
|
||||||
__kmp_affinity_gran = affinity_gran_thread;
|
__kmp_affinity_gran = place.type;
|
||||||
__kmp_affinity_dups = FALSE;
|
__kmp_affinity_dups = FALSE;
|
||||||
kind = "\"threads\"";
|
set = true;
|
||||||
} else if (__kmp_match_str("cores", scan, &next)) {
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Implementation choices for OMP_PLACES based on internal types
|
||||||
|
if (!set) {
|
||||||
|
KMP_FOREACH_HW_TYPE(type) {
|
||||||
|
const char *name = __kmp_hw_get_keyword(type, true);
|
||||||
|
if (__kmp_match_str("unknowns", scan, &next))
|
||||||
|
continue;
|
||||||
|
if (__kmp_match_str(name, scan, &next)) {
|
||||||
scan = next;
|
scan = next;
|
||||||
__kmp_affinity_type = affinity_compact;
|
__kmp_affinity_type = affinity_compact;
|
||||||
__kmp_affinity_gran = affinity_gran_core;
|
__kmp_affinity_gran = type;
|
||||||
__kmp_affinity_dups = FALSE;
|
__kmp_affinity_dups = FALSE;
|
||||||
kind = "\"cores\"";
|
set = true;
|
||||||
#if KMP_USE_HWLOC
|
break;
|
||||||
} else if (__kmp_match_str("tiles", scan, &next)) {
|
}
|
||||||
scan = next;
|
}
|
||||||
__kmp_affinity_type = affinity_compact;
|
}
|
||||||
__kmp_affinity_gran = affinity_gran_tile;
|
if (!set) {
|
||||||
__kmp_affinity_dups = FALSE;
|
|
||||||
kind = "\"tiles\"";
|
|
||||||
#endif
|
|
||||||
} else if (__kmp_match_str("dice", scan, &next) ||
|
|
||||||
__kmp_match_str("dies", scan, &next)) {
|
|
||||||
scan = next;
|
|
||||||
__kmp_affinity_type = affinity_compact;
|
|
||||||
__kmp_affinity_gran = affinity_gran_die;
|
|
||||||
__kmp_affinity_dups = FALSE;
|
|
||||||
kind = "\"dice\"";
|
|
||||||
} else if (__kmp_match_str("sockets", scan, &next)) {
|
|
||||||
scan = next;
|
|
||||||
__kmp_affinity_type = affinity_compact;
|
|
||||||
__kmp_affinity_gran = affinity_gran_package;
|
|
||||||
__kmp_affinity_dups = FALSE;
|
|
||||||
kind = "\"sockets\"";
|
|
||||||
} else {
|
|
||||||
if (__kmp_affinity_proclist != NULL) {
|
if (__kmp_affinity_proclist != NULL) {
|
||||||
KMP_INTERNAL_FREE((void *)__kmp_affinity_proclist);
|
KMP_INTERNAL_FREE((void *)__kmp_affinity_proclist);
|
||||||
__kmp_affinity_proclist = NULL;
|
__kmp_affinity_proclist = NULL;
|
||||||
}
|
}
|
||||||
if (__kmp_parse_place_list(name, value, &__kmp_affinity_proclist)) {
|
if (__kmp_parse_place_list(name, value, &__kmp_affinity_proclist)) {
|
||||||
__kmp_affinity_type = affinity_explicit;
|
__kmp_affinity_type = affinity_explicit;
|
||||||
__kmp_affinity_gran = affinity_gran_fine;
|
__kmp_affinity_gran = KMP_HW_THREAD;
|
||||||
__kmp_affinity_dups = FALSE;
|
__kmp_affinity_dups = FALSE;
|
||||||
} else {
|
} else {
|
||||||
// Syntax error fallback
|
// Syntax error fallback
|
||||||
__kmp_affinity_type = affinity_compact;
|
__kmp_affinity_type = affinity_compact;
|
||||||
__kmp_affinity_gran = affinity_gran_core;
|
__kmp_affinity_gran = KMP_HW_CORE;
|
||||||
__kmp_affinity_dups = FALSE;
|
__kmp_affinity_dups = FALSE;
|
||||||
}
|
}
|
||||||
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
|
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
|
||||||
|
@ -2921,6 +2910,9 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (__kmp_affinity_gran != KMP_HW_UNKNOWN) {
|
||||||
|
kind = __kmp_hw_get_keyword(__kmp_affinity_gran);
|
||||||
|
}
|
||||||
|
|
||||||
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
|
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
|
||||||
__kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
|
__kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
|
||||||
|
@ -2985,31 +2977,12 @@ static void __kmp_stg_print_places(kmp_str_buf_t *buffer, char const *name,
|
||||||
} else {
|
} else {
|
||||||
num = 0;
|
num = 0;
|
||||||
}
|
}
|
||||||
if (__kmp_affinity_gran == affinity_gran_thread) {
|
if (__kmp_affinity_gran != KMP_HW_UNKNOWN) {
|
||||||
|
const char *name = __kmp_hw_get_keyword(__kmp_affinity_gran, true);
|
||||||
if (num > 0) {
|
if (num > 0) {
|
||||||
__kmp_str_buf_print(buffer, "='threads(%d)'\n", num);
|
__kmp_str_buf_print(buffer, "='%s(%d)'\n", name, num);
|
||||||
} else {
|
} else {
|
||||||
__kmp_str_buf_print(buffer, "='threads'\n");
|
__kmp_str_buf_print(buffer, "='%s'\n", name);
|
||||||
}
|
|
||||||
} else if (__kmp_affinity_gran == affinity_gran_core) {
|
|
||||||
if (num > 0) {
|
|
||||||
__kmp_str_buf_print(buffer, "='cores(%d)' \n", num);
|
|
||||||
} else {
|
|
||||||
__kmp_str_buf_print(buffer, "='cores'\n");
|
|
||||||
}
|
|
||||||
#if KMP_USE_HWLOC
|
|
||||||
} else if (__kmp_affinity_gran == affinity_gran_tile) {
|
|
||||||
if (num > 0) {
|
|
||||||
__kmp_str_buf_print(buffer, "='tiles(%d)' \n", num);
|
|
||||||
} else {
|
|
||||||
__kmp_str_buf_print(buffer, "='tiles'\n");
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
} else if (__kmp_affinity_gran == affinity_gran_package) {
|
|
||||||
if (num > 0) {
|
|
||||||
__kmp_str_buf_print(buffer, "='sockets(%d)'\n", num);
|
|
||||||
} else {
|
|
||||||
__kmp_str_buf_print(buffer, "='sockets'\n");
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
__kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined));
|
__kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined));
|
||||||
|
@ -3118,8 +3091,12 @@ static void __kmp_stg_print_topology_method(kmp_str_buf_t *buffer,
|
||||||
break;
|
break;
|
||||||
|
|
||||||
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
|
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
|
||||||
|
case affinity_top_method_x2apicid_1f:
|
||||||
|
value = "x2APIC id leaf 0x1f";
|
||||||
|
break;
|
||||||
|
|
||||||
case affinity_top_method_x2apicid:
|
case affinity_top_method_x2apicid:
|
||||||
value = "x2APIC id";
|
value = "x2APIC id leaf 0xb";
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case affinity_top_method_apicid:
|
case affinity_top_method_apicid:
|
||||||
|
@ -4727,12 +4704,92 @@ static void __kmp_stg_print_speculative_statsfile(kmp_str_buf_t *buffer,
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
// KMP_HW_SUBSET (was KMP_PLACE_THREADS)
|
// KMP_HW_SUBSET (was KMP_PLACE_THREADS)
|
||||||
|
// 2s16c,2t => 2S16C,2T => 2S16C \0 2T
|
||||||
|
|
||||||
// The longest observable sequence of items is
|
// Return KMP_HW_SUBSET preferred hardware type in case a token is ambiguously
|
||||||
// Socket-Node-Tile-Core-Thread
|
// short. The original KMP_HW_SUBSET environment variable had single letters:
|
||||||
// So, let's limit to 5 levels for now
|
// s, c, t for sockets, cores, threads respectively.
|
||||||
|
static kmp_hw_t __kmp_hw_subset_break_tie(const kmp_hw_t *possible,
                                          size_t num_possible) {
  // Scan the candidates in order and hand back the first one that is a
  // thread, core, or socket. Any other candidate type is skipped.
  size_t idx = 0;
  while (idx < num_possible) {
    const kmp_hw_t candidate = possible[idx++];
    switch (candidate) {
    case KMP_HW_THREAD:
    case KMP_HW_CORE:
    case KMP_HW_SOCKET:
      return candidate;
    default:
      break;
    }
  }
  // None of the candidates was a thread, core, or socket.
  return KMP_HW_UNKNOWN;
}
|
||||||
|
|
||||||
|
// Return hardware type from string or HW_UNKNOWN if string cannot be parsed
|
||||||
|
// This algorithm is very forgiving to the user in that, the instant it can
|
||||||
|
// reduce the search space to one, it assumes that is the topology level the
|
||||||
|
// user wanted, even if it is misspelled later in the token.
|
||||||
|
static kmp_hw_t __kmp_stg_parse_hw_subset_name(char const *token) {
  // token: start of a hardware-type name; only its leading run of
  // alphanumeric/underscore characters is considered.
  // Returns the single hardware type whose keyword is consistent with that
  // run, the tie-break choice when several remain, or KMP_HW_UNKNOWN when
  // none remain.
  size_t index, num_possible, token_length;
  kmp_hw_t possible[KMP_HW_LAST];

  // Find the length of the hardware token string.
  // The <ctype.h> classifiers require a value representable as unsigned char
  // (or EOF); cast so that bytes >= 0x80 in the environment string are not
  // passed as negative values where plain char is signed.
  token_length = 0;
  while (isalnum((unsigned char)token[token_length]) ||
         token[token_length] == '_') {
    token_length++;
  }

  // Set the possibilities to all hardware types
  num_possible = 0;
  KMP_FOREACH_HW_TYPE(type) { possible[num_possible++] = type; }

  // Eliminate hardware types by comparing the front of the token
  // with hardware names
  // In most cases, the first letter in the token will indicate exactly
  // which hardware type is parsed, e.g., 'C' = Core
  index = 0;
  while (num_possible > 1 && index < token_length) {
    size_t n = num_possible;
    char token_char = (char)toupper((unsigned char)token[index]);
    for (size_t i = 0; i < n; ++i) {
      const char *s = __kmp_hw_get_keyword(possible[i], false);
      // A keyword shorter than the current index is not compared, so it is
      // not eliminated at this position.
      if (index < KMP_STRLEN(s)) {
        char c = (char)toupper((unsigned char)s[index]);
        // Mark hardware types for removal when the characters do not match
        if (c != token_char) {
          possible[i] = KMP_HW_UNKNOWN;
          num_possible--;
        }
      }
    }
    // Remove hardware types that this token cannot be: swap every surviving
    // entry to the front so the first num_possible slots hold the survivors.
    size_t start = 0;
    for (size_t i = 0; i < n; ++i) {
      if (possible[i] != KMP_HW_UNKNOWN) {
        kmp_hw_t temp = possible[i];
        possible[i] = possible[start];
        possible[start] = temp;
        start++;
      }
    }
    KMP_ASSERT(start == num_possible);
    index++;
  }

  // Attempt to break a tie if user has very short token
  // (e.g., is 'T' tile or thread?)
  if (num_possible > 1)
    return __kmp_hw_subset_break_tie(possible, num_possible);
  if (num_possible == 1)
    return possible[0];
  return KMP_HW_UNKNOWN;
}
|
||||||
|
|
||||||
|
// The longest observable sequence of items can only be HW_LAST length
|
||||||
// The input string is usually short enough, let's use 512 limit for now
|
// The input string is usually short enough, let's use 512 limit for now
|
||||||
#define MAX_T_LEVEL 5
|
#define MAX_T_LEVEL KMP_HW_LAST
|
||||||
#define MAX_STR_LEN 512
|
#define MAX_STR_LEN 512
|
||||||
static void __kmp_stg_parse_hw_subset(char const *name, char const *value,
|
static void __kmp_stg_parse_hw_subset(char const *name, char const *value,
|
||||||
void *data) {
|
void *data) {
|
||||||
|
@ -4751,12 +4808,13 @@ static void __kmp_stg_parse_hw_subset(char const *name, char const *value,
|
||||||
char input[MAX_STR_LEN];
|
char input[MAX_STR_LEN];
|
||||||
size_t len = 0, mlen = MAX_STR_LEN;
|
size_t len = 0, mlen = MAX_STR_LEN;
|
||||||
int level = 0;
|
int level = 0;
|
||||||
// Canonize the string (remove spaces, unify delimiters, etc.)
|
bool absolute = false;
|
||||||
|
// Canonicalize the string (remove spaces, unify delimiters, etc.)
|
||||||
char *pos = CCAST(char *, value);
|
char *pos = CCAST(char *, value);
|
||||||
while (*pos && mlen) {
|
while (*pos && mlen) {
|
||||||
if (*pos != ' ') { // skip spaces
|
if (*pos != ' ') { // skip spaces
|
||||||
if (len == 0 && *pos == ':') {
|
if (len == 0 && *pos == ':') {
|
||||||
__kmp_hws_abs_flag = 1; // if the first symbol is ":", skip it
|
absolute = true;
|
||||||
} else {
|
} else {
|
||||||
input[len] = (char)(toupper(*pos));
|
input[len] = (char)(toupper(*pos));
|
||||||
if (input[len] == 'X')
|
if (input[len] == 'X')
|
||||||
|
@ -4769,10 +4827,10 @@ static void __kmp_stg_parse_hw_subset(char const *name, char const *value,
|
||||||
mlen--;
|
mlen--;
|
||||||
pos++;
|
pos++;
|
||||||
}
|
}
|
||||||
if (len == 0 || mlen == 0)
|
if (len == 0 || mlen == 0) {
|
||||||
goto err; // contents is either empty or too long
|
goto err; // contents is either empty or too long
|
||||||
|
}
|
||||||
input[len] = '\0';
|
input[len] = '\0';
|
||||||
__kmp_hws_requested = 1; // mark that subset requested
|
|
||||||
// Split by delimiter
|
// Split by delimiter
|
||||||
pos = input;
|
pos = input;
|
||||||
components[level++] = pos;
|
components[level++] = pos;
|
||||||
|
@ -4782,146 +4840,69 @@ static void __kmp_stg_parse_hw_subset(char const *name, char const *value,
|
||||||
*pos = '\0'; // modify input and avoid more copying
|
*pos = '\0'; // modify input and avoid more copying
|
||||||
components[level++] = ++pos; // expect something after ","
|
components[level++] = ++pos; // expect something after ","
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__kmp_hw_subset = kmp_hw_subset_t::allocate();
|
||||||
|
if (absolute)
|
||||||
|
__kmp_hw_subset->set_absolute();
|
||||||
|
|
||||||
// Check each component
|
// Check each component
|
||||||
for (int i = 0; i < level; ++i) {
|
for (int i = 0; i < level; ++i) {
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
int num = atoi(components[i]); // each component should start with a number
|
int num = atoi(components[i]); // each component should start with a number
|
||||||
|
if (num <= 0) {
|
||||||
|
goto err; // only positive integers are valid for count
|
||||||
|
}
|
||||||
if ((pos = strchr(components[i], '@'))) {
|
if ((pos = strchr(components[i], '@'))) {
|
||||||
offset = atoi(pos + 1); // save offset
|
offset = atoi(pos + 1); // save offset
|
||||||
*pos = '\0'; // cut the offset from the component
|
*pos = '\0'; // cut the offset from the component
|
||||||
}
|
}
|
||||||
pos = components[i] + strspn(components[i], digits);
|
pos = components[i] + strspn(components[i], digits);
|
||||||
if (pos == components[i])
|
if (pos == components[i]) {
|
||||||
goto err;
|
goto err;
|
||||||
|
}
|
||||||
// detect the component type
|
// detect the component type
|
||||||
switch (*pos) {
|
kmp_hw_t type = __kmp_stg_parse_hw_subset_name(pos);
|
||||||
case 'S': // Socket
|
if (type == KMP_HW_UNKNOWN) {
|
||||||
if (__kmp_hws_socket.num > 0)
|
|
||||||
goto err; // duplicate is not allowed
|
|
||||||
__kmp_hws_socket.num = num;
|
|
||||||
__kmp_hws_socket.offset = offset;
|
|
||||||
break;
|
|
||||||
case 'N': // NUMA Node
|
|
||||||
if (__kmp_hws_node.num > 0)
|
|
||||||
goto err; // duplicate is not allowed
|
|
||||||
__kmp_hws_node.num = num;
|
|
||||||
__kmp_hws_node.offset = offset;
|
|
||||||
break;
|
|
||||||
case 'D': // Die
|
|
||||||
if (__kmp_hws_die.num > 0)
|
|
||||||
goto err; // duplicate is not allowed
|
|
||||||
__kmp_hws_die.num = num;
|
|
||||||
__kmp_hws_die.offset = offset;
|
|
||||||
break;
|
|
||||||
case 'L': // Cache
|
|
||||||
if (*(pos + 1) == '2') { // L2 - Tile
|
|
||||||
if (__kmp_hws_tile.num > 0)
|
|
||||||
goto err; // duplicate is not allowed
|
|
||||||
__kmp_hws_tile.num = num;
|
|
||||||
__kmp_hws_tile.offset = offset;
|
|
||||||
} else if (*(pos + 1) == '3') { // L3 - Socket
|
|
||||||
if (__kmp_hws_socket.num > 0 || __kmp_hws_die.num > 0)
|
|
||||||
goto err; // duplicate is not allowed
|
|
||||||
__kmp_hws_socket.num = num;
|
|
||||||
__kmp_hws_socket.offset = offset;
|
|
||||||
} else if (*(pos + 1) == '1') { // L1 - Core
|
|
||||||
if (__kmp_hws_core.num > 0)
|
|
||||||
goto err; // duplicate is not allowed
|
|
||||||
__kmp_hws_core.num = num;
|
|
||||||
__kmp_hws_core.offset = offset;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 'C': // Core (or Cache?)
|
|
||||||
if (*(pos + 1) != 'A') {
|
|
||||||
if (__kmp_hws_core.num > 0)
|
|
||||||
goto err; // duplicate is not allowed
|
|
||||||
__kmp_hws_core.num = num;
|
|
||||||
__kmp_hws_core.offset = offset;
|
|
||||||
} else { // Cache
|
|
||||||
char *d = pos + strcspn(pos, digits); // find digit
|
|
||||||
if (*d == '2') { // L2 - Tile
|
|
||||||
if (__kmp_hws_tile.num > 0)
|
|
||||||
goto err; // duplicate is not allowed
|
|
||||||
__kmp_hws_tile.num = num;
|
|
||||||
__kmp_hws_tile.offset = offset;
|
|
||||||
} else if (*d == '3') { // L3 - Socket
|
|
||||||
if (__kmp_hws_socket.num > 0 || __kmp_hws_die.num > 0)
|
|
||||||
goto err; // duplicate is not allowed
|
|
||||||
__kmp_hws_socket.num = num;
|
|
||||||
__kmp_hws_socket.offset = offset;
|
|
||||||
} else if (*d == '1') { // L1 - Core
|
|
||||||
if (__kmp_hws_core.num > 0)
|
|
||||||
goto err; // duplicate is not allowed
|
|
||||||
__kmp_hws_core.num = num;
|
|
||||||
__kmp_hws_core.offset = offset;
|
|
||||||
} else {
|
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
}
|
if (__kmp_hw_subset->specified(type)) {
|
||||||
break;
|
|
||||||
case 'T': // Thread
|
|
||||||
if (__kmp_hws_proc.num > 0)
|
|
||||||
goto err; // duplicate is not allowed
|
|
||||||
__kmp_hws_proc.num = num;
|
|
||||||
__kmp_hws_proc.offset = offset;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
__kmp_hw_subset->push_back(num, type, offset);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
err:
|
err:
|
||||||
KMP_WARNING(AffHWSubsetInvalid, name, value);
|
KMP_WARNING(AffHWSubsetInvalid, name, value);
|
||||||
__kmp_hws_requested = 0; // mark that subset not requested
|
if (__kmp_hw_subset) {
|
||||||
|
kmp_hw_subset_t::deallocate(__kmp_hw_subset);
|
||||||
|
__kmp_hw_subset = nullptr;
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __kmp_stg_print_hw_subset(kmp_str_buf_t *buffer, char const *name,
|
static void __kmp_stg_print_hw_subset(kmp_str_buf_t *buffer, char const *name,
|
||||||
void *data) {
|
void *data) {
|
||||||
if (__kmp_hws_requested) {
|
|
||||||
int comma = 0;
|
|
||||||
kmp_str_buf_t buf;
|
kmp_str_buf_t buf;
|
||||||
|
int depth;
|
||||||
|
if (!__kmp_hw_subset)
|
||||||
|
return;
|
||||||
__kmp_str_buf_init(&buf);
|
__kmp_str_buf_init(&buf);
|
||||||
if (__kmp_env_format)
|
if (__kmp_env_format)
|
||||||
KMP_STR_BUF_PRINT_NAME_EX(name);
|
KMP_STR_BUF_PRINT_NAME_EX(name);
|
||||||
else
|
else
|
||||||
__kmp_str_buf_print(buffer, " %s='", name);
|
__kmp_str_buf_print(buffer, " %s='", name);
|
||||||
if (__kmp_hws_socket.num) {
|
|
||||||
__kmp_str_buf_print(&buf, "%ds", __kmp_hws_socket.num);
|
depth = __kmp_hw_subset->get_depth();
|
||||||
if (__kmp_hws_socket.offset)
|
for (int i = 0; i < depth; ++i) {
|
||||||
__kmp_str_buf_print(&buf, "@%d", __kmp_hws_socket.offset);
|
const auto &item = __kmp_hw_subset->at(i);
|
||||||
comma = 1;
|
__kmp_str_buf_print(&buf, "%s%d%s", (i > 0 ? "," : ""), item.num,
|
||||||
|
__kmp_hw_get_keyword(item.type));
|
||||||
|
if (item.offset)
|
||||||
|
__kmp_str_buf_print(&buf, "@%d", item.offset);
|
||||||
}
|
}
|
||||||
if (__kmp_hws_die.num) {
|
|
||||||
__kmp_str_buf_print(&buf, "%s%dd", comma ? "," : "", __kmp_hws_die.num);
|
|
||||||
if (__kmp_hws_die.offset)
|
|
||||||
__kmp_str_buf_print(&buf, "@%d", __kmp_hws_die.offset);
|
|
||||||
comma = 1;
|
|
||||||
}
|
|
||||||
if (__kmp_hws_node.num) {
|
|
||||||
__kmp_str_buf_print(&buf, "%s%dn", comma ? "," : "", __kmp_hws_node.num);
|
|
||||||
if (__kmp_hws_node.offset)
|
|
||||||
__kmp_str_buf_print(&buf, "@%d", __kmp_hws_node.offset);
|
|
||||||
comma = 1;
|
|
||||||
}
|
|
||||||
if (__kmp_hws_tile.num) {
|
|
||||||
__kmp_str_buf_print(&buf, "%s%dL2", comma ? "," : "", __kmp_hws_tile.num);
|
|
||||||
if (__kmp_hws_tile.offset)
|
|
||||||
__kmp_str_buf_print(&buf, "@%d", __kmp_hws_tile.offset);
|
|
||||||
comma = 1;
|
|
||||||
}
|
|
||||||
if (__kmp_hws_core.num) {
|
|
||||||
__kmp_str_buf_print(&buf, "%s%dc", comma ? "," : "", __kmp_hws_core.num);
|
|
||||||
if (__kmp_hws_core.offset)
|
|
||||||
__kmp_str_buf_print(&buf, "@%d", __kmp_hws_core.offset);
|
|
||||||
comma = 1;
|
|
||||||
}
|
|
||||||
if (__kmp_hws_proc.num)
|
|
||||||
__kmp_str_buf_print(&buf, "%s%dt", comma ? "," : "", __kmp_hws_proc.num);
|
|
||||||
__kmp_str_buf_print(buffer, "%s'\n", buf.str);
|
__kmp_str_buf_print(buffer, "%s'\n", buf.str);
|
||||||
__kmp_str_buf_free(&buf);
|
__kmp_str_buf_free(&buf);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#if USE_ITT_BUILD
|
#if USE_ITT_BUILD
|
||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
|
@ -5762,7 +5743,7 @@ void __kmp_env_initialize(char const *string) {
|
||||||
// Reset the affinity flags to their default values,
|
// Reset the affinity flags to their default values,
|
||||||
// in case this is called from kmp_set_defaults().
|
// in case this is called from kmp_set_defaults().
|
||||||
__kmp_affinity_type = affinity_default;
|
__kmp_affinity_type = affinity_default;
|
||||||
__kmp_affinity_gran = affinity_gran_default;
|
__kmp_affinity_gran = KMP_HW_UNKNOWN;
|
||||||
__kmp_affinity_top_method = affinity_top_method_default;
|
__kmp_affinity_top_method = affinity_top_method_default;
|
||||||
__kmp_affinity_respect_mask = affinity_respect_mask_default;
|
__kmp_affinity_respect_mask = affinity_respect_mask_default;
|
||||||
}
|
}
|
||||||
|
@ -5772,7 +5753,7 @@ void __kmp_env_initialize(char const *string) {
|
||||||
aff_str = __kmp_env_blk_var(&block, "OMP_PROC_BIND");
|
aff_str = __kmp_env_blk_var(&block, "OMP_PROC_BIND");
|
||||||
if (aff_str != NULL) {
|
if (aff_str != NULL) {
|
||||||
__kmp_affinity_type = affinity_default;
|
__kmp_affinity_type = affinity_default;
|
||||||
__kmp_affinity_gran = affinity_gran_default;
|
__kmp_affinity_gran = KMP_HW_UNKNOWN;
|
||||||
__kmp_affinity_top_method = affinity_top_method_default;
|
__kmp_affinity_top_method = affinity_top_method_default;
|
||||||
__kmp_affinity_respect_mask = affinity_respect_mask_default;
|
__kmp_affinity_respect_mask = affinity_respect_mask_default;
|
||||||
}
|
}
|
||||||
|
@ -5844,12 +5825,19 @@ void __kmp_env_initialize(char const *string) {
|
||||||
if (!TCR_4(__kmp_init_middle)) {
|
if (!TCR_4(__kmp_init_middle)) {
|
||||||
#if KMP_USE_HWLOC
|
#if KMP_USE_HWLOC
|
||||||
// Force using hwloc when either tiles or numa nodes requested within
|
// Force using hwloc when either tiles or numa nodes requested within
|
||||||
// KMP_HW_SUBSET and no other topology method is requested
|
// KMP_HW_SUBSET or granularity setting and no other topology method
|
||||||
if ((__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0 ||
|
// is requested
|
||||||
__kmp_affinity_gran == affinity_gran_tile) &&
|
if (__kmp_hw_subset &&
|
||||||
(__kmp_affinity_top_method == affinity_top_method_default)) {
|
__kmp_affinity_top_method == affinity_top_method_default)
|
||||||
|
if (__kmp_hw_subset->specified(KMP_HW_NUMA) ||
|
||||||
|
__kmp_hw_subset->specified(KMP_HW_TILE) ||
|
||||||
|
__kmp_affinity_gran == KMP_HW_TILE ||
|
||||||
|
__kmp_affinity_gran == KMP_HW_NUMA)
|
||||||
|
__kmp_affinity_top_method = affinity_top_method_hwloc;
|
||||||
|
// Force using hwloc when tiles or numa nodes requested for OMP_PLACES
|
||||||
|
if (__kmp_affinity_gran == KMP_HW_NUMA ||
|
||||||
|
__kmp_affinity_gran == KMP_HW_TILE)
|
||||||
__kmp_affinity_top_method = affinity_top_method_hwloc;
|
__kmp_affinity_top_method = affinity_top_method_hwloc;
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
// Determine if the machine/OS is actually capable of supporting
|
// Determine if the machine/OS is actually capable of supporting
|
||||||
// affinity.
|
// affinity.
|
||||||
|
@ -5879,7 +5867,7 @@ void __kmp_env_initialize(char const *string) {
|
||||||
}
|
}
|
||||||
__kmp_affinity_type = affinity_disabled;
|
__kmp_affinity_type = affinity_disabled;
|
||||||
__kmp_affinity_respect_mask = 0;
|
__kmp_affinity_respect_mask = 0;
|
||||||
__kmp_affinity_gran = affinity_gran_fine;
|
__kmp_affinity_gran = KMP_HW_THREAD;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5937,44 +5925,27 @@ void __kmp_env_initialize(char const *string) {
|
||||||
__kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
|
__kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
|
||||||
}
|
}
|
||||||
if (__kmp_affinity_top_method == affinity_top_method_default) {
|
if (__kmp_affinity_top_method == affinity_top_method_default) {
|
||||||
if (__kmp_affinity_gran == affinity_gran_default) {
|
if (__kmp_affinity_gran == KMP_HW_UNKNOWN) {
|
||||||
__kmp_affinity_top_method = affinity_top_method_group;
|
__kmp_affinity_top_method = affinity_top_method_group;
|
||||||
__kmp_affinity_gran = affinity_gran_group;
|
__kmp_affinity_gran = KMP_HW_PROC_GROUP;
|
||||||
} else if (__kmp_affinity_gran == affinity_gran_group) {
|
} else if (__kmp_affinity_gran == KMP_HW_PROC_GROUP) {
|
||||||
__kmp_affinity_top_method = affinity_top_method_group;
|
__kmp_affinity_top_method = affinity_top_method_group;
|
||||||
} else {
|
} else {
|
||||||
__kmp_affinity_top_method = affinity_top_method_all;
|
__kmp_affinity_top_method = affinity_top_method_all;
|
||||||
}
|
}
|
||||||
} else if (__kmp_affinity_top_method == affinity_top_method_group) {
|
} else if (__kmp_affinity_top_method == affinity_top_method_group) {
|
||||||
if (__kmp_affinity_gran == affinity_gran_default) {
|
if (__kmp_affinity_gran == KMP_HW_UNKNOWN) {
|
||||||
__kmp_affinity_gran = affinity_gran_group;
|
__kmp_affinity_gran = KMP_HW_PROC_GROUP;
|
||||||
} else if ((__kmp_affinity_gran != affinity_gran_group) &&
|
} else if ((__kmp_affinity_gran != KMP_HW_PROC_GROUP) &&
|
||||||
(__kmp_affinity_gran != affinity_gran_fine) &&
|
(__kmp_affinity_gran != KMP_HW_THREAD)) {
|
||||||
(__kmp_affinity_gran != affinity_gran_thread)) {
|
const char *str = __kmp_hw_get_keyword(__kmp_affinity_gran);
|
||||||
const char *str = NULL;
|
|
||||||
switch (__kmp_affinity_gran) {
|
|
||||||
case affinity_gran_core:
|
|
||||||
str = "core";
|
|
||||||
break;
|
|
||||||
case affinity_gran_package:
|
|
||||||
str = "package";
|
|
||||||
break;
|
|
||||||
case affinity_gran_node:
|
|
||||||
str = "node";
|
|
||||||
break;
|
|
||||||
case affinity_gran_tile:
|
|
||||||
str = "tile";
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
KMP_DEBUG_ASSERT(0);
|
|
||||||
}
|
|
||||||
KMP_WARNING(AffGranTopGroup, var, str);
|
KMP_WARNING(AffGranTopGroup, var, str);
|
||||||
__kmp_affinity_gran = affinity_gran_fine;
|
__kmp_affinity_gran = KMP_HW_THREAD;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (__kmp_affinity_gran == affinity_gran_default) {
|
if (__kmp_affinity_gran == KMP_HW_UNKNOWN) {
|
||||||
__kmp_affinity_gran = affinity_gran_core;
|
__kmp_affinity_gran = KMP_HW_CORE;
|
||||||
} else if (__kmp_affinity_gran == affinity_gran_group) {
|
} else if (__kmp_affinity_gran == KMP_HW_PROC_GROUP) {
|
||||||
const char *str = NULL;
|
const char *str = NULL;
|
||||||
switch (__kmp_affinity_type) {
|
switch (__kmp_affinity_type) {
|
||||||
case affinity_physical:
|
case affinity_physical:
|
||||||
|
@ -5997,7 +5968,7 @@ void __kmp_env_initialize(char const *string) {
|
||||||
KMP_DEBUG_ASSERT(0);
|
KMP_DEBUG_ASSERT(0);
|
||||||
}
|
}
|
||||||
KMP_WARNING(AffGranGroupType, var, str);
|
KMP_WARNING(AffGranGroupType, var, str);
|
||||||
__kmp_affinity_gran = affinity_gran_core;
|
__kmp_affinity_gran = KMP_HW_CORE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else
|
} else
|
||||||
|
@ -6039,15 +6010,15 @@ void __kmp_env_initialize(char const *string) {
|
||||||
__kmp_affinity_type = affinity_none;
|
__kmp_affinity_type = affinity_none;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ((__kmp_affinity_gran == affinity_gran_default) &&
|
if ((__kmp_affinity_gran == KMP_HW_UNKNOWN) &&
|
||||||
(__kmp_affinity_gran_levels < 0)) {
|
(__kmp_affinity_gran_levels < 0)) {
|
||||||
#if KMP_MIC_SUPPORTED
|
#if KMP_MIC_SUPPORTED
|
||||||
if (__kmp_mic_type != non_mic) {
|
if (__kmp_mic_type != non_mic) {
|
||||||
__kmp_affinity_gran = affinity_gran_fine;
|
__kmp_affinity_gran = KMP_HW_THREAD;
|
||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
__kmp_affinity_gran = affinity_gran_core;
|
__kmp_affinity_gran = KMP_HW_CORE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (__kmp_affinity_top_method == affinity_top_method_default) {
|
if (__kmp_affinity_top_method == affinity_top_method_default) {
|
||||||
|
|
|
@ -0,0 +1,71 @@
|
||||||
|
// RUN: %libomp-compile -D_GNU_SOURCE
|
||||||
|
// RUN: env KMP_AFFINITY=granularity=thread,compact %libomp-run
|
||||||
|
// RUN: env KMP_AFFINITY=granularity=core,compact %libomp-run
|
||||||
|
// RUN: env KMP_AFFINITY=granularity=socket,compact %libomp-run
|
||||||
|
// REQUIRES: linux
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "libomp_test_affinity.h"
|
||||||
|
#include "libomp_test_topology.h"
|
||||||
|
|
||||||
|
// Compare place lists. Make sure every place in p1 is in p2.
|
||||||
|
static int compare_places(const place_list_t *p1, const place_list_t *p2) {
|
||||||
|
int i, j;
|
||||||
|
for (i = 0; i < p1->num_places; ++i) {
|
||||||
|
int found = 0;
|
||||||
|
for (j = 0; j < p2->num_places; ++j) {
|
||||||
|
if (affinity_mask_equal(p1->masks[i], p2->masks[j])) {
|
||||||
|
found = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!found) {
|
||||||
|
printf("Found place in p1 not in p2!\n");
|
||||||
|
printf("p1 places:\n");
|
||||||
|
topology_print_places(p1);
|
||||||
|
printf("\n");
|
||||||
|
printf("p2 places:\n");
|
||||||
|
topology_print_places(p1);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int check_places() {
|
||||||
|
int status;
|
||||||
|
const char *value = getenv("KMP_AFFINITY");
|
||||||
|
if (!value) {
|
||||||
|
fprintf(stderr, "error: must set OMP_PLACES envirable for this test!\n");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
place_list_t *places, *openmp_places;
|
||||||
|
if (strstr(value, "socket")) {
|
||||||
|
places = topology_alloc_type_places(TOPOLOGY_OBJ_SOCKET);
|
||||||
|
} else if (strstr(value, "core")) {
|
||||||
|
places = topology_alloc_type_places(TOPOLOGY_OBJ_CORE);
|
||||||
|
} else if (strstr(value, "thread")) {
|
||||||
|
places = topology_alloc_type_places(TOPOLOGY_OBJ_THREAD);
|
||||||
|
} else {
|
||||||
|
fprintf(
|
||||||
|
stderr,
|
||||||
|
"error: KMP_AFFINITY granularity must be one of thread,core,socket!\n");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
openmp_places = topology_alloc_openmp_places();
|
||||||
|
status = compare_places(openmp_places, places);
|
||||||
|
topology_free_places(places);
|
||||||
|
topology_free_places(openmp_places);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test entry point. The place check only runs when the calling thread may use
// every logical processor; otherwise the test is skipped and reported as
// successful.
int main() {
  if (topology_using_full_mask())
    return check_places();

  printf("Thread does not have access to all logical processors. Skipping "
         "test.\n");
  return EXIT_SUCCESS;
}
|
|
@ -0,0 +1,127 @@
|
||||||
|
// RUN: %libomp-compile -D_GNU_SOURCE
|
||||||
|
// RUN: env OMP_PLACES=threads %libomp-run
|
||||||
|
// RUN: env OMP_PLACES=cores %libomp-run
|
||||||
|
// RUN: env OMP_PLACES=sockets %libomp-run
|
||||||
|
// REQUIRES: linux
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "libomp_test_affinity.h"
|
||||||
|
#include "libomp_test_topology.h"
|
||||||
|
|
||||||
|
// Check openmp place list to make sure it follow KMP_HW_SUBSET restriction
|
||||||
|
static int compare_hw_subset_places(const place_list_t *openmp_places,
|
||||||
|
topology_obj_type_t type, int nsockets,
|
||||||
|
int ncores_per_socket,
|
||||||
|
int nthreads_per_core) {
|
||||||
|
int i, j, expected_total, expected_per_place;
|
||||||
|
if (type == TOPOLOGY_OBJ_THREAD) {
|
||||||
|
expected_total = nsockets * ncores_per_socket * nthreads_per_core;
|
||||||
|
expected_per_place = 1;
|
||||||
|
} else if (type == TOPOLOGY_OBJ_CORE) {
|
||||||
|
expected_total = nsockets * ncores_per_socket;
|
||||||
|
expected_per_place = nthreads_per_core;
|
||||||
|
} else {
|
||||||
|
expected_total = nsockets;
|
||||||
|
expected_per_place = ncores_per_socket;
|
||||||
|
}
|
||||||
|
if (openmp_places->num_places != expected_total) {
|
||||||
|
fprintf(stderr, "error: KMP_HW_SUBSET did not half each resource layer!\n");
|
||||||
|
printf("openmp_places places:\n");
|
||||||
|
topology_print_places(openmp_places);
|
||||||
|
printf("\n");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
for (i = 0; i < openmp_places->num_places; ++i) {
|
||||||
|
int count = affinity_mask_count(openmp_places->masks[i]);
|
||||||
|
if (count != expected_per_place) {
|
||||||
|
fprintf(stderr, "error: place %d has %d OS procs instead of %d\n", i,
|
||||||
|
count, expected_per_place);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int check_places() {
|
||||||
|
char buf[100];
|
||||||
|
topology_obj_type_t type;
|
||||||
|
const char *value;
|
||||||
|
int status = EXIT_SUCCESS;
|
||||||
|
place_list_t *threads, *cores, *sockets, *openmp_places;
|
||||||
|
threads = topology_alloc_type_places(TOPOLOGY_OBJ_THREAD);
|
||||||
|
cores = topology_alloc_type_places(TOPOLOGY_OBJ_CORE);
|
||||||
|
sockets = topology_alloc_type_places(TOPOLOGY_OBJ_SOCKET);
|
||||||
|
|
||||||
|
if (threads->num_places <= 1) {
|
||||||
|
printf("Only one hardware thread to execute on. Skipping test.\n");
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
value = getenv("OMP_PLACES");
|
||||||
|
if (!value) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"error: OMP_PLACES must be set to one of threads,cores,sockets!\n");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
if (strcmp(value, "threads") == 0)
|
||||||
|
type = TOPOLOGY_OBJ_THREAD;
|
||||||
|
else if (strcmp(value, "cores") == 0)
|
||||||
|
type = TOPOLOGY_OBJ_CORE;
|
||||||
|
else if (strcmp(value, "sockets") == 0)
|
||||||
|
type = TOPOLOGY_OBJ_SOCKET;
|
||||||
|
else {
|
||||||
|
fprintf(stderr,
|
||||||
|
"error: OMP_PLACES must be one of threads,cores,sockets!\n");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate of num threads per core, num cores per socket, & num sockets
|
||||||
|
if (cores->num_places <= 0) {
|
||||||
|
printf("Invalid number of cores (%d). Skipping test.\n", cores->num_places);
|
||||||
|
return status;
|
||||||
|
} else if (sockets->num_places <= 0) {
|
||||||
|
printf("Invalid number of sockets (%d). Skipping test.\n",
|
||||||
|
cores->num_places);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
int nthreads_per_core = threads->num_places / cores->num_places;
|
||||||
|
int ncores_per_socket = cores->num_places / sockets->num_places;
|
||||||
|
int nsockets = sockets->num_places;
|
||||||
|
|
||||||
|
if (nsockets * ncores_per_socket * nthreads_per_core != threads->num_places) {
|
||||||
|
printf("Only uniform topologies can be tested. Skipping test.\n");
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use half the resources of every level
|
||||||
|
if (nthreads_per_core > 1)
|
||||||
|
nthreads_per_core /= 2;
|
||||||
|
if (ncores_per_socket > 1)
|
||||||
|
ncores_per_socket /= 2;
|
||||||
|
if (nsockets > 1)
|
||||||
|
nsockets /= 2;
|
||||||
|
|
||||||
|
snprintf(buf, sizeof(buf), "%ds,%dc,%dt", nsockets, ncores_per_socket,
|
||||||
|
nthreads_per_core);
|
||||||
|
setenv("KMP_HW_SUBSET", buf, 1);
|
||||||
|
|
||||||
|
openmp_places = topology_alloc_openmp_places();
|
||||||
|
status = compare_hw_subset_places(openmp_places, type, nsockets,
|
||||||
|
ncores_per_socket, nthreads_per_core);
|
||||||
|
topology_free_places(threads);
|
||||||
|
topology_free_places(cores);
|
||||||
|
topology_free_places(sockets);
|
||||||
|
topology_free_places(openmp_places);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test entry point. The KMP_HW_SUBSET check only runs when the calling thread
// may use every logical processor; otherwise the test is skipped and reported
// as successful.
int main() {
  if (topology_using_full_mask())
    return check_places();

  printf("Thread does not have access to all logical processors. Skipping "
         "test.\n");
  return EXIT_SUCCESS;
}
|
|
@ -0,0 +1,231 @@
|
||||||
|
#ifndef LIBOMP_TEST_TOPOLOGY_H
|
||||||
|
#define LIBOMP_TEST_TOPOLOGY_H
|
||||||
|
|
||||||
|
#include "libomp_test_affinity.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <dirent.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <omp.h>
|
||||||
|
|
||||||
|
// Kinds of topology objects a place list can be built for.
typedef enum topology_obj_type_t {
  TOPOLOGY_OBJ_THREAD = 0, // one place per logical cpu
  TOPOLOGY_OBJ_CORE = 1,   // one place per set of thread siblings
  TOPOLOGY_OBJ_SOCKET = 2, // one place per set of core siblings
  TOPOLOGY_OBJ_MAX = 3     // presumably the number of kinds; not a real kind
} topology_obj_type_t;
|
||||||
|
|
||||||
|
typedef struct place_list_t {
|
||||||
|
int num_places;
|
||||||
|
affinity_mask_t **masks;
|
||||||
|
} place_list_t;
|
||||||
|
|
||||||
|
// Skip whitespace (newlines and carriage returns included) in file 'f' and
// return the first character that is not whitespace, or EOF if the file ends
// first.
static int get_first_nonspace_from_file(FILE *f) {
  int ch = fgetc(f);
  // isspace() already classifies '\n' and '\r' as whitespace.
  while (ch != EOF && isspace(ch))
    ch = fgetc(f);
  return ch;
}
|
||||||
|
|
||||||
|
// Read an integer from file 'f' into 'number'
// Return 1 on successful read of integer,
//        0 on unsuccessful read of integer,
//        EOF on end of file.
// A successfully converted integer is always reported as 1, even when it is
// the last token of a file that has no trailing newline.
static int get_integer_from_file(FILE *f, int *number) {
  int n = fscanf(f, "%d", number);
  // Check the conversion count first: reading the final digits may already
  // have set the end-of-file indicator although the integer is valid.
  if (n == 1)
    return 1;
  if (n == EOF || feof(f))
    return EOF;
  return 0;
}
|
||||||
|
|
||||||
|
// Read a siblings list file from Linux /sys/devices/system/cpu/cpu?/topology/*
|
||||||
|
static affinity_mask_t *topology_get_mask_from_file(const char *filename) {
|
||||||
|
int status = EXIT_SUCCESS;
|
||||||
|
FILE *f = fopen(filename, "r");
|
||||||
|
if (!f) {
|
||||||
|
perror(filename);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
affinity_mask_t *mask = affinity_mask_alloc();
|
||||||
|
while (1) {
|
||||||
|
int c, i, n, lower, upper;
|
||||||
|
// Read the first integer
|
||||||
|
n = get_integer_from_file(f, &lower);
|
||||||
|
if (n == EOF) {
|
||||||
|
break;
|
||||||
|
} else if (n == 0) {
|
||||||
|
fprintf(stderr, "syntax error: expected integer\n");
|
||||||
|
status = EXIT_FAILURE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now either a , or -
|
||||||
|
c = get_first_nonspace_from_file(f);
|
||||||
|
if (c == EOF || c == ',') {
|
||||||
|
affinity_mask_set(mask, lower);
|
||||||
|
if (c == EOF)
|
||||||
|
break;
|
||||||
|
} else if (c == '-') {
|
||||||
|
n = get_integer_from_file(f, &upper);
|
||||||
|
if (n == EOF || n == 0) {
|
||||||
|
fprintf(stderr, "syntax error: expected integer\n");
|
||||||
|
status = EXIT_FAILURE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
for (i = lower; i <= upper; ++i)
|
||||||
|
affinity_mask_set(mask, i);
|
||||||
|
c = get_first_nonspace_from_file(f);
|
||||||
|
if (c == EOF) {
|
||||||
|
break;
|
||||||
|
} else if (c == ',') {
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "syntax error: unexpected character: '%c (%d)'\n", c,
|
||||||
|
c);
|
||||||
|
status = EXIT_FAILURE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "syntax error: unexpected character: '%c (%d)'\n", c, c);
|
||||||
|
status = EXIT_FAILURE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fclose(f);
|
||||||
|
if (status == EXIT_FAILURE) {
|
||||||
|
affinity_mask_free(mask);
|
||||||
|
mask = NULL;
|
||||||
|
}
|
||||||
|
return mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count the cpus by probing /sys/devices/system/cpu/cpu0, cpu1, ... until a
// directory cannot be opened. At least 1 is always returned.
static int topology_get_num_cpus() {
  char path[1024];
  int count = 0;
  DIR *d;
  for (;;) {
    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d", count);
    d = opendir(path);
    if (!d)
      break;
    closedir(d);
    ++count;
  }
  return count > 0 ? count : 1;
}
|
||||||
|
|
||||||
|
// Return whether the current thread has access to all logical processors
|
||||||
|
static int topology_using_full_mask() {
|
||||||
|
int cpu;
|
||||||
|
int has_all = 1;
|
||||||
|
int num_cpus = topology_get_num_cpus();
|
||||||
|
affinity_mask_t *mask = affinity_mask_alloc();
|
||||||
|
get_thread_affinity(mask);
|
||||||
|
for (cpu = 0; cpu < num_cpus; ++cpu) {
|
||||||
|
if (!affinity_mask_isset(mask, cpu)) {
|
||||||
|
has_all = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
affinity_mask_free(mask);
|
||||||
|
return has_all;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return array of masks representing OMP_PLACES keyword (e.g., sockets, cores,
|
||||||
|
// threads)
|
||||||
|
static place_list_t *topology_alloc_type_places(topology_obj_type_t type) {
|
||||||
|
char buf[1024];
|
||||||
|
int i, cpu, num_places, num_unique;
|
||||||
|
int num_cpus = topology_get_num_cpus();
|
||||||
|
place_list_t *places = (place_list_t *)malloc(sizeof(place_list_t));
|
||||||
|
affinity_mask_t **masks =
|
||||||
|
(affinity_mask_t **)malloc(sizeof(affinity_mask_t *) * num_cpus);
|
||||||
|
num_unique = 0;
|
||||||
|
for (cpu = 0; cpu < num_cpus; ++cpu) {
|
||||||
|
affinity_mask_t *mask;
|
||||||
|
if (type == TOPOLOGY_OBJ_CORE) {
|
||||||
|
snprintf(buf, sizeof(buf),
|
||||||
|
"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
|
||||||
|
cpu);
|
||||||
|
mask = topology_get_mask_from_file(buf);
|
||||||
|
} else if (type == TOPOLOGY_OBJ_SOCKET) {
|
||||||
|
snprintf(buf, sizeof(buf),
|
||||||
|
"/sys/devices/system/cpu/cpu%d/topology/core_siblings_list",
|
||||||
|
cpu);
|
||||||
|
mask = topology_get_mask_from_file(buf);
|
||||||
|
} else if (type == TOPOLOGY_OBJ_THREAD) {
|
||||||
|
mask = affinity_mask_alloc();
|
||||||
|
affinity_mask_set(mask, cpu);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "Unknown topology type (%d)\n", (int)type);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
// Check for unique topology objects above the thread level
|
||||||
|
if (type != TOPOLOGY_OBJ_THREAD) {
|
||||||
|
for (i = 0; i < num_unique; ++i) {
|
||||||
|
if (affinity_mask_equal(masks[i], mask)) {
|
||||||
|
affinity_mask_free(mask);
|
||||||
|
mask = NULL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (mask)
|
||||||
|
masks[num_unique++] = mask;
|
||||||
|
}
|
||||||
|
places->num_places = num_unique;
|
||||||
|
places->masks = masks;
|
||||||
|
return places;
|
||||||
|
}
|
||||||
|
|
||||||
|
static place_list_t *topology_alloc_openmp_places() {
|
||||||
|
int place, i;
|
||||||
|
int num_places = omp_get_num_places();
|
||||||
|
place_list_t *places = (place_list_t *)malloc(sizeof(place_list_t));
|
||||||
|
affinity_mask_t **masks =
|
||||||
|
(affinity_mask_t **)malloc(sizeof(affinity_mask_t *) * num_places);
|
||||||
|
for (place = 0; place < num_places; ++place) {
|
||||||
|
int num_procs = omp_get_place_num_procs(place);
|
||||||
|
int *ids = (int *)malloc(sizeof(int) * num_procs);
|
||||||
|
omp_get_place_proc_ids(place, ids);
|
||||||
|
affinity_mask_t *mask = affinity_mask_alloc();
|
||||||
|
for (i = 0; i < num_procs; ++i)
|
||||||
|
affinity_mask_set(mask, ids[i]);
|
||||||
|
masks[place] = mask;
|
||||||
|
}
|
||||||
|
places->num_places = num_places;
|
||||||
|
places->masks = masks;
|
||||||
|
return places;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Free the array of masks from one of: topology_alloc_type_masks()
|
||||||
|
// or topology_alloc_openmp_masks()
|
||||||
|
static void topology_free_places(place_list_t *places) {
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < places->num_places; ++i)
|
||||||
|
affinity_mask_free(places->masks[i]);
|
||||||
|
free(places->masks);
|
||||||
|
free(places);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void topology_print_places(const place_list_t *p) {
|
||||||
|
int i;
|
||||||
|
char buf[1024];
|
||||||
|
for (i = 0; i < p->num_places; ++i) {
|
||||||
|
affinity_mask_snprintf(buf, sizeof(buf), p->masks[i]);
|
||||||
|
printf("Place %d: %s\n", i, buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,83 @@
|
||||||
|
// RUN: %libomp-compile -D_GNU_SOURCE
|
||||||
|
// RUN: env OMP_PLACES=threads %libomp-run
|
||||||
|
// RUN: env OMP_PLACES=cores %libomp-run
|
||||||
|
// RUN: env OMP_PLACES=sockets %libomp-run
|
||||||
|
// REQUIRES: linux
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "libomp_test_affinity.h"
|
||||||
|
#include "libomp_test_topology.h"
|
||||||
|
|
||||||
|
// Compare place lists. The order is not taken into consideration here.
|
||||||
|
// The OS detection might have the cores/sockets in a different
|
||||||
|
// order from the runtime.
|
||||||
|
static int compare_places(const place_list_t *p1, const place_list_t *p2) {
|
||||||
|
int i, j;
|
||||||
|
if (p1->num_places != p2->num_places) {
|
||||||
|
fprintf(stderr, "error: places do not have same number of places! (p1 has "
|
||||||
|
"%d, p2 has %d)\n",
|
||||||
|
p1->num_places, p2->num_places);
|
||||||
|
printf("p1 places:\n");
|
||||||
|
topology_print_places(p1);
|
||||||
|
printf("\n");
|
||||||
|
printf("p2 places:\n");
|
||||||
|
topology_print_places(p1);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
for (i = 0; i < p1->num_places; ++i) {
|
||||||
|
int found = 0;
|
||||||
|
for (j = 0; j < p2->num_places; ++j) {
|
||||||
|
if (affinity_mask_equal(p1->masks[i], p2->masks[j])) {
|
||||||
|
found = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!found) {
|
||||||
|
printf("Found difference in places!\n");
|
||||||
|
printf("p1 places:\n");
|
||||||
|
topology_print_places(p1);
|
||||||
|
printf("\n");
|
||||||
|
printf("p2 places:\n");
|
||||||
|
topology_print_places(p1);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int check_places() {
|
||||||
|
int status;
|
||||||
|
const char *value = getenv("OMP_PLACES");
|
||||||
|
if (!value) {
|
||||||
|
fprintf(stderr, "error: must set OMP_PLACES envirable for this test!\n");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
place_list_t *places, *openmp_places;
|
||||||
|
if (strcmp(value, "sockets") == 0) {
|
||||||
|
places = topology_alloc_type_places(TOPOLOGY_OBJ_SOCKET);
|
||||||
|
} else if (strcmp(value, "cores") == 0) {
|
||||||
|
places = topology_alloc_type_places(TOPOLOGY_OBJ_CORE);
|
||||||
|
} else if (strcmp(value, "threads") == 0) {
|
||||||
|
places = topology_alloc_type_places(TOPOLOGY_OBJ_THREAD);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr,
|
||||||
|
"error: OMP_PLACES must be one of threads,cores,sockets!\n");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
openmp_places = topology_alloc_openmp_places();
|
||||||
|
status = compare_places(places, openmp_places);
|
||||||
|
topology_free_places(places);
|
||||||
|
topology_free_places(openmp_places);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test entry point. The OMP_PLACES check only runs when the calling thread may
// use every logical processor; otherwise the test is skipped and reported as
// successful.
int main() {
  if (topology_using_full_mask())
    return check_places();

  printf("Thread does not have access to all logical processors. Skipping "
         "test.\n");
  return EXIT_SUCCESS;
}
|
Loading…
Reference in New Issue