forked from OSchip/llvm-project
Adding Hwloc library option for affinity mechanism
These changes allow libhwloc to be used as the topology discovery/affinity mechanism for libomp. It is supported on Unices. The code additions: * Canonicalize KMP_CPU_* interface macros so bitmask operations are implementation independent and work with both hwloc bitmaps and libomp bitmaps. So there are new KMP_CPU_ALLOC_* and KMP_CPU_ITERATE() macros and the like. These are all in kmp.h and appropriately placed. * Hwloc topology discovery code in kmp_affinity.cpp. This uses the hwloc interface to create a libomp address2os object which the rest of libomp knows how to handle already. * To build, use -DLIBOMP_USE_HWLOC=on and -DLIBOMP_HWLOC_INSTALL_DIR=/path/to/install/dir [default /usr/local]. If CMake can't find the library or hwloc.h, then it will tell you and exit. Differential Revision: http://reviews.llvm.org/D13991 llvm-svn: 254320
This commit is contained in:
parent
7a096596b2
commit
01dcf36bd5
|
@ -159,6 +159,18 @@ Should include stats-gathering code be included in the build?
|
|||
-DLIBOMP_USE_DEBUGGER=off|on
|
||||
Should the friendly debugger interface be included in the build?
|
||||
|
||||
-DLIBOMP_USE_HWLOC=off|on
|
||||
Should the Hwloc library be used for affinity?
|
||||
This option is not supported on Windows.
|
||||
http://www.open-mpi.org/projects/hwloc
|
||||
|
||||
-DLIBOMP_HWLOC_INSTALL_DIR=/path/to/hwloc/install/dir
|
||||
Default: /usr/local
|
||||
This option is only used if LIBOMP_USE_HWLOC is on.
|
||||
Specifies install location of Hwloc. The configuration system will look for
|
||||
hwloc.h in ${LIBOMP_HWLOC_INSTALL_DIR}/include and the library in
|
||||
${LIBOMP_HWLOC_INSTALL_DIR}/lib.
|
||||
|
||||
================================
|
||||
How to append flags to the build
|
||||
================================
|
||||
|
|
|
@ -135,6 +135,12 @@ set(LIBOMP_FFLAGS "" CACHE STRING
|
|||
set(LIBOMP_COPY_EXPORTS TRUE CACHE STRING
|
||||
"Should exports be copied into source exports/ directory?")
|
||||
|
||||
# HWLOC-support
|
||||
set(LIBOMP_USE_HWLOC FALSE CACHE BOOL
|
||||
"Use Hwloc (http://www.open-mpi.org/projects/hwloc/) library for affinity?")
|
||||
set(LIBOMP_HWLOC_INSTALL_DIR /usr/local CACHE PATH
|
||||
"Install path for hwloc library")
|
||||
|
||||
# Get the build number from kmp_version.c
|
||||
libomp_get_build_number("${CMAKE_CURRENT_SOURCE_DIR}" LIBOMP_VERSION_BUILD)
|
||||
math(EXPR LIBOMP_VERSION_BUILD_YEAR "${LIBOMP_VERSION_BUILD}/10000")
|
||||
|
@ -285,6 +291,11 @@ if(LIBOMP_OMPT_SUPPORT AND (NOT LIBOMP_HAVE_OMPT_SUPPORT))
|
|||
libomp_error_say("OpenMP Tools Interface requested but not available")
|
||||
endif()
|
||||
|
||||
# Error check hwloc support after config-ix has run
|
||||
if(LIBOMP_USE_HWLOC AND (NOT LIBOMP_HAVE_HWLOC))
|
||||
libomp_error_say("Hwloc requested but not available")
|
||||
endif()
|
||||
|
||||
# Setting final library name
|
||||
set(LIBOMP_DEFAULT_LIB_NAME libomp)
|
||||
if(${PROFILE_LIBRARY})
|
||||
|
@ -323,6 +334,7 @@ if(${LIBOMP_STANDALONE_BUILD})
|
|||
endif()
|
||||
libomp_say("Use Adaptive locks -- ${LIBOMP_USE_ADAPTIVE_LOCKS}")
|
||||
libomp_say("Use quad precision -- ${LIBOMP_USE_QUAD_PRECISION}")
|
||||
libomp_say("Use Hwloc library -- ${LIBOMP_USE_HWLOC}")
|
||||
endif()
|
||||
|
||||
add_subdirectory(src)
|
||||
|
|
|
@ -151,6 +151,7 @@ endfunction()
|
|||
function(libomp_get_libflags libflags)
|
||||
set(libflags_local)
|
||||
libomp_append(libflags_local "${CMAKE_THREAD_LIBS_INIT}")
|
||||
libomp_append(libflags_local "${LIBOMP_HWLOC_LIBRARY}" LIBOMP_USE_HWLOC)
|
||||
if(${IA32})
|
||||
libomp_append(libflags_local -lirc_pic LIBOMP_HAVE_IRC_PIC_LIBRARY)
|
||||
endif()
|
||||
|
|
|
@ -82,10 +82,13 @@ else() # (Unix based systems, Intel(R) MIC Architecture, and Mac)
|
|||
libomp_append(libomp_test_touch_cflags -m32 LIBOMP_HAVE_M32_FLAG)
|
||||
endif()
|
||||
libomp_append(libomp_test_touch_libs ${LIBOMP_OUTPUT_DIRECTORY}/${LIBOMP_LIB_FILE})
|
||||
libomp_append(libomp_test_touch_libs "${LIBOMP_HWLOC_LIBRARY}" LIBOMP_USE_HWLOC)
|
||||
if(APPLE)
|
||||
set(libomp_test_touch_env "DYLD_LIBRARY_PATH=.:${LIBOMP_OUTPUT_DIRECTORY}:$ENV{DYLD_LIBRARY_PATH}")
|
||||
libomp_append(libomp_test_touch_ldflags "-Wl,-rpath,${LIBOMP_HWLOC_LIBRARY_DIR}" LIBOMP_USE_HWLOC)
|
||||
else()
|
||||
set(libomp_test_touch_env "LD_LIBRARY_PATH=.:${LIBOMP_OUTPUT_DIRECTORY}:$ENV{LD_LIBRARY_PATH}")
|
||||
libomp_append(libomp_test_touch_ldflags "-Wl,-rpath=${LIBOMP_HWLOC_LIBRARY_DIR}" LIBOMP_USE_HWLOC)
|
||||
endif()
|
||||
endif()
|
||||
macro(libomp_test_touch_recipe test_touch_dir)
|
||||
|
@ -169,8 +172,10 @@ add_custom_target(libomp-test-deps DEPENDS test-deps/.success)
|
|||
set(libomp_expected_library_deps)
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
|
||||
set(libomp_expected_library_deps libc.so.7 libthr.so.3)
|
||||
libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC)
|
||||
elseif(CMAKE_SYSTEM_NAME MATCHES "NetBSD")
|
||||
set(libomp_expected_library_deps libc.so.12 libpthread.so.1 libm.so.0)
|
||||
libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC)
|
||||
elseif(APPLE)
|
||||
set(libomp_expected_library_deps /usr/lib/libSystem.B.dylib)
|
||||
elseif(WIN32)
|
||||
|
@ -203,6 +208,7 @@ else()
|
|||
libomp_append(libomp_expected_library_deps ld64.so.1)
|
||||
endif()
|
||||
libomp_append(libomp_expected_library_deps libpthread.so.0 IF_FALSE STUBS_LIBRARY)
|
||||
libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC)
|
||||
endif()
|
||||
libomp_append(libomp_expected_library_deps libstdc++.so.6 LIBOMP_USE_STDCPPLIB)
|
||||
endif()
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
include(CheckCCompilerFlag)
|
||||
include(CheckCSourceCompiles)
|
||||
include(CheckCXXCompilerFlag)
|
||||
include(CheckIncludeFile)
|
||||
include(CheckLibraryExists)
|
||||
include(CheckIncludeFiles)
|
||||
include(LibompCheckLinkerFlag)
|
||||
|
@ -211,3 +212,25 @@ else()
|
|||
endif()
|
||||
endif()
|
||||
|
||||
# Check if HWLOC support is available.
#
# Inputs:
#   LIBOMP_USE_HWLOC         - user request to use hwloc for affinity
#   LIBOMP_HWLOC_INSTALL_DIR - prefix expected to contain include/hwloc.h
#                              and lib/<hwloc library>
# Outputs:
#   LIBOMP_HAVE_HWLOC        - TRUE only when the header, the symbol
#                              hwloc_topology_init and the library file
#                              were all found
#   LIBOMP_HWLOC_LIBRARY     - path of the hwloc library (cache entry)
#   LIBOMP_HWLOC_LIBRARY_DIR - directory holding that library
if(LIBOMP_USE_HWLOC)
  if(WIN32)
    set(LIBOMP_HAVE_HWLOC FALSE)
    libomp_say("Using hwloc not supported on Windows yet")
  else()
    # Probe for the header inside the requested install prefix only for the
    # duration of this check, then restore the variable.
    set(CMAKE_REQUIRED_INCLUDES "${LIBOMP_HWLOC_INSTALL_DIR}/include")
    check_include_file(hwloc.h LIBOMP_HAVE_HWLOC_H)
    set(CMAKE_REQUIRED_INCLUDES)
    check_library_exists(hwloc hwloc_topology_init
      "${LIBOMP_HWLOC_INSTALL_DIR}/lib" LIBOMP_HAVE_LIBHWLOC)
    # HINTS are searched before the system default locations, so the library
    # is taken from the same prefix as the header checked above.  The
    # shorthand find_library(<var> name path) form treats the path as PATHS,
    # which is searched last and lets a system-wide hwloc win.
    find_library(LIBOMP_HWLOC_LIBRARY
      NAMES hwloc
      HINTS "${LIBOMP_HWLOC_INSTALL_DIR}/lib")
    if(LIBOMP_HWLOC_LIBRARY)
      get_filename_component(LIBOMP_HWLOC_LIBRARY_DIR "${LIBOMP_HWLOC_LIBRARY}" PATH)
    endif()
    if(LIBOMP_HAVE_HWLOC_H AND LIBOMP_HAVE_LIBHWLOC AND LIBOMP_HWLOC_LIBRARY)
      set(LIBOMP_HAVE_HWLOC TRUE)
    else()
      set(LIBOMP_HAVE_HWLOC FALSE)
      libomp_say("Could not find hwloc")
    endif()
  endif()
endif()
|
||||
|
||||
|
|
|
@ -42,6 +42,9 @@ include_directories(
|
|||
${LIBOMP_INC_DIR}
|
||||
${LIBOMP_SRC_DIR}/thirdparty/ittnotify
|
||||
)
|
||||
if(${LIBOMP_USE_HWLOC})
|
||||
include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include)
|
||||
endif()
|
||||
|
||||
# Getting correct source files to build library
|
||||
set(LIBOMP_CFILES)
|
||||
|
|
|
@ -405,6 +405,9 @@ AffGranTopGroup "%1$s: granularity=%2$s is not supported with KMP_T
|
|||
AffGranGroupType "%1$s: granularity=group is not supported with KMP_AFFINITY=%2$s. Using \"granularity=core\"."
|
||||
AffThrPlaceManySockets "KMP_PLACE_THREADS ignored: too many sockets requested."
|
||||
AffThrPlaceDeprecated "KMP_PLACE_THREADS \"o\" offset designator deprecated, please use @ prefix for offset value."
|
||||
AffUsingHwloc "%1$s: Affinity capable, using hwloc."
|
||||
AffIgnoringHwloc "%1$s: Ignoring hwloc mechanism."
|
||||
AffHwlocErrorOccurred "%1$s: Hwloc failed in %2$s. Relying on internal affinity mechanisms."
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------------------------------
|
||||
|
|
|
@ -77,10 +77,18 @@
|
|||
|
||||
#include "kmp_os.h"
|
||||
|
||||
#include "kmp_safe_c_api.h"
|
||||
|
||||
#if KMP_STATS_ENABLED
|
||||
class kmp_stats_list;
|
||||
#endif
|
||||
|
||||
#if KMP_USE_HWLOC
|
||||
#include "hwloc.h"
|
||||
extern hwloc_topology_t __kmp_hwloc_topology;
|
||||
extern int __kmp_hwloc_error;
|
||||
#endif
|
||||
|
||||
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
|
@ -488,6 +496,78 @@ extern size_t __kmp_affin_mask_size;
|
|||
# define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
|
||||
# define KMP_CPU_SETSIZE (__kmp_affin_mask_size * CHAR_BIT)
|
||||
|
||||
#if KMP_USE_HWLOC
|
||||
|
||||
//
// hwloc-backed affinity masks.  kmp_affin_mask_t is a hwloc bitmap handle;
// the runtime keeps passing masks around as "kmp_affin_mask_t *", so every
// macro casts its mask argument back to hwloc_cpuset_t before forwarding to
// the hwloc bitmap API.  All macro arguments are parenthesized so that
// arbitrary expressions can be passed safely.
//
typedef hwloc_cpuset_t kmp_affin_mask_t;
# define KMP_CPU_SET(i,mask)       hwloc_bitmap_set((hwloc_cpuset_t)(mask), (unsigned)(i))
# define KMP_CPU_ISSET(i,mask)     hwloc_bitmap_isset((hwloc_cpuset_t)(mask), (unsigned)(i))
# define KMP_CPU_CLR(i,mask)       hwloc_bitmap_clr((hwloc_cpuset_t)(mask), (unsigned)(i))
# define KMP_CPU_ZERO(mask)        hwloc_bitmap_zero((hwloc_cpuset_t)(mask))
# define KMP_CPU_COPY(dest, src)   hwloc_bitmap_copy((hwloc_cpuset_t)(dest), (hwloc_cpuset_t)(src))
// Flip every bit in the inclusive range [0, max_bit_number].
# define KMP_CPU_COMPLEMENT(max_bit_number, mask)                              \
    {                                                                          \
        unsigned __i;                                                          \
        for (__i = 0; __i < (unsigned)(max_bit_number) + 1; __i++) {           \
            if (hwloc_bitmap_isset((hwloc_cpuset_t)(mask), __i)) {             \
                hwloc_bitmap_clr((hwloc_cpuset_t)(mask), __i);                 \
            } else {                                                           \
                hwloc_bitmap_set((hwloc_cpuset_t)(mask), __i);                 \
            }                                                                  \
        }                                                                      \
    }

# define KMP_CPU_UNION(dest, src)  hwloc_bitmap_or((hwloc_cpuset_t)(dest), (hwloc_cpuset_t)(dest), (hwloc_cpuset_t)(src))
// Visit only the bits that are set; hwloc_bitmap_first/next yield -1 when
// there are no (more) set bits.
# define KMP_CPU_SET_ITERATE(i,mask) \
    for (i = hwloc_bitmap_first((hwloc_cpuset_t)(mask)); (int)(i) != -1; i = hwloc_bitmap_next((hwloc_cpuset_t)(mask), i))

// Single-mask allocation.  With hwloc there is no distinct stack or internal
// allocator, so all three flavours map onto hwloc_bitmap_alloc/free.
// KMP_CPU_FREE deliberately has no trailing semicolon (matching the
// non-hwloc definition) so that "if (c) KMP_CPU_FREE(p); else ..." compiles.
# define KMP_CPU_ALLOC(ptr)             ((ptr) = (kmp_affin_mask_t*)hwloc_bitmap_alloc())
# define KMP_CPU_FREE(ptr)              hwloc_bitmap_free((hwloc_bitmap_t)(ptr))
# define KMP_CPU_ALLOC_ON_STACK(ptr)    KMP_CPU_ALLOC(ptr)
# define KMP_CPU_FREE_FROM_STACK(ptr)   KMP_CPU_FREE(ptr)
# define KMP_CPU_INTERNAL_ALLOC(ptr)    KMP_CPU_ALLOC(ptr)
# define KMP_CPU_INTERNAL_FREE(ptr)     KMP_CPU_FREE(ptr)

//
// The following macro should be used to index an array of masks.
// The array should be declared as "kmp_affin_mask_t *".  With hwloc each
// array element is itself a bitmap handle (allocated by the *_ALLOC_ARRAY
// macros below), so indexing is a plain subscript followed by a cast to the
// "kmp_affin_mask_t *" type that the other KMP_CPU_* macros accept.
//
# define KMP_CPU_INDEX(array,i) ((kmp_affin_mask_t*)((array)[i]))
// Allocate an array of n handles with __kmp_allocate and give every slot its
// own bitmap.
# define KMP_CPU_ALLOC_ARRAY(arr, n) {                                             \
    unsigned __i;                                                                  \
    (arr) = (kmp_affin_mask_t *)__kmp_allocate((n)*sizeof(kmp_affin_mask_t));      \
    for (__i = 0; __i < (unsigned)(n); __i++) {                                    \
        (arr)[__i] = hwloc_bitmap_alloc();                                         \
    }                                                                              \
}
// Release the n bitmaps of an array created by KMP_CPU_ALLOC_ARRAY, then the
// array itself.
# define KMP_CPU_FREE_ARRAY(arr, n) {                                              \
    unsigned __i;                                                                  \
    for (__i = 0; __i < (unsigned)(n); __i++) {                                    \
        hwloc_bitmap_free((arr)[__i]);                                             \
    }                                                                              \
    __kmp_free(arr);                                                               \
}
// Same as KMP_CPU_ALLOC_ARRAY, but the handle array comes from
// KMP_INTERNAL_MALLOC.
# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) {                                    \
    unsigned __i;                                                                  \
    (arr) = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC((n)*sizeof(kmp_affin_mask_t)); \
    for (__i = 0; __i < (unsigned)(n); __i++) {                                    \
        (arr)[__i] = hwloc_bitmap_alloc();                                         \
    }                                                                              \
}
// Counterpart of KMP_CPU_INTERNAL_ALLOC_ARRAY.
# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) {                                     \
    unsigned __i;                                                                  \
    for (__i = 0; __i < (unsigned)(n); __i++) {                                    \
        hwloc_bitmap_free((arr)[__i]);                                             \
    }                                                                              \
    KMP_INTERNAL_FREE(arr);                                                        \
}
|
||||
|
||||
#else /* KMP_USE_HWLOC */
|
||||
# define KMP_CPU_SET_ITERATE(i,mask) \
|
||||
for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i)
|
||||
|
||||
# if KMP_OS_LINUX
|
||||
//
|
||||
// On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size
|
||||
|
@ -526,7 +606,7 @@ typedef unsigned char kmp_affin_mask_t;
|
|||
} \
|
||||
}
|
||||
|
||||
# define KMP_CPU_COMPLEMENT(mask) \
|
||||
# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
|
||||
{ \
|
||||
size_t __i; \
|
||||
for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
|
||||
|
@ -605,7 +685,7 @@ extern int __kmp_num_proc_groups;
|
|||
} \
|
||||
}
|
||||
|
||||
# define KMP_CPU_COMPLEMENT(mask) \
|
||||
# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
|
||||
{ \
|
||||
int __i; \
|
||||
for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
|
||||
|
@ -637,7 +717,7 @@ extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
|
|||
|
||||
extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask);
|
||||
|
||||
# else
|
||||
# else /* KMP_GROUP_AFFINITY */
|
||||
|
||||
typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */
|
||||
|
||||
|
@ -646,7 +726,7 @@ typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */
|
|||
# define KMP_CPU_CLR(i,mask) (*(mask) &= ~(((kmp_affin_mask_t)1) << (i)))
|
||||
# define KMP_CPU_ZERO(mask) (*(mask) = 0)
|
||||
# define KMP_CPU_COPY(dest, src) (*(dest) = *(src))
|
||||
# define KMP_CPU_COMPLEMENT(mask) (*(mask) = ~*(mask))
|
||||
# define KMP_CPU_COMPLEMENT(max_bit_number, mask) (*(mask) = ~*(mask))
|
||||
# define KMP_CPU_UNION(dest, src) (*(dest) |= *(src))
|
||||
|
||||
# endif /* KMP_GROUP_AFFINITY */
|
||||
|
@ -660,6 +740,10 @@ typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */
|
|||
# define KMP_CPU_ALLOC(ptr) \
|
||||
(ptr = ((kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size)))
|
||||
# define KMP_CPU_FREE(ptr) __kmp_free(ptr)
|
||||
# define KMP_CPU_ALLOC_ON_STACK(ptr) (ptr = ((kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size)))
|
||||
# define KMP_CPU_FREE_FROM_STACK(ptr) /* Nothing */
|
||||
# define KMP_CPU_INTERNAL_ALLOC(ptr) (ptr = ((kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(__kmp_affin_mask_size)))
|
||||
# define KMP_CPU_INTERNAL_FREE(ptr) KMP_INTERNAL_FREE(ptr)
|
||||
|
||||
//
|
||||
// The following macro should be used to index an array of masks.
|
||||
|
@ -670,6 +754,12 @@ typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */
|
|||
//
|
||||
# define KMP_CPU_INDEX(array,i) \
|
||||
((kmp_affin_mask_t *)(((char *)(array)) + (i) * __kmp_affin_mask_size))
|
||||
# define KMP_CPU_ALLOC_ARRAY(arr, n) arr = (kmp_affin_mask_t *)__kmp_allocate(n * __kmp_affin_mask_size)
|
||||
# define KMP_CPU_FREE_ARRAY(arr, n) __kmp_free(arr);
|
||||
# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n * __kmp_affin_mask_size)
|
||||
# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_INTERNAL_FREE(arr);
|
||||
|
||||
#endif /* KMP_USE_HWLOC */
|
||||
|
||||
//
|
||||
// Declare local char buffers with this size for printing debug and info
|
||||
|
@ -716,6 +806,9 @@ enum affinity_top_method {
|
|||
affinity_top_method_group,
|
||||
#endif /* KMP_GROUP_AFFINITY */
|
||||
affinity_top_method_flat,
|
||||
#if KMP_USE_HWLOC
|
||||
affinity_top_method_hwloc,
|
||||
#endif
|
||||
affinity_top_method_default
|
||||
};
|
||||
|
||||
|
|
|
@ -50,6 +50,50 @@ void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
|
|||
//
|
||||
// Print the affinity mask to the character array in a pretty format.
|
||||
//
|
||||
#if KMP_USE_HWLOC
|
||||
char *
|
||||
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
|
||||
{
|
||||
int num_chars_to_write, num_chars_written;
|
||||
char* scan;
|
||||
KMP_ASSERT(buf_len >= 40);
|
||||
|
||||
// bufsize of 0 just retrieves the needed buffer size.
|
||||
num_chars_to_write = hwloc_bitmap_list_snprintf(buf, 0, (hwloc_bitmap_t)mask);
|
||||
|
||||
// need '{', "xxxxxxxx...xx", '}', '\0' = num_chars_to_write + 3 bytes
|
||||
// * num_chars_to_write returned by hwloc_bitmap_list_snprintf does not
|
||||
// take into account the '\0' character.
|
||||
if(hwloc_bitmap_iszero((hwloc_bitmap_t)mask)) {
|
||||
KMP_SNPRINTF(buf, buf_len, "{<empty>}");
|
||||
} else if(num_chars_to_write < buf_len - 3) {
|
||||
// no problem fitting the mask into buf_len number of characters
|
||||
buf[0] = '{';
|
||||
// use buf_len-3 because we have the three characters: '{' '}' '\0' to add to the buffer
|
||||
num_chars_written = hwloc_bitmap_list_snprintf(buf+1, buf_len-3, (hwloc_bitmap_t)mask);
|
||||
buf[num_chars_written+1] = '}';
|
||||
buf[num_chars_written+2] = '\0';
|
||||
} else {
|
||||
// Need to truncate the affinity mask string and add ellipsis.
|
||||
// To do this, we first write out the '{' + str(mask)
|
||||
buf[0] = '{';
|
||||
hwloc_bitmap_list_snprintf(buf+1, buf_len-7, (hwloc_bitmap_t)mask);
|
||||
// then, what we do here is go to the 7th to last character, then go backwards until we are NOT
|
||||
// on a digit then write "...}\0". This way it is a clean ellipsis addition and we don't
|
||||
// overwrite part of an affinity number. i.e., we avoid something like { 45, 67, 8...} and get
|
||||
// { 45, 67,...} instead.
|
||||
scan = buf + buf_len - 7;
|
||||
while(*scan >= '0' && *scan <= '9' && scan >= buf)
|
||||
scan--;
|
||||
*(scan+1) = '.';
|
||||
*(scan+2) = '.';
|
||||
*(scan+3) = '.';
|
||||
*(scan+4) = '}';
|
||||
*(scan+5) = '\0';
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
#else
|
||||
char *
|
||||
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
|
||||
{
|
||||
|
@ -102,6 +146,7 @@ __kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
|
|||
KMP_ASSERT(scan <= end);
|
||||
return buf;
|
||||
}
|
||||
#endif // KMP_USE_HWLOC
|
||||
|
||||
|
||||
void
|
||||
|
@ -263,6 +308,291 @@ __kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
|
|||
}
|
||||
}
|
||||
|
||||
#if KMP_USE_HWLOC
|
||||
static int
|
||||
__kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
|
||||
kmp_i18n_id_t *const msg_id)
|
||||
{
|
||||
*address2os = NULL;
|
||||
*msg_id = kmp_i18n_null;
|
||||
|
||||
//
|
||||
// Save the affinity mask for the current thread.
|
||||
//
|
||||
kmp_affin_mask_t *oldMask;
|
||||
KMP_CPU_ALLOC(oldMask);
|
||||
__kmp_get_system_affinity(oldMask, TRUE);
|
||||
|
||||
unsigned depth = hwloc_topology_get_depth(__kmp_hwloc_topology);
|
||||
int threadLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_PU);
|
||||
int coreLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_CORE);
|
||||
int pkgLevel = hwloc_get_type_depth(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET);
|
||||
__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 0;
|
||||
|
||||
//
|
||||
// This makes an assumption about the topology being four levels:
|
||||
// machines -> packages -> cores -> hardware threads
|
||||
//
|
||||
hwloc_obj_t current_level_iterator = hwloc_get_root_obj(__kmp_hwloc_topology);
|
||||
hwloc_obj_t child_iterator;
|
||||
for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL);
|
||||
child_iterator != NULL;
|
||||
child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator))
|
||||
{
|
||||
nPackages++;
|
||||
}
|
||||
current_level_iterator = hwloc_get_obj_by_depth(__kmp_hwloc_topology, pkgLevel, 0);
|
||||
for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL);
|
||||
child_iterator != NULL;
|
||||
child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator))
|
||||
{
|
||||
nCoresPerPkg++;
|
||||
}
|
||||
current_level_iterator = hwloc_get_obj_by_depth(__kmp_hwloc_topology, coreLevel, 0);
|
||||
for(child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, NULL);
|
||||
child_iterator != NULL;
|
||||
child_iterator = hwloc_get_next_child(__kmp_hwloc_topology, current_level_iterator, child_iterator))
|
||||
{
|
||||
__kmp_nThreadsPerCore++;
|
||||
}
|
||||
|
||||
if (! KMP_AFFINITY_CAPABLE())
|
||||
{
|
||||
//
|
||||
// Hack to try and infer the machine topology using only the data
|
||||
// available from cpuid on the current thread, and __kmp_xproc.
|
||||
//
|
||||
KMP_ASSERT(__kmp_affinity_type == affinity_none);
|
||||
|
||||
__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
|
||||
nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
|
||||
if (__kmp_affinity_verbose) {
|
||||
KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
|
||||
KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
|
||||
if (__kmp_affinity_uniform_topology()) {
|
||||
KMP_INFORM(Uniform, "KMP_AFFINITY");
|
||||
} else {
|
||||
KMP_INFORM(NonUniform, "KMP_AFFINITY");
|
||||
}
|
||||
KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
|
||||
__kmp_nThreadsPerCore, __kmp_ncores);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
//
|
||||
// Allocate the data structure to be returned.
|
||||
//
|
||||
AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
|
||||
|
||||
unsigned num_hardware_threads = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, threadLevel);
|
||||
unsigned i;
|
||||
hwloc_obj_t hardware_thread_iterator;
|
||||
int nActiveThreads = 0;
|
||||
for(i=0;i<num_hardware_threads;i++) {
|
||||
hardware_thread_iterator = hwloc_get_obj_by_depth(__kmp_hwloc_topology, threadLevel, i);
|
||||
Address addr(3);
|
||||
if(! KMP_CPU_ISSET(i, fullMask)) continue;
|
||||
addr.labels[0] = hardware_thread_iterator->parent->parent->logical_index;
|
||||
addr.labels[1] = hardware_thread_iterator->parent->logical_index % nCoresPerPkg;
|
||||
addr.labels[2] = hardware_thread_iterator->logical_index % __kmp_nThreadsPerCore;
|
||||
retval[nActiveThreads] = AddrUnsPair(addr, hardware_thread_iterator->os_index);
|
||||
nActiveThreads++;
|
||||
}
|
||||
|
||||
//
|
||||
// If there's only one thread context to bind to, return now.
|
||||
//
|
||||
KMP_ASSERT(nActiveThreads > 0);
|
||||
if (nActiveThreads == 1) {
|
||||
__kmp_ncores = nPackages = 1;
|
||||
__kmp_nThreadsPerCore = nCoresPerPkg = 1;
|
||||
if (__kmp_affinity_verbose) {
|
||||
char buf[KMP_AFFIN_MASK_PRINT_LEN];
|
||||
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
|
||||
|
||||
KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
|
||||
if (__kmp_affinity_respect_mask) {
|
||||
KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
|
||||
} else {
|
||||
KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
|
||||
}
|
||||
KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
|
||||
KMP_INFORM(Uniform, "KMP_AFFINITY");
|
||||
KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
|
||||
__kmp_nThreadsPerCore, __kmp_ncores);
|
||||
}
|
||||
|
||||
if (__kmp_affinity_type == affinity_none) {
|
||||
__kmp_free(retval);
|
||||
KMP_CPU_FREE(oldMask);
|
||||
return 0;
|
||||
}
|
||||
|
||||
//
|
||||
// Form an Address object which only includes the package level.
|
||||
//
|
||||
Address addr(1);
|
||||
addr.labels[0] = retval[0].first.labels[pkgLevel-1];
|
||||
retval[0].first = addr;
|
||||
|
||||
if (__kmp_affinity_gran_levels < 0) {
|
||||
__kmp_affinity_gran_levels = 0;
|
||||
}
|
||||
|
||||
if (__kmp_affinity_verbose) {
|
||||
__kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
|
||||
}
|
||||
|
||||
*address2os = retval;
|
||||
KMP_CPU_FREE(oldMask);
|
||||
return 1;
|
||||
}
|
||||
|
||||
//
|
||||
// Sort the table by physical Id.
|
||||
//
|
||||
qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
|
||||
|
||||
//
|
||||
// When affinity is off, this routine will still be called to set
|
||||
// __kmp_ncores, as well as __kmp_nThreadsPerCore,
|
||||
// nCoresPerPkg, & nPackages. Make sure all these vars are set
|
||||
// correctly, and return if affinity is not enabled.
|
||||
//
|
||||
__kmp_ncores = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, coreLevel);
|
||||
|
||||
//
|
||||
// Check to see if the machine topology is uniform
|
||||
//
|
||||
unsigned npackages = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, pkgLevel);
|
||||
unsigned ncores = __kmp_ncores;
|
||||
unsigned nthreads = hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology, threadLevel);
|
||||
unsigned uniform = (npackages * nCoresPerPkg * __kmp_nThreadsPerCore == nthreads);
|
||||
|
||||
//
|
||||
// Print the machine topology summary.
|
||||
//
|
||||
if (__kmp_affinity_verbose) {
|
||||
char mask[KMP_AFFIN_MASK_PRINT_LEN];
|
||||
__kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
|
||||
|
||||
KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
|
||||
if (__kmp_affinity_respect_mask) {
|
||||
KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
|
||||
} else {
|
||||
KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
|
||||
}
|
||||
KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
|
||||
if (uniform) {
|
||||
KMP_INFORM(Uniform, "KMP_AFFINITY");
|
||||
} else {
|
||||
KMP_INFORM(NonUniform, "KMP_AFFINITY");
|
||||
}
|
||||
|
||||
kmp_str_buf_t buf;
|
||||
__kmp_str_buf_init(&buf);
|
||||
|
||||
__kmp_str_buf_print(&buf, "%d", npackages);
|
||||
//for (level = 1; level <= pkgLevel; level++) {
|
||||
// __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
|
||||
// }
|
||||
KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
|
||||
__kmp_nThreadsPerCore, __kmp_ncores);
|
||||
|
||||
__kmp_str_buf_free(&buf);
|
||||
}
|
||||
|
||||
if (__kmp_affinity_type == affinity_none) {
|
||||
KMP_CPU_FREE(oldMask);
|
||||
return 0;
|
||||
}
|
||||
|
||||
//
|
||||
// Find any levels with radiix 1, and remove them from the map
|
||||
// (except for the package level).
|
||||
//
|
||||
int new_depth = 0;
|
||||
int level;
|
||||
unsigned proc;
|
||||
for (level = 1; level < (int)depth; level++) {
|
||||
if ((hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology,level) == 1) && (level != pkgLevel)) {
|
||||
continue;
|
||||
}
|
||||
new_depth++;
|
||||
}
|
||||
|
||||
//
|
||||
// If we are removing any levels, allocate a new vector to return,
|
||||
// and copy the relevant information to it.
|
||||
//
|
||||
if (new_depth != depth-1) {
|
||||
AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
|
||||
sizeof(AddrUnsPair) * nActiveThreads);
|
||||
for (proc = 0; (int)proc < nActiveThreads; proc++) {
|
||||
Address addr(new_depth);
|
||||
new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
|
||||
}
|
||||
int new_level = 0;
|
||||
for (level = 1; level < (int)depth; level++) {
|
||||
if ((hwloc_get_nbobjs_by_depth(__kmp_hwloc_topology,level) == 1) && (level != pkgLevel)) {
|
||||
if (level == threadLevel) {
|
||||
threadLevel = -1;
|
||||
}
|
||||
else if ((threadLevel >= 0) && (level < threadLevel)) {
|
||||
threadLevel--;
|
||||
}
|
||||
if (level == coreLevel) {
|
||||
coreLevel = -1;
|
||||
}
|
||||
else if ((coreLevel >= 0) && (level < coreLevel)) {
|
||||
coreLevel--;
|
||||
}
|
||||
if (level < pkgLevel) {
|
||||
pkgLevel--;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
for (proc = 0; (int)proc < nActiveThreads; proc++) {
|
||||
new_retval[proc].first.labels[new_level]
|
||||
= retval[proc].first.labels[level];
|
||||
}
|
||||
new_level++;
|
||||
}
|
||||
|
||||
__kmp_free(retval);
|
||||
retval = new_retval;
|
||||
depth = new_depth;
|
||||
}
|
||||
|
||||
if (__kmp_affinity_gran_levels < 0) {
|
||||
//
|
||||
// Set the granularity level based on what levels are modeled
|
||||
// in the machine topology map.
|
||||
//
|
||||
__kmp_affinity_gran_levels = 0;
|
||||
if ((threadLevel-1 >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
|
||||
__kmp_affinity_gran_levels++;
|
||||
}
|
||||
if ((coreLevel-1 >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
|
||||
__kmp_affinity_gran_levels++;
|
||||
}
|
||||
if (__kmp_affinity_gran > affinity_gran_package) {
|
||||
__kmp_affinity_gran_levels++;
|
||||
}
|
||||
}
|
||||
|
||||
if (__kmp_affinity_verbose) {
|
||||
__kmp_affinity_print_topology(retval, nActiveThreads, depth-1, pkgLevel-1,
|
||||
coreLevel-1, threadLevel-1);
|
||||
}
|
||||
|
||||
KMP_CPU_FREE(oldMask);
|
||||
*address2os = retval;
|
||||
if(depth == 0) return 0;
|
||||
else return depth-1;
|
||||
}
|
||||
#endif // KMP_USE_HWLOC
|
||||
|
||||
//
|
||||
// If we don't know how to retrieve the machine's processor topology, or
|
||||
|
@ -329,7 +659,7 @@ __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
|
|||
__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
|
||||
int avail_ct = 0;
|
||||
unsigned int i;
|
||||
for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
|
||||
KMP_CPU_SET_ITERATE(i, fullMask) {
|
||||
//
|
||||
// Skip this proc if it is not included in the machine model.
|
||||
//
|
||||
|
@ -394,7 +724,7 @@ __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
|
|||
__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
|
||||
int avail_ct = 0;
|
||||
int i;
|
||||
for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
|
||||
KMP_CPU_SET_ITERATE(i, fullMask) {
|
||||
//
|
||||
// Skip this proc if it is not included in the machine model.
|
||||
//
|
||||
|
@ -656,7 +986,7 @@ __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
|
|||
apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
|
||||
__kmp_avail_proc * sizeof(apicThreadInfo));
|
||||
unsigned nApics = 0;
|
||||
for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
|
||||
KMP_CPU_SET_ITERATE(i, fullMask) {
|
||||
//
|
||||
// Skip this proc if it is not included in the machine model.
|
||||
//
|
||||
|
@ -1167,7 +1497,7 @@ __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
|
|||
//
|
||||
unsigned int proc;
|
||||
int nApics = 0;
|
||||
for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
|
||||
KMP_CPU_SET_ITERATE(proc, fullMask) {
|
||||
//
|
||||
// Skip this proc if it is not included in the machine model.
|
||||
//
|
||||
|
@ -2282,8 +2612,8 @@ __kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
|
|||
maxOsId = osId;
|
||||
}
|
||||
}
|
||||
kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
|
||||
(maxOsId + 1) * __kmp_affin_mask_size);
|
||||
kmp_affin_mask_t *osId2Mask;
|
||||
KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId+1));
|
||||
|
||||
//
|
||||
// Sort the address2os table according to physical order. Doing so
|
||||
|
@ -2314,8 +2644,8 @@ __kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
|
|||
unsigned j = 0; // index of 1st thread on core
|
||||
unsigned leader = 0;
|
||||
Address *leaderAddr = &(address2os[0].first);
|
||||
kmp_affin_mask_t *sum
|
||||
= (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
|
||||
kmp_affin_mask_t *sum;
|
||||
KMP_CPU_ALLOC_ON_STACK(sum);
|
||||
KMP_CPU_ZERO(sum);
|
||||
KMP_CPU_SET(address2os[0].second, sum);
|
||||
for (i = 1; i < numAddrs; i++) {
|
||||
|
@ -2365,6 +2695,7 @@ __kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
|
|||
address2os[j].first.leader = (j == leader);
|
||||
}
|
||||
unique++;
|
||||
KMP_CPU_FREE_FROM_STACK(sum);
|
||||
|
||||
*maxIndex = maxOsId;
|
||||
*numUnique = unique;
|
||||
|
@ -2384,9 +2715,17 @@ static int nextNewMask;
|
|||
#define ADD_MASK(_mask) \
|
||||
{ \
|
||||
if (nextNewMask >= numNewMasks) { \
|
||||
int i; \
|
||||
numNewMasks *= 2; \
|
||||
newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
|
||||
numNewMasks * __kmp_affin_mask_size); \
|
||||
kmp_affin_mask_t* temp; \
|
||||
KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \
|
||||
for(i=0;i<numNewMasks/2;i++) { \
|
||||
kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i); \
|
||||
kmp_affin_mask_t* dest = KMP_CPU_INDEX(temp, i); \
|
||||
KMP_CPU_COPY(dest, src); \
|
||||
} \
|
||||
KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks/2); \
|
||||
newMasks = temp; \
|
||||
} \
|
||||
KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
|
||||
nextNewMask++; \
|
||||
|
@ -2416,6 +2755,7 @@ __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
|
|||
unsigned int *out_numMasks, const char *proclist,
|
||||
kmp_affin_mask_t *osId2Mask, int maxOsId)
|
||||
{
|
||||
int i;
|
||||
const char *scan = proclist;
|
||||
const char *next = proclist;
|
||||
|
||||
|
@ -2424,11 +2764,10 @@ __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
|
|||
// so that we can use realloc() to extend it.
|
||||
//
|
||||
numNewMasks = 2;
|
||||
newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
|
||||
* __kmp_affin_mask_size);
|
||||
KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
|
||||
nextNewMask = 0;
|
||||
kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
|
||||
__kmp_affin_mask_size);
|
||||
kmp_affin_mask_t *sumMask;
|
||||
KMP_CPU_ALLOC(sumMask);
|
||||
int setSize = 0;
|
||||
|
||||
for (;;) {
|
||||
|
@ -2632,14 +2971,17 @@ __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
|
|||
*out_numMasks = nextNewMask;
|
||||
if (nextNewMask == 0) {
|
||||
*out_masks = NULL;
|
||||
KMP_INTERNAL_FREE(newMasks);
|
||||
KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
|
||||
return;
|
||||
}
|
||||
*out_masks
|
||||
= (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
|
||||
KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
|
||||
__kmp_free(sumMask);
|
||||
KMP_INTERNAL_FREE(newMasks);
|
||||
KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
|
||||
for(i = 0; i < nextNewMask; i++) {
|
||||
kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
|
||||
kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
|
||||
KMP_CPU_COPY(dest, src);
|
||||
}
|
||||
KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
|
||||
KMP_CPU_FREE(sumMask);
|
||||
}
|
||||
|
||||
|
||||
|
@ -2834,7 +3176,7 @@ __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
|
|||
else if (**scan == '!') {
|
||||
(*scan)++; // skip '!'
|
||||
__kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
|
||||
KMP_CPU_COMPLEMENT(tempMask);
|
||||
KMP_CPU_COMPLEMENT(maxOsId, tempMask);
|
||||
}
|
||||
else if ((**scan >= '0') && (**scan <= '9')) {
|
||||
next = *scan;
|
||||
|
@ -2866,17 +3208,23 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
|
|||
unsigned int *out_numMasks, const char *placelist,
|
||||
kmp_affin_mask_t *osId2Mask, int maxOsId)
|
||||
{
|
||||
int i,j,count,stride,sign;
|
||||
const char *scan = placelist;
|
||||
const char *next = placelist;
|
||||
|
||||
numNewMasks = 2;
|
||||
newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
|
||||
* __kmp_affin_mask_size);
|
||||
KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
|
||||
nextNewMask = 0;
|
||||
|
||||
kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
|
||||
__kmp_affin_mask_size);
|
||||
// tempMask is modified based on the previous or initial
|
||||
// place to form the current place
|
||||
// previousMask contains the previous place
|
||||
kmp_affin_mask_t *tempMask;
|
||||
kmp_affin_mask_t *previousMask;
|
||||
KMP_CPU_ALLOC(tempMask);
|
||||
KMP_CPU_ZERO(tempMask);
|
||||
KMP_CPU_ALLOC(previousMask);
|
||||
KMP_CPU_ZERO(previousMask);
|
||||
int setSize = 0;
|
||||
|
||||
for (;;) {
|
||||
|
@ -2910,7 +3258,7 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
|
|||
"bad explicit places list");
|
||||
next = scan;
|
||||
SKIP_DIGITS(next);
|
||||
int count = __kmp_str_to_int(scan, *next);
|
||||
count = __kmp_str_to_int(scan, *next);
|
||||
KMP_ASSERT(count >= 0);
|
||||
scan = next;
|
||||
|
||||
|
@ -2918,7 +3266,6 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
|
|||
// valid follow sets are ',' ':' and EOL
|
||||
//
|
||||
SKIP_WS(scan);
|
||||
int stride;
|
||||
if (*scan == '\0' || *scan == ',') {
|
||||
stride = +1;
|
||||
}
|
||||
|
@ -2929,7 +3276,7 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
|
|||
//
|
||||
// Read stride parameter
|
||||
//
|
||||
int sign = +1;
|
||||
sign = +1;
|
||||
for (;;) {
|
||||
SKIP_WS(scan);
|
||||
if (*scan == '+') {
|
||||
|
@ -2954,66 +3301,30 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
|
|||
stride *= sign;
|
||||
}
|
||||
|
||||
if (stride > 0) {
|
||||
int i;
|
||||
for (i = 0; i < count; i++) {
|
||||
int j;
|
||||
if (setSize == 0) {
|
||||
break;
|
||||
}
|
||||
ADD_MASK(tempMask);
|
||||
setSize = 0;
|
||||
for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
|
||||
if (! KMP_CPU_ISSET(j - stride, tempMask)) {
|
||||
KMP_CPU_CLR(j, tempMask);
|
||||
}
|
||||
else if ((j > maxOsId) ||
|
||||
(! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
|
||||
if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
|
||||
&& (__kmp_affinity_type != affinity_none))) && i < count - 1) {
|
||||
KMP_WARNING(AffIgnoreInvalidProcID, j);
|
||||
}
|
||||
KMP_CPU_CLR(j, tempMask);
|
||||
}
|
||||
else {
|
||||
KMP_CPU_SET(j, tempMask);
|
||||
setSize++;
|
||||
}
|
||||
}
|
||||
for (; j >= 0; j--) {
|
||||
KMP_CPU_CLR(j, tempMask);
|
||||
}
|
||||
// Add places determined by initial_place : count : stride
|
||||
for (i = 0; i < count; i++) {
|
||||
if (setSize == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
int i;
|
||||
for (i = 0; i < count; i++) {
|
||||
int j;
|
||||
if (setSize == 0) {
|
||||
break;
|
||||
// Add the current place, then build the next place (tempMask) from that
|
||||
KMP_CPU_COPY(previousMask, tempMask);
|
||||
ADD_MASK(previousMask);
|
||||
KMP_CPU_ZERO(tempMask);
|
||||
setSize = 0;
|
||||
KMP_CPU_SET_ITERATE(j, previousMask) {
|
||||
if (! KMP_CPU_ISSET(j, previousMask)) {
|
||||
continue;
|
||||
}
|
||||
ADD_MASK(tempMask);
|
||||
setSize = 0;
|
||||
for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
|
||||
j++) {
|
||||
if (! KMP_CPU_ISSET(j - stride, tempMask)) {
|
||||
KMP_CPU_CLR(j, tempMask);
|
||||
}
|
||||
else if ((j > maxOsId) ||
|
||||
(! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
|
||||
if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
|
||||
&& (__kmp_affinity_type != affinity_none))) && i < count - 1) {
|
||||
KMP_WARNING(AffIgnoreInvalidProcID, j);
|
||||
}
|
||||
KMP_CPU_CLR(j, tempMask);
|
||||
}
|
||||
else {
|
||||
KMP_CPU_SET(j, tempMask);
|
||||
setSize++;
|
||||
else if ((j+stride > maxOsId) || (j+stride < 0) ||
|
||||
(! KMP_CPU_ISSET(j+stride, KMP_CPU_INDEX(osId2Mask, j+stride)))) {
|
||||
if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
|
||||
&& (__kmp_affinity_type != affinity_none))) && i < count - 1) {
|
||||
KMP_WARNING(AffIgnoreInvalidProcID, j+stride);
|
||||
}
|
||||
}
|
||||
for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
|
||||
KMP_CPU_CLR(j, tempMask);
|
||||
else {
|
||||
KMP_CPU_SET(j+stride, tempMask);
|
||||
setSize++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3038,14 +3349,18 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
|
|||
*out_numMasks = nextNewMask;
|
||||
if (nextNewMask == 0) {
|
||||
*out_masks = NULL;
|
||||
KMP_INTERNAL_FREE(newMasks);
|
||||
KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
|
||||
return;
|
||||
}
|
||||
*out_masks
|
||||
= (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
|
||||
KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
|
||||
__kmp_free(tempMask);
|
||||
KMP_INTERNAL_FREE(newMasks);
|
||||
KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
|
||||
KMP_CPU_FREE(tempMask);
|
||||
KMP_CPU_FREE(previousMask);
|
||||
for(i = 0; i < nextNewMask; i++) {
|
||||
kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
|
||||
kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
|
||||
KMP_CPU_COPY(dest, src);
|
||||
}
|
||||
KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
|
||||
}
|
||||
|
||||
# endif /* OMP_40_ENABLED */
|
||||
|
@ -3140,7 +3455,7 @@ __kmp_aux_affinity_initialize(void)
|
|||
// processors that we know about on the machine.
|
||||
//
|
||||
if (fullMask == NULL) {
|
||||
fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
|
||||
KMP_CPU_ALLOC(fullMask);
|
||||
}
|
||||
if (KMP_AFFINITY_CAPABLE()) {
|
||||
if (__kmp_affinity_respect_mask) {
|
||||
|
@ -3151,7 +3466,7 @@ __kmp_aux_affinity_initialize(void)
|
|||
//
|
||||
unsigned i;
|
||||
__kmp_avail_proc = 0;
|
||||
for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
|
||||
KMP_CPU_SET_ITERATE(i, fullMask) {
|
||||
if (! KMP_CPU_ISSET(i, fullMask)) {
|
||||
continue;
|
||||
}
|
||||
|
@ -3193,39 +3508,60 @@ __kmp_aux_affinity_initialize(void)
|
|||
//
|
||||
const char *file_name = NULL;
|
||||
int line = 0;
|
||||
# if KMP_USE_HWLOC
|
||||
if (depth < 0) {
|
||||
if (__kmp_affinity_verbose) {
|
||||
KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
|
||||
}
|
||||
if(!__kmp_hwloc_error) {
|
||||
depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
|
||||
if (depth == 0) {
|
||||
KMP_ASSERT(__kmp_affinity_type == affinity_none);
|
||||
KMP_ASSERT(address2os == NULL);
|
||||
return;
|
||||
} else if(depth < 0 && __kmp_affinity_verbose) {
|
||||
KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
|
||||
}
|
||||
} else if(__kmp_affinity_verbose) {
|
||||
KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
# if KMP_ARCH_X86 || KMP_ARCH_X86_64
|
||||
|
||||
if (__kmp_affinity_verbose) {
|
||||
KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
|
||||
}
|
||||
|
||||
file_name = NULL;
|
||||
depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
|
||||
if (depth == 0) {
|
||||
KMP_ASSERT(__kmp_affinity_type == affinity_none);
|
||||
KMP_ASSERT(address2os == NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
if (depth < 0) {
|
||||
if (__kmp_affinity_verbose) {
|
||||
if (msg_id != kmp_i18n_null) {
|
||||
KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
|
||||
KMP_I18N_STR(DecodingLegacyAPIC));
|
||||
}
|
||||
else {
|
||||
KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
|
||||
}
|
||||
KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
|
||||
}
|
||||
|
||||
file_name = NULL;
|
||||
depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
|
||||
depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
|
||||
if (depth == 0) {
|
||||
KMP_ASSERT(__kmp_affinity_type == affinity_none);
|
||||
KMP_ASSERT(address2os == NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
if (depth < 0) {
|
||||
if (__kmp_affinity_verbose) {
|
||||
if (msg_id != kmp_i18n_null) {
|
||||
KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
|
||||
KMP_I18N_STR(DecodingLegacyAPIC));
|
||||
}
|
||||
else {
|
||||
KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
|
||||
}
|
||||
}
|
||||
|
||||
file_name = NULL;
|
||||
depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
|
||||
if (depth == 0) {
|
||||
KMP_ASSERT(__kmp_affinity_type == affinity_none);
|
||||
KMP_ASSERT(address2os == NULL);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
|
||||
|
@ -3430,6 +3766,50 @@ __kmp_aux_affinity_initialize(void)
|
|||
KMP_ASSERT(address2os != NULL);
|
||||
}
|
||||
|
||||
# if KMP_USE_HWLOC
|
||||
else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
|
||||
if (__kmp_affinity_verbose) {
|
||||
KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
|
||||
}
|
||||
depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
|
||||
if (depth == 0) {
|
||||
KMP_ASSERT(__kmp_affinity_type == affinity_none);
|
||||
KMP_ASSERT(address2os == NULL);
|
||||
return;
|
||||
}
|
||||
# if KMP_DEBUG
|
||||
AddrUnsPair *otheraddress2os = NULL;
|
||||
int otherdepth = -1;
|
||||
# if KMP_MIC
|
||||
otherdepth = __kmp_affinity_create_apicid_map(&otheraddress2os, &msg_id);
|
||||
# else
|
||||
otherdepth = __kmp_affinity_create_x2apicid_map(&otheraddress2os, &msg_id);
|
||||
# endif
|
||||
if(otheraddress2os != NULL && address2os != NULL) {
|
||||
int i;
|
||||
unsigned arent_equal_flag = 0;
|
||||
for(i=0;i<__kmp_avail_proc;i++) {
|
||||
if(otheraddress2os[i] != address2os[i]) arent_equal_flag = 1;
|
||||
}
|
||||
if(arent_equal_flag) {
|
||||
KA_TRACE(10, ("__kmp_aux_affinity_initialize: Hwloc affinity places are different from APICID\n"));
|
||||
KA_TRACE(10, ("__kmp_aux_affinity_initialize: APICID Table:\n"));
|
||||
for(i=0;i<__kmp_avail_proc;i++) {
|
||||
otheraddress2os[i].print(); __kmp_printf("\n");
|
||||
}
|
||||
KA_TRACE(10, ("__kmp_aux_affinity_initialize: Hwloc Table:\n"));
|
||||
for(i=0;i<__kmp_avail_proc;i++) {
|
||||
address2os[i].print(); __kmp_printf("\n");
|
||||
}
|
||||
}
|
||||
else {
|
||||
KA_TRACE(10, ("__kmp_aux_affinity_initialize: Hwloc affinity places are same as APICID\n"));
|
||||
}
|
||||
}
|
||||
# endif // KMP_DEBUG
|
||||
}
|
||||
# endif // KMP_USE_HWLOC
|
||||
|
||||
if (address2os == NULL) {
|
||||
if (KMP_AFFINITY_CAPABLE()
|
||||
&& (__kmp_affinity_verbose || (__kmp_affinity_warnings
|
||||
|
@ -3608,8 +3988,7 @@ __kmp_aux_affinity_initialize(void)
|
|||
}
|
||||
# endif
|
||||
|
||||
__kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
|
||||
__kmp_affinity_num_masks * __kmp_affin_mask_size);
|
||||
KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
|
||||
|
||||
//
|
||||
// Sort the address2os table according to the current setting of
|
||||
|
@ -3679,7 +4058,7 @@ void
|
|||
__kmp_affinity_uninitialize(void)
|
||||
{
|
||||
if (__kmp_affinity_masks != NULL) {
|
||||
__kmp_free(__kmp_affinity_masks);
|
||||
KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
|
||||
__kmp_affinity_masks = NULL;
|
||||
}
|
||||
if (fullMask != NULL) {
|
||||
|
@ -3909,7 +4288,7 @@ __kmp_aux_set_affinity(void **mask)
|
|||
unsigned proc;
|
||||
int num_procs = 0;
|
||||
|
||||
for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
|
||||
KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t*)(*mask))) {
|
||||
if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
|
||||
continue;
|
||||
}
|
||||
|
@ -4027,7 +4406,11 @@ __kmp_aux_set_affinity_mask_proc(int proc, void **mask)
|
|||
}
|
||||
}
|
||||
|
||||
if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
|
||||
if ((proc < 0)
|
||||
# if !KMP_USE_HWLOC
|
||||
|| ((unsigned)proc >= KMP_CPU_SETSIZE)
|
||||
# endif
|
||||
) {
|
||||
return -1;
|
||||
}
|
||||
if (! KMP_CPU_ISSET(proc, fullMask)) {
|
||||
|
@ -4063,7 +4446,11 @@ __kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
|
|||
}
|
||||
}
|
||||
|
||||
if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
|
||||
if ((proc < 0)
|
||||
# if !KMP_USE_HWLOC
|
||||
|| ((unsigned)proc >= KMP_CPU_SETSIZE)
|
||||
# endif
|
||||
) {
|
||||
return -1;
|
||||
}
|
||||
if (! KMP_CPU_ISSET(proc, fullMask)) {
|
||||
|
@ -4099,8 +4486,12 @@ __kmp_aux_get_affinity_mask_proc(int proc, void **mask)
|
|||
}
|
||||
}
|
||||
|
||||
if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
|
||||
return 0;
|
||||
if ((proc < 0)
|
||||
# if !KMP_USE_HWLOC
|
||||
|| ((unsigned)proc >= KMP_CPU_SETSIZE)
|
||||
# endif
|
||||
) {
|
||||
return -1;
|
||||
}
|
||||
if (! KMP_CPU_ISSET(proc, fullMask)) {
|
||||
return 0;
|
||||
|
@ -4137,7 +4528,8 @@ void __kmp_balanced_affinity( int tid, int nthreads )
|
|||
KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
|
||||
"Illegal set affinity operation when not capable");
|
||||
|
||||
kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
|
||||
kmp_affin_mask_t *mask;
|
||||
KMP_CPU_ALLOC_ON_STACK(mask);
|
||||
KMP_CPU_ZERO(mask);
|
||||
|
||||
// Granularity == thread
|
||||
|
@ -4158,9 +4550,11 @@ void __kmp_balanced_affinity( int tid, int nthreads )
|
|||
tid, buf);
|
||||
}
|
||||
__kmp_set_system_affinity( mask, TRUE );
|
||||
KMP_CPU_FREE_FROM_STACK(mask);
|
||||
} else { // Non-uniform topology
|
||||
|
||||
kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
|
||||
kmp_affin_mask_t *mask;
|
||||
KMP_CPU_ALLOC_ON_STACK(mask);
|
||||
KMP_CPU_ZERO(mask);
|
||||
|
||||
// Number of hyper threads per core in HT machine
|
||||
|
@ -4334,6 +4728,7 @@ void __kmp_balanced_affinity( int tid, int nthreads )
|
|||
tid, buf);
|
||||
}
|
||||
__kmp_set_system_affinity( mask, TRUE );
|
||||
KMP_CPU_FREE_FROM_STACK(mask);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -57,6 +57,13 @@ public:
|
|||
bool operator!=(const Address &b) const {
|
||||
return !operator==(b);
|
||||
}
|
||||
void print() const {
|
||||
unsigned i;
|
||||
printf("Depth: %u --- ", depth);
|
||||
for(i=0;i<depth;i++) {
|
||||
printf("%u ", labels[i]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class AddrUnsPair {
|
||||
|
@ -72,6 +79,18 @@ public:
|
|||
second = b.second;
|
||||
return *this;
|
||||
}
|
||||
// Debug helper: writes "first = ", then delegates to first.print(),
// then writes " --- second = <second>" using printf.
// No trailing newline is written.
void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
}
|
||||
// Two pairs compare equal only when neither `first` nor `second`
// differs from the corresponding member of `b`.
bool operator==(const AddrUnsPair &b) const {
    return !(first != b.first || second != b.second);
}
|
||||
// Inequality is defined as the exact negation of operator==.
bool operator!=(const AddrUnsPair &b) const {
    return !(*this == b);
}
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -51,6 +51,8 @@
|
|||
#cmakedefine01 LIBOMP_ENABLE_ASSERTIONS
|
||||
#define KMP_USE_ASSERT LIBOMP_ENABLE_ASSERTIONS
|
||||
#cmakedefine01 STUBS_LIBRARY
|
||||
#cmakedefine01 LIBOMP_USE_HWLOC
|
||||
#define KMP_USE_HWLOC LIBOMP_USE_HWLOC
|
||||
#define KMP_ARCH_STR "@LIBOMP_LEGAL_ARCH@"
|
||||
#define KMP_LIBRARY_FILE "@LIBOMP_LIB_FILE@"
|
||||
#define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@
|
||||
|
|
|
@ -257,7 +257,7 @@ FTN_GET_AFFINITY_MAX_PROC( void )
|
|||
return 0;
|
||||
}
|
||||
|
||||
#if KMP_GROUP_AFFINITY
|
||||
#if KMP_GROUP_AFFINITY && !KMP_USE_HWLOC
|
||||
if ( __kmp_num_proc_groups > 1 ) {
|
||||
return (int)KMP_CPU_SETSIZE;
|
||||
}
|
||||
|
@ -278,7 +278,11 @@ FTN_CREATE_AFFINITY_MASK( void **mask )
|
|||
if ( ! TCR_4(__kmp_init_middle) ) {
|
||||
__kmp_middle_initialize();
|
||||
}
|
||||
# if KMP_USE_HWLOC
|
||||
*mask = (hwloc_cpuset_t)hwloc_bitmap_alloc();
|
||||
# else
|
||||
*mask = kmpc_malloc( __kmp_affin_mask_size );
|
||||
# endif
|
||||
KMP_CPU_ZERO( (kmp_affin_mask_t *)(*mask) );
|
||||
#endif
|
||||
}
|
||||
|
@ -300,7 +304,11 @@ FTN_DESTROY_AFFINITY_MASK( void **mask )
|
|||
KMP_FATAL( AffinityInvalidMask, "kmp_destroy_affinity_mask" );
|
||||
}
|
||||
}
|
||||
# if KMP_USE_HWLOC
|
||||
hwloc_bitmap_free((hwloc_cpuset_t)(*mask));
|
||||
# else
|
||||
kmpc_free( *mask );
|
||||
# endif
|
||||
*mask = NULL;
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -33,6 +33,10 @@ __thread kmp_stats_list* __kmp_stats_thread_ptr = &__kmp_stats_list;
|
|||
// gives reference tick for all events (considered the 0 tick)
|
||||
tsc_tick_count __kmp_stats_start_time;
|
||||
#endif
|
||||
#if KMP_USE_HWLOC
|
||||
int __kmp_hwloc_error = FALSE;
|
||||
hwloc_topology_t __kmp_hwloc_topology = NULL;
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------- */
|
||||
/* INITIALIZATION VARIABLES */
|
||||
|
|
|
@ -3009,6 +3009,11 @@ __kmp_stg_parse_topology_method( char const * name, char const * value,
|
|||
else if ( __kmp_str_match( "flat", 1, value ) ) {
|
||||
__kmp_affinity_top_method = affinity_top_method_flat;
|
||||
}
|
||||
# if KMP_USE_HWLOC
|
||||
else if ( __kmp_str_match( "hwloc", 1, value) ) {
|
||||
__kmp_affinity_top_method = affinity_top_method_hwloc;
|
||||
}
|
||||
# endif
|
||||
else {
|
||||
KMP_WARNING( StgInvalidValue, name, value );
|
||||
}
|
||||
|
@ -5119,11 +5124,43 @@ __kmp_env_initialize( char const * string ) {
|
|||
// affinity.
|
||||
//
|
||||
const char *var = "KMP_AFFINITY";
|
||||
# if KMP_USE_HWLOC
|
||||
if(hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
|
||||
__kmp_hwloc_error = TRUE;
|
||||
if(__kmp_affinity_verbose)
|
||||
KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
|
||||
}
|
||||
hwloc_topology_ignore_type(__kmp_hwloc_topology, HWLOC_OBJ_CACHE);
|
||||
# endif
|
||||
if ( __kmp_affinity_type == affinity_disabled ) {
|
||||
KMP_AFFINITY_DISABLE();
|
||||
}
|
||||
else if ( ! KMP_AFFINITY_CAPABLE() ) {
|
||||
# if KMP_USE_HWLOC
|
||||
const hwloc_topology_support* topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
|
||||
if(hwloc_topology_load(__kmp_hwloc_topology) < 0) {
|
||||
__kmp_hwloc_error = TRUE;
|
||||
if(__kmp_affinity_verbose)
|
||||
KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
|
||||
}
|
||||
// Is the system capable of setting/getting this thread's affinity?
|
||||
// also, is topology discovery possible? (pu indicates ability to discover processing units)
|
||||
// and finally, were there no errors when calling any hwloc_* API functions?
|
||||
if(topology_support->cpubind->set_thisthread_cpubind &&
|
||||
topology_support->cpubind->get_thisthread_cpubind &&
|
||||
topology_support->discovery->pu &&
|
||||
!__kmp_hwloc_error)
|
||||
{
|
||||
// enables affinity according to KMP_AFFINITY_CAPABLE() macro
|
||||
KMP_AFFINITY_ENABLE(TRUE);
|
||||
} else {
|
||||
// indicate that hwloc didn't work and disable affinity
|
||||
__kmp_hwloc_error = TRUE;
|
||||
KMP_AFFINITY_DISABLE();
|
||||
}
|
||||
# else
|
||||
__kmp_affinity_determine_capable( var );
|
||||
# endif // KMP_USE_HWLOC
|
||||
if ( ! KMP_AFFINITY_CAPABLE() ) {
|
||||
if ( __kmp_affinity_verbose || ( __kmp_affinity_warnings
|
||||
&& ( __kmp_affinity_type != affinity_default )
|
||||
|
|
|
@ -175,8 +175,11 @@ __kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
|
|||
{
|
||||
KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
|
||||
"Illegal set affinity operation when not capable");
|
||||
|
||||
#if KMP_USE_HWLOC
|
||||
int retval = hwloc_set_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
|
||||
#else
|
||||
int retval = syscall( __NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask );
|
||||
#endif
|
||||
if (retval >= 0) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -198,7 +201,11 @@ __kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
|
|||
KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
|
||||
"Illegal get affinity operation when not capable");
|
||||
|
||||
#if KMP_USE_HWLOC
|
||||
int retval = hwloc_get_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
|
||||
#else
|
||||
int retval = syscall( __NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask );
|
||||
#endif
|
||||
if (retval >= 0) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -220,10 +227,12 @@ __kmp_affinity_bind_thread( int which )
|
|||
KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
|
||||
"Illegal set affinity operation when not capable");
|
||||
|
||||
kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
|
||||
kmp_affin_mask_t *mask;
|
||||
KMP_CPU_ALLOC_ON_STACK(mask);
|
||||
KMP_CPU_ZERO(mask);
|
||||
KMP_CPU_SET(which, mask);
|
||||
__kmp_set_system_affinity(mask, TRUE);
|
||||
KMP_CPU_FREE_FROM_STACK(mask);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,12 +1,23 @@
|
|||
# CMakeLists.txt file for unit testing OpenMP Library
|
||||
include(FindPythonInterp)
|
||||
include(CheckTypeSize)
|
||||
|
||||
if(NOT PYTHONINTERP_FOUND)
|
||||
libomp_warning_say("Could not find Python.")
|
||||
libomp_warning_say("The check-libomp target will not be available!")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# pythonize_bool(<var>)
#
# Rewrites the variable named by <var> to the literal `True` when its
# current value evaluates as true in if(), and to `False` otherwise.
# This is a macro rather than a function so that the set() takes effect
# in the caller's scope.
# Example: pythonize_bool(LIBOMP_USE_HWLOC)
macro(pythonize_bool var)
  if(${var})
    set(${var} True)
  else()
    set(${var} False)
  endif()
endmacro()
|
||||
|
||||
pythonize_bool(LIBOMP_USE_HWLOC)
|
||||
|
||||
set(LIBOMP_TEST_CFLAGS "" CACHE STRING
|
||||
"Extra compiler flags to send to the test compiler")
|
||||
|
||||
|
|
|
@ -9,11 +9,20 @@ if 'PYLINT_IMPORT' in os.environ:
|
|||
config = object()
|
||||
lit_config = object()
|
||||
|
||||
def append_dynamic_library_path(name, value, sep):
|
||||
if name in config.environment:
|
||||
config.environment[name] = value + sep + config.environment[name]
|
||||
def append_dynamic_library_path(path):
|
||||
if config.operating_system == 'Windows':
|
||||
name = 'PATH'
|
||||
sep = ';'
|
||||
elif config.operating_system == 'Darwin':
|
||||
name = 'DYLD_LIBRARY_PATH'
|
||||
sep = ':'
|
||||
else:
|
||||
config.environment[name] = value
|
||||
name = 'LD_LIBRARY_PATH'
|
||||
sep = ':'
|
||||
if name in config.environment:
|
||||
config.environment[name] = path + sep + config.environment[name]
|
||||
else:
|
||||
config.environment[name] = path
|
||||
|
||||
# name: The name of this test suite.
|
||||
config.name = 'libomp'
|
||||
|
@ -38,13 +47,15 @@ config.test_cflags = config.test_openmp_flag + \
|
|||
" " + config.test_extra_cflags
|
||||
|
||||
# Setup environment to find dynamic library at runtime
|
||||
if config.operating_system == 'Windows':
|
||||
append_dynamic_library_path('PATH', config.library_dir, ";")
|
||||
elif config.operating_system == 'Darwin':
|
||||
append_dynamic_library_path('DYLD_LIBRARY_PATH', config.library_dir, ":")
|
||||
append_dynamic_library_path(config.library_dir)
|
||||
if config.using_hwloc:
|
||||
append_dynamic_library_path(config.hwloc_library_dir)
|
||||
|
||||
# Rpath modifications for Darwin
|
||||
if config.operating_system == 'Darwin':
|
||||
config.test_cflags += " -Wl,-rpath," + config.library_dir
|
||||
else: # Unices
|
||||
append_dynamic_library_path('LD_LIBRARY_PATH', config.library_dir, ":")
|
||||
if config.using_hwloc:
|
||||
config.test_cflags += " -Wl,-rpath," + config.hwloc_library_dir
|
||||
|
||||
# substitutions
|
||||
config.substitutions.append(("%libomp-compile-and-run", \
|
||||
|
|
|
@ -7,6 +7,8 @@ config.libomp_obj_root = "@CMAKE_CURRENT_BINARY_DIR@"
|
|||
config.library_dir = "@LIBOMP_LIBRARY_DIR@"
|
||||
config.omp_header_directory = "@LIBOMP_BINARY_DIR@/src"
|
||||
config.operating_system = "@CMAKE_SYSTEM_NAME@"
|
||||
config.hwloc_library_dir = "@LIBOMP_HWLOC_LIBRARY_DIR@"
|
||||
config.using_hwloc = @LIBOMP_USE_HWLOC@
|
||||
|
||||
# Let the main config do the real work.
|
||||
lit_config.load_config(config, "@LIBOMP_BASE_DIR@/test/lit.cfg")
|
||||
|
|
Loading…
Reference in New Issue