Teach OpenMP Library to use Hwloc on Windows

This patch allows a user to enable Hwloc on Windows. There are three main
changes in here:
1. kmp.h - Move definitions/declarations out of the KMP_OS_WINDOWS guard (our
   Windows implementation of affinity) because they also need to be defined
   when KMP_USE_HWLOC is on.
2. Teach __kmp_set_system_affinity, __kmp_get_system_affinity,
   __kmp_get_proc_group, and __kmp_affinity_bind_thread how to use hwloc.
3. Teach CMake how to include hwloc when building on Windows.

Another minor change in here is to make sure that anything under KMP_USE_HWLOC
is also guarded by KMP_AFFINITY_SUPPORTED. This is to prevent Mac
builds from requiring anything from Hwloc.

Differential Revision: http://reviews.llvm.org/D21441

llvm-svn: 272951
This commit is contained in:
Jonathan Peyton 2016-06-16 20:23:11 +00:00
parent c505ab6733
commit 0f3c2b921d
5 changed files with 116 additions and 63 deletions

View File

@ -246,23 +246,22 @@ endif()
# Check if HWLOC support is available
if(${LIBOMP_USE_HWLOC})
if(WIN32)
set(LIBOMP_HAVE_HWLOC FALSE)
libomp_say("Using hwloc not supported on Windows yet")
else()
set(CMAKE_REQUIRED_INCLUDES ${LIBOMP_HWLOC_INSTALL_DIR}/include)
check_include_file(hwloc.h LIBOMP_HAVE_HWLOC_H)
set(CMAKE_REQUIRED_INCLUDES)
check_library_exists(hwloc hwloc_topology_init
set(CMAKE_REQUIRED_INCLUDES ${LIBOMP_HWLOC_INSTALL_DIR}/include)
check_include_file(hwloc.h LIBOMP_HAVE_HWLOC_H)
set(CMAKE_REQUIRED_INCLUDES)
find_library(LIBOMP_HWLOC_LIBRARY
NAMES hwloc libhwloc
HINTS ${LIBOMP_HWLOC_INSTALL_DIR}/lib)
if(LIBOMP_HWLOC_LIBRARY)
check_library_exists(${LIBOMP_HWLOC_LIBRARY} hwloc_topology_init
${LIBOMP_HWLOC_INSTALL_DIR}/lib LIBOMP_HAVE_LIBHWLOC)
find_library(LIBOMP_HWLOC_LIBRARY hwloc ${LIBOMP_HWLOC_INSTALL_DIR}/lib)
get_filename_component(LIBOMP_HWLOC_LIBRARY_DIR ${LIBOMP_HWLOC_LIBRARY} PATH)
if(LIBOMP_HAVE_HWLOC_H AND LIBOMP_HAVE_LIBHWLOC AND LIBOMP_HWLOC_LIBRARY)
set(LIBOMP_HAVE_HWLOC TRUE)
else()
set(LIBOMP_HAVE_HWLOC FALSE)
libomp_say("Could not find hwloc")
endif()
endif()
if(LIBOMP_HAVE_HWLOC_H AND LIBOMP_HAVE_LIBHWLOC AND LIBOMP_HWLOC_LIBRARY)
set(LIBOMP_HAVE_HWLOC TRUE)
else()
set(LIBOMP_HAVE_HWLOC FALSE)
libomp_say("Could not find hwloc")
endif()
endif()

View File

@ -79,10 +79,8 @@
class kmp_stats_list;
#endif
#if KMP_USE_HWLOC
#include "hwloc.h"
extern hwloc_topology_t __kmp_hwloc_topology;
extern int __kmp_hwloc_error;
#if KMP_USE_HWLOC && KMP_AFFINITY_SUPPORTED
# include "hwloc.h"
#endif
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
@ -522,14 +520,43 @@ typedef int PACKED_REDUCTION_METHOD_T;
*/
#if KMP_AFFINITY_SUPPORTED
# if KMP_GROUP_AFFINITY
// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
# if _MSC_VER < 1600
typedef struct GROUP_AFFINITY {
KAFFINITY Mask;
WORD Group;
WORD Reserved[3];
} GROUP_AFFINITY;
# endif /* _MSC_VER < 1600 */
extern int __kmp_num_proc_groups;
typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;
typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;
typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;
typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *);
extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
# endif /* KMP_GROUP_AFFINITY */
extern size_t __kmp_affin_mask_size;
# define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
# define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
# define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
# define KMP_CPU_SETSIZE (__kmp_affin_mask_size * CHAR_BIT)
# if !KMP_USE_HWLOC
# define KMP_CPU_SETSIZE (__kmp_affin_mask_size * CHAR_BIT)
# define KMP_CPU_SET_ITERATE(i,mask) \
for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i)
# endif
#if KMP_USE_HWLOC
extern hwloc_topology_t __kmp_hwloc_topology;
extern int __kmp_hwloc_error;
typedef hwloc_cpuset_t kmp_affin_mask_t;
# define KMP_CPU_SET(i,mask) hwloc_bitmap_set((hwloc_cpuset_t)mask, (unsigned)i)
# define KMP_CPU_ISSET(i,mask) hwloc_bitmap_isset((hwloc_cpuset_t)mask, (unsigned)i)
@ -600,9 +627,6 @@ typedef hwloc_cpuset_t kmp_affin_mask_t;
}
#else /* KMP_USE_HWLOC */
# define KMP_CPU_SET_ITERATE(i,mask) \
for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i)
# if KMP_OS_LINUX
//
// On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size
@ -678,20 +702,8 @@ typedef unsigned char kmp_affin_mask_t;
//
# if KMP_GROUP_AFFINITY
// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
# if _MSC_VER < 1600
typedef struct GROUP_AFFINITY {
KAFFINITY Mask;
WORD Group;
WORD Reserved[3];
} GROUP_AFFINITY;
# endif
typedef DWORD_PTR kmp_affin_mask_t;
extern int __kmp_num_proc_groups;
# define _KMP_CPU_SET(i,mask) \
(mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] |= \
(((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))
@ -758,19 +770,6 @@ extern int __kmp_num_proc_groups;
} \
}
typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;
typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;
typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;
typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *);
extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask);
# else /* KMP_GROUP_AFFINITY */
@ -817,6 +816,11 @@ typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */
#endif /* KMP_USE_HWLOC */
// prototype after typedef of kmp_affin_mask_t
#if KMP_GROUP_AFFINITY
extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask);
#endif
//
// Declare local char buffers with this size for printing debug and info
// messages, using __kmp_affinity_print_mask().

View File

@ -270,9 +270,9 @@ FTN_GET_AFFINITY_MAX_PROC( void )
return 0;
}
#if KMP_GROUP_AFFINITY && !KMP_USE_HWLOC
#if KMP_GROUP_AFFINITY
if ( __kmp_num_proc_groups > 1 ) {
return (int)KMP_CPU_SETSIZE;
return (int)(__kmp_num_proc_groups*sizeof(DWORD_PTR)*CHAR_BIT);
}
#endif /* KMP_GROUP_AFFINITY */
return __kmp_xproc;

View File

@ -35,10 +35,6 @@ __thread kmp_stats_list* __kmp_stats_thread_ptr = &__kmp_stats_list;
// gives reference tick for all events (considered the 0 tick)
tsc_tick_count __kmp_stats_start_time;
#endif
#if KMP_USE_HWLOC
int __kmp_hwloc_error = FALSE;
hwloc_topology_t __kmp_hwloc_topology = NULL;
#endif
/* ----------------------------------------------------- */
/* INITIALIZATION VARIABLES */
@ -220,6 +216,11 @@ enum mic_type __kmp_mic_type = non_mic;
#if KMP_AFFINITY_SUPPORTED
# if KMP_USE_HWLOC
int __kmp_hwloc_error = FALSE;
hwloc_topology_t __kmp_hwloc_topology = NULL;
# endif
# if KMP_GROUP_AFFINITY
int __kmp_num_proc_groups = 1;

View File

@ -552,9 +552,18 @@ __kmp_get_proc_group( kmp_affin_mask_t const *mask )
int i;
int group = -1;
for (i = 0; i < __kmp_num_proc_groups; i++) {
#if KMP_USE_HWLOC
// On Windows, the long type is always 32 bits, so group i is checked via ulongs 2*i and 2*i+1
unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong((hwloc_const_bitmap_t)mask, i*2);
unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong((hwloc_const_bitmap_t)mask, i*2+1);
if (first_32_bits == 0 && second_32_bits == 0) {
continue;
}
#else
if (mask[i] == 0) {
continue;
}
#endif
if (group >= 0) {
return -1;
}
@ -568,8 +577,23 @@ __kmp_get_proc_group( kmp_affin_mask_t const *mask )
int
__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
{
#if KMP_GROUP_AFFINITY
#if KMP_USE_HWLOC
int retval = hwloc_set_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
if (retval >= 0) {
return 0;
}
int error = errno;
if (abort_on_error) {
__kmp_msg(
kmp_ms_fatal,
KMP_MSG( FatalSysError ),
KMP_ERR( error ),
__kmp_msg_null
);
}
return error;
#else
# if KMP_GROUP_AFFINITY
if (__kmp_num_proc_groups > 1) {
//
@ -608,7 +632,7 @@ __kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
}
else
#endif /* KMP_GROUP_AFFINITY */
# endif /* KMP_GROUP_AFFINITY */
{
if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) {
@ -624,14 +648,30 @@ __kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
return error;
}
}
#endif /* KMP_USE_HWLOC */
return 0;
}
int
__kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
{
#if KMP_GROUP_AFFINITY
#if KMP_USE_HWLOC
int retval = hwloc_get_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
if (retval >= 0) {
return 0;
}
int error = errno;
if (abort_on_error) {
__kmp_msg(
kmp_ms_fatal,
KMP_MSG( FatalSysError ),
KMP_ERR( error ),
__kmp_msg_null
);
}
return error;
#else /* KMP_USE_HWLOC */
# if KMP_GROUP_AFFINITY
if (__kmp_num_proc_groups > 1) {
KMP_CPU_ZERO(mask);
@ -660,7 +700,7 @@ __kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
}
else
#endif /* KMP_GROUP_AFFINITY */
# endif /* KMP_GROUP_AFFINITY */
{
kmp_affin_mask_t newMask, sysMask, retval;
@ -704,14 +744,22 @@ __kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
}
*mask = retval;
}
#endif /* KMP_USE_HWLOC */
return 0;
}
void
__kmp_affinity_bind_thread( int proc )
{
#if KMP_GROUP_AFFINITY
#if KMP_USE_HWLOC
kmp_affin_mask_t *mask;
KMP_CPU_ALLOC_ON_STACK(mask);
KMP_CPU_ZERO(mask);
KMP_CPU_SET(proc, mask);
__kmp_set_system_affinity(mask, TRUE);
KMP_CPU_FREE_FROM_STACK(mask);
#else /* KMP_USE_HWLOC */
# if KMP_GROUP_AFFINITY
if (__kmp_num_proc_groups > 1) {
//
@ -740,7 +788,7 @@ __kmp_affinity_bind_thread( int proc )
}
else
#endif /* KMP_GROUP_AFFINITY */
# endif /* KMP_GROUP_AFFINITY */
{
kmp_affin_mask_t mask;
@ -748,6 +796,7 @@ __kmp_affinity_bind_thread( int proc )
KMP_CPU_SET(proc, &mask);
__kmp_set_system_affinity(&mask, TRUE);
}
#endif /* KMP_USE_HWLOC */
}
void