forked from OSchip/llvm-project
[OpenMP][libomp] Introduce oneAPI compiler support
Introduce KMP_COMPILER_ICX macro to represent compilation with oneAPI compiler. Fixup flag detection and compiler ID detection in CMake. Older CMake's detect IntelLLVM as Clang. Fix compiler warnings. Fixup many of the tests to have non-empty parallel regions as they are elided by oneAPI compiler.
This commit is contained in:
parent
aabf6e65fd
commit
1234011b80
|
@ -10,6 +10,7 @@ function(write_compiler_information lang)
|
|||
set(information "${information}\\;${CMAKE_${lang}_COMPILER_VERSION}")
|
||||
set(information "${information}\\;${${lang}_FLAGS}")
|
||||
set(information "${information}\\;${${lang}_HAS_TSAN_FLAG}")
|
||||
set(information "${information}\\;${${lang}_HAS_OMIT_FRAME_POINTER}")
|
||||
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${lang}CompilerInformation.txt ${information})
|
||||
endfunction(write_compiler_information)
|
||||
|
||||
|
@ -40,6 +41,9 @@ if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
|||
add_experimental_isel_flag(CXX)
|
||||
endif()
|
||||
|
||||
check_c_compiler_flag("-fno-omit-frame-pointer" C_HAS_OMIT_FRAME_POINTER)
|
||||
check_cxx_compiler_flag("-fno-omit-frame-pointer" CXX_HAS_OMIT_FRAME_POINTER)
|
||||
|
||||
SET(CMAKE_REQUIRED_FLAGS "-fsanitize=thread")
|
||||
check_c_compiler_flag("" C_HAS_TSAN_FLAG)
|
||||
check_cxx_compiler_flag("" CXX_HAS_TSAN_FLAG)
|
||||
|
|
|
@ -76,12 +76,14 @@ macro(extract_test_compiler_information lang file)
|
|||
list(GET information 2 version)
|
||||
list(GET information 3 openmp_flags)
|
||||
list(GET information 4 has_tsan_flags)
|
||||
list(GET information 5 has_omit_frame_pointer_flags)
|
||||
|
||||
set(OPENMP_TEST_${lang}_COMPILER_PATH ${path})
|
||||
set(OPENMP_TEST_${lang}_COMPILER_ID ${id})
|
||||
set(OPENMP_TEST_${lang}_COMPILER_VERSION ${version})
|
||||
set(OPENMP_TEST_${lang}_COMPILER_OPENMP_FLAGS ${openmp_flags})
|
||||
set(OPENMP_TEST_${lang}_COMPILER_HAS_TSAN_FLAGS ${has_tsan_flags})
|
||||
set(OPENMP_TEST_${lang}_COMPILER_HAS_OMIT_FRAME_POINTER_FLAGS ${has_omit_frame_pointer_flags})
|
||||
endmacro()
|
||||
|
||||
# Function to set variables with information about the test compiler.
|
||||
|
@ -98,6 +100,7 @@ function(set_test_compiler_information dir)
|
|||
set(OPENMP_TEST_COMPILER_VERSION "${OPENMP_TEST_C_COMPILER_VERSION}" PARENT_SCOPE)
|
||||
set(OPENMP_TEST_COMPILER_OPENMP_FLAGS "${OPENMP_TEST_C_COMPILER_OPENMP_FLAGS}" PARENT_SCOPE)
|
||||
set(OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS "${OPENMP_TEST_C_COMPILER_HAS_TSAN_FLAGS}" PARENT_SCOPE)
|
||||
set(OPENMP_TEST_COMPILER_HAS_OMIT_FRAME_POINTER_FLAGS "${OPENMP_TEST_C_COMPILER_HAS_OMIT_FRAME_POINTER_FLAGS}" PARENT_SCOPE)
|
||||
|
||||
# Determine major version.
|
||||
string(REGEX MATCH "[0-9]+" major "${OPENMP_TEST_C_COMPILER_VERSION}")
|
||||
|
@ -149,6 +152,7 @@ else()
|
|||
endif()
|
||||
# TODO: Implement blockaddress in GlobalISel and remove this flag!
|
||||
set(OPENMP_TEST_COMPILER_OPENMP_FLAGS "-fopenmp ${OPENMP_TEST_COMPILER_THREAD_FLAGS} -fno-experimental-isel")
|
||||
set(OPENMP_TEST_COMPILER_HAS_OMIT_FRAME_POINTER_FLAGS 1)
|
||||
endif()
|
||||
|
||||
# Function to set compiler features for use in lit.
|
||||
|
|
|
@ -1,4 +1,20 @@
|
|||
include(CheckCXXCompilerFlag)
|
||||
include(CheckCXXSourceCompiles)
|
||||
|
||||
# Check for oneAPI compiler (some older CMake versions detect as Clang)
|
||||
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
||||
check_cxx_source_compiles("#if (defined(__INTEL_CLANG_COMPILER) || defined(__INTEL_LLVM_COMPILER))
|
||||
int main() { return 0; }
|
||||
#else
|
||||
not oneAPI
|
||||
#endif" OPENMP_HAVE_ONEAPI_COMPILER)
|
||||
if (OPENMP_HAVE_ONEAPI_COMPILER)
|
||||
# According to CMake documentation, the compiler id should
|
||||
# be IntelLLVM when detected oneAPI
|
||||
set(CMAKE_C_COMPILER_ID "IntelLLVM")
|
||||
set(CMAKE_CXX_COMPILER_ID "IntelLLVM")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
check_cxx_compiler_flag(-Wall OPENMP_HAVE_WALL_FLAG)
|
||||
check_cxx_compiler_flag(-Werror OPENMP_HAVE_WERROR_FLAG)
|
||||
|
|
|
@ -25,7 +25,7 @@ function(libomp_check_linker_flag flag boolean)
|
|||
add_library(foo SHARED src_to_link.c)")
|
||||
# Compiling as a part of runtimes introduces ARCH-unknown-linux-gnu as a part
|
||||
# of a working directory. So adding a guard for unknown.
|
||||
set(failed_regexes "[Ee]rror;[Uu]nknown[^-];[Ss]kipping;LINK : warning")
|
||||
set(failed_regexes "[Ee]rror;[Uu]nknown[^-];[Ss]kipping;LINK : warning;Unsupported command line")
|
||||
set(base_dir ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/link_flag_check_${boolean})
|
||||
file(MAKE_DIRECTORY ${base_dir})
|
||||
file(MAKE_DIRECTORY ${base_dir}/build)
|
||||
|
|
|
@ -139,7 +139,7 @@ elseif(NOT APPLE)
|
|||
endif()
|
||||
|
||||
# Check Intel(R) C Compiler specific flags
|
||||
if(CMAKE_C_COMPILER_ID STREQUAL "Intel")
|
||||
if(CMAKE_C_COMPILER_ID STREQUAL "Intel" OR CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM")
|
||||
check_cxx_compiler_flag(/Qlong_double LIBOMP_HAVE_LONG_DOUBLE_FLAG)
|
||||
check_cxx_compiler_flag(/Qdiag-disable:177 LIBOMP_HAVE_DIAG_DISABLE_177_FLAG)
|
||||
check_cxx_compiler_flag(/Qinline-min-size=1 LIBOMP_HAVE_INLINE_MIN_SIZE_FLAG)
|
||||
|
@ -247,7 +247,7 @@ libomp_check_version_symbols(LIBOMP_HAVE_VERSION_SYMBOLS)
|
|||
# Check if quad precision types are available
|
||||
if(CMAKE_C_COMPILER_ID STREQUAL "GNU")
|
||||
set(LIBOMP_HAVE_QUAD_PRECISION TRUE)
|
||||
elseif(CMAKE_C_COMPILER_ID STREQUAL "Intel")
|
||||
elseif(CMAKE_C_COMPILER_ID STREQUAL "Intel" OR CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM")
|
||||
if(LIBOMP_HAVE_EXTENDED_FLOAT_TYPES_FLAG)
|
||||
set(LIBOMP_HAVE_QUAD_PRECISION TRUE)
|
||||
else()
|
||||
|
|
|
@ -1124,7 +1124,7 @@ extern void __kmp_init_target_mem();
|
|||
#if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
|
||||
// HW TSC is used to reduce overhead (clock tick instead of nanosecond).
|
||||
extern kmp_uint64 __kmp_ticks_per_msec;
|
||||
#if KMP_COMPILER_ICC
|
||||
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
|
||||
#define KMP_NOW() ((kmp_uint64)_rdtsc())
|
||||
#else
|
||||
#define KMP_NOW() __kmp_hardware_timestamp()
|
||||
|
|
|
@ -1765,7 +1765,7 @@ static bool __kmp_affinity_create_hwloc_map(kmp_i18n_id_t *const msg_id) {
|
|||
|
||||
hw_thread_index = 0;
|
||||
pu = NULL;
|
||||
while (pu = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, pu)) {
|
||||
while ((pu = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, pu))) {
|
||||
int index = depth - 1;
|
||||
bool included = KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask);
|
||||
kmp_hw_thread_t &hw_thread = __kmp_topology->at(hw_thread_index);
|
||||
|
|
|
@ -2452,6 +2452,7 @@ ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
|
|||
RTYPE, LCK_ID, MASK, GOMP_FLAG) \
|
||||
ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
|
||||
TYPE new_value; \
|
||||
(void)new_value; \
|
||||
OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
|
||||
OP_CMPXCHG_CPT(TYPE, BITS, OP) \
|
||||
}
|
||||
|
@ -2461,6 +2462,7 @@ ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
|
|||
LCK_ID, GOMP_FLAG) \
|
||||
ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
|
||||
TYPE new_value; \
|
||||
(void)new_value; \
|
||||
OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
|
||||
OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
|
||||
}
|
||||
|
@ -3162,6 +3164,7 @@ ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
|
|||
RTYPE, LCK_ID, MASK, GOMP_FLAG) \
|
||||
ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
|
||||
TYPE new_value; \
|
||||
(void)new_value; \
|
||||
OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
|
||||
OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
|
||||
}
|
||||
|
@ -3171,6 +3174,7 @@ ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
|
|||
LCK_ID, GOMP_FLAG) \
|
||||
ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
|
||||
TYPE new_value; \
|
||||
(void)new_value; \
|
||||
OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
|
||||
OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
|
||||
}
|
||||
|
|
|
@ -251,6 +251,9 @@ struct KMP_DO_ALIGN(4) kmp_cmplx128_a4_t {
|
|||
|
||||
kmp_cmplx128_a4_t() : q() {}
|
||||
|
||||
#if defined(__cplusplus) && (KMP_OS_WINDOWS)
|
||||
kmp_cmplx128_a4_t(const std::complex<_Quad> &c128) : q(c128) {}
|
||||
#endif
|
||||
kmp_cmplx128_a4_t(const kmp_cmplx128 &c128) : q(c128) {}
|
||||
|
||||
kmp_cmplx128_a4_t operator+(const kmp_cmplx128_a4_t &b) {
|
||||
|
@ -314,6 +317,9 @@ struct KMP_DO_ALIGN(16) kmp_cmplx128_a16_t {
|
|||
|
||||
kmp_cmplx128_a16_t() : q() {}
|
||||
|
||||
#if defined(__cplusplus) && (KMP_OS_WINDOWS)
|
||||
kmp_cmplx128_a16_t(const std::complex<_Quad> &c128) : q(c128) {}
|
||||
#endif
|
||||
kmp_cmplx128_a16_t(const kmp_cmplx128 &c128) : q(c128) {}
|
||||
|
||||
kmp_cmplx128_a16_t operator+(const kmp_cmplx128_a16_t &b) {
|
||||
|
|
|
@ -2163,7 +2163,6 @@ void __kmp_join_barrier(int gtid) {
|
|||
|
||||
kmp_info_t *this_thr = __kmp_threads[gtid];
|
||||
kmp_team_t *team;
|
||||
kmp_uint nproc;
|
||||
int tid;
|
||||
#ifdef KMP_DEBUG
|
||||
int team_id;
|
||||
|
@ -2176,12 +2175,14 @@ void __kmp_join_barrier(int gtid) {
|
|||
itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
|
||||
#endif
|
||||
#endif /* USE_ITT_BUILD */
|
||||
#if ((USE_ITT_BUILD && USE_ITT_NOTIFY) || defined KMP_DEBUG)
|
||||
int nproc = this_thr->th.th_team_nproc;
|
||||
#endif
|
||||
KMP_MB();
|
||||
|
||||
// Get current info
|
||||
team = this_thr->th.th_team;
|
||||
nproc = this_thr->th.th_team_nproc;
|
||||
KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc);
|
||||
KMP_DEBUG_ASSERT(nproc == team->t.t_nproc);
|
||||
tid = __kmp_tid_from_gtid(gtid);
|
||||
#ifdef KMP_DEBUG
|
||||
team_id = team->t.t_id;
|
||||
|
@ -2354,7 +2355,7 @@ void __kmp_join_barrier(int gtid) {
|
|||
// Set arrive time to zero to be able to check it in
|
||||
// __kmp_invoke_task(); the same is done inside the loop below
|
||||
this_thr->th.th_bar_arrive_time = 0;
|
||||
for (kmp_uint i = 1; i < nproc; ++i) {
|
||||
for (int i = 1; i < nproc; ++i) {
|
||||
delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
|
||||
other_threads[i]->th.th_bar_arrive_time = 0;
|
||||
}
|
||||
|
|
|
@ -685,13 +685,13 @@ void __kmpc_flush(ident_t *loc) {
|
|||
if (!__kmp_cpuinfo.flags.sse2) {
|
||||
// CPU cannot execute SSE2 instructions.
|
||||
} else {
|
||||
#if KMP_COMPILER_ICC
|
||||
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
|
||||
_mm_mfence();
|
||||
#elif KMP_COMPILER_MSVC
|
||||
MemoryBarrier();
|
||||
#else
|
||||
__sync_synchronize();
|
||||
#endif // KMP_COMPILER_ICC
|
||||
#endif // KMP_COMPILER_ICC || KMP_COMPILER_ICX
|
||||
}
|
||||
#endif // KMP_MIC
|
||||
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
|
||||
|
|
|
@ -226,16 +226,16 @@ kmp_omp_struct_info_t __kmp_omp_debug_struct_info = {
|
|||
when 64-bit value is assigned to 32-bit pointer. Use this function
|
||||
to suppress the warning. */
|
||||
static inline void *__kmp_convert_to_ptr(kmp_uint64 addr) {
|
||||
#if KMP_COMPILER_ICC
|
||||
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 810) // conversion from "unsigned long long" to "char
|
||||
// *" may lose significant bits
|
||||
#pragma warning(disable : 1195) // conversion from integer to smaller pointer
|
||||
#endif // KMP_COMPILER_ICC
|
||||
#endif // KMP_COMPILER_ICC || KMP_COMPILER_ICX
|
||||
return (void *)addr;
|
||||
#if KMP_COMPILER_ICC
|
||||
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
|
||||
#pragma warning(pop)
|
||||
#endif // KMP_COMPILER_ICC
|
||||
#endif // KMP_COMPILER_ICC || KMP_COMPILER_ICX
|
||||
} // __kmp_convert_to_ptr
|
||||
|
||||
static int kmp_location_match(kmp_str_loc_t *loc, kmp_omp_nthr_item_t *item) {
|
||||
|
|
|
@ -1954,7 +1954,7 @@ static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
|
|||
|
||||
// We need a fence here, since we must ensure that no memory operations
|
||||
// from later in this thread float above that read.
|
||||
#if KMP_COMPILER_ICC
|
||||
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
|
||||
_mm_mfence();
|
||||
#else
|
||||
__sync_synchronize();
|
||||
|
|
|
@ -53,8 +53,12 @@
|
|||
#define KMP_COMPILER_GCC 0
|
||||
#define KMP_COMPILER_CLANG 0
|
||||
#define KMP_COMPILER_MSVC 0
|
||||
#define KMP_COMPILER_ICX 0
|
||||
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#if __INTEL_CLANG_COMPILER
|
||||
#undef KMP_COMPILER_ICX
|
||||
#define KMP_COMPILER_ICX 1
|
||||
#elif defined(__INTEL_COMPILER)
|
||||
#undef KMP_COMPILER_ICC
|
||||
#define KMP_COMPILER_ICC 1
|
||||
#elif defined(__clang__)
|
||||
|
@ -85,7 +89,7 @@
|
|||
/* Check for quad-precision extension. */
|
||||
#define KMP_HAVE_QUAD 0
|
||||
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
|
||||
#if KMP_COMPILER_ICC
|
||||
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
|
||||
/* _Quad is already defined for icc */
|
||||
#undef KMP_HAVE_QUAD
|
||||
#define KMP_HAVE_QUAD 1
|
||||
|
@ -448,8 +452,10 @@ enum kmp_mem_fence_type {
|
|||
#pragma intrinsic(InterlockedExchangeAdd)
|
||||
#pragma intrinsic(InterlockedCompareExchange)
|
||||
#pragma intrinsic(InterlockedExchange)
|
||||
#if !(KMP_COMPILER_ICX && KMP_32_BIT_ARCH)
|
||||
#pragma intrinsic(InterlockedExchange64)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Using InterlockedIncrement / InterlockedDecrement causes a library loading
|
||||
// ordering problem, so we use InterlockedExchangeAdd instead.
|
||||
|
@ -842,8 +848,14 @@ static inline bool mips_sync_val_compare_and_swap(volatile kmp_uint64 *p,
|
|||
(kmp_uint64)(sv))
|
||||
#endif
|
||||
|
||||
#if KMP_OS_DARWIN && defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1800
|
||||
#define KMP_XCHG_FIXED8(p, v) \
|
||||
__atomic_exchange_1((volatile kmp_uint8 *)(p), (kmp_uint8)(v), \
|
||||
__ATOMIC_SEQ_CST)
|
||||
#else
|
||||
#define KMP_XCHG_FIXED8(p, v) \
|
||||
__sync_lock_test_and_set((volatile kmp_uint8 *)(p), (kmp_uint8)(v))
|
||||
#endif
|
||||
#define KMP_XCHG_FIXED16(p, v) \
|
||||
__sync_lock_test_and_set((volatile kmp_uint16 *)(p), (kmp_uint16)(v))
|
||||
#define KMP_XCHG_FIXED32(p, v) \
|
||||
|
@ -1026,7 +1038,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);
|
|||
#endif
|
||||
|
||||
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
|
||||
#if KMP_COMPILER_ICC
|
||||
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
|
||||
#define KMP_MFENCE_() _mm_mfence()
|
||||
#define KMP_SFENCE_() _mm_sfence()
|
||||
#elif KMP_COMPILER_MSVC
|
||||
|
|
|
@ -8953,19 +8953,16 @@ void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
|
|||
KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
|
||||
}
|
||||
// Release all the workers
|
||||
kmp_uint64 new_value; // new value for go
|
||||
new_value = team->t.b->go_release();
|
||||
team->t.b->go_release();
|
||||
|
||||
KMP_MFENCE();
|
||||
|
||||
// Workers should see transition status 2 and move to 0; but may need to be
|
||||
// woken up first
|
||||
size_t my_go_index;
|
||||
int count = old_nthreads - 1;
|
||||
while (count > 0) {
|
||||
count = old_nthreads - 1;
|
||||
for (int f = 1; f < old_nthreads; ++f) {
|
||||
my_go_index = f / team->t.b->threads_per_go;
|
||||
if (other_threads[f]->th.th_used_in_team.load() != 0) {
|
||||
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up the workers
|
||||
kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
|
||||
|
|
|
@ -159,11 +159,11 @@ void *kmp_malloc(size_t size) {
|
|||
}
|
||||
void *kmp_aligned_malloc(size_t sz, size_t a) {
|
||||
i;
|
||||
int err;
|
||||
void *res;
|
||||
#if KMP_OS_WINDOWS
|
||||
res = _aligned_malloc(sz, a);
|
||||
#else
|
||||
int err;
|
||||
if ((err = posix_memalign(&res, a, sz))) {
|
||||
errno = err; // can be EINVAL or ENOMEM
|
||||
res = NULL;
|
||||
|
@ -393,12 +393,12 @@ void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
|
|||
|
||||
void *omp_aligned_alloc(size_t a, size_t size, omp_allocator_handle_t al) {
|
||||
i;
|
||||
int err;
|
||||
void *res;
|
||||
#if KMP_OS_WINDOWS
|
||||
res = _aligned_malloc(size, a);
|
||||
#else
|
||||
if (err = posix_memalign(&res, a, size)) {
|
||||
int err;
|
||||
if ((err = posix_memalign(&res, a, size))) {
|
||||
errno = err; // can be EINVAL or ENOMEM
|
||||
res = NULL;
|
||||
}
|
||||
|
@ -420,12 +420,12 @@ void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t al) {
|
|||
void *omp_aligned_calloc(size_t a, size_t nmemb, size_t size,
|
||||
omp_allocator_handle_t al) {
|
||||
i;
|
||||
int err;
|
||||
void *res;
|
||||
#if KMP_OS_WINDOWS
|
||||
res = _aligned_recalloc(NULL, nmemb, size, a);
|
||||
#else
|
||||
if (err = posix_memalign(&res, a, nmemb * size)) {
|
||||
int err;
|
||||
if ((err = posix_memalign(&res, a, nmemb * size))) {
|
||||
errno = err; // can be EINVAL or ENOMEM
|
||||
res = NULL;
|
||||
}
|
||||
|
|
|
@ -22,7 +22,9 @@
|
|||
#define stringer(x) _stringer(x)
|
||||
|
||||
// Detect compiler.
|
||||
#if KMP_COMPILER_ICC
|
||||
#if KMP_COMPILER_ICX
|
||||
#define KMP_COMPILER __VERSION__
|
||||
#elif KMP_COMPILER_ICC
|
||||
#if __INTEL_COMPILER == 1010
|
||||
#define KMP_COMPILER "Intel(R) C++ Compiler 10.1"
|
||||
#elif __INTEL_COMPILER == 1100
|
||||
|
@ -53,8 +55,10 @@
|
|||
#define KMP_COMPILER "Intel(R) C++ Compiler 19.0"
|
||||
#elif __INTEL_COMPILER == 1910
|
||||
#define KMP_COMPILER "Intel(R) C++ Compiler 19.1"
|
||||
#elif __INTEL_COMPILER >= 9900
|
||||
#define KMP_COMPILER "Intel(R) C++ Compiler mainline"
|
||||
#elif __INTEL_COMPILER > 1910
|
||||
#define KMP_COMPILER \
|
||||
"Intel(R) C++ Compiler Classic " stringer(__INTEL_COMPILER) "." stringer( \
|
||||
__INTEL_COMPILER_UPDATE)
|
||||
#endif
|
||||
#elif KMP_COMPILER_CLANG
|
||||
#define KMP_COMPILER \
|
||||
|
|
|
@ -310,7 +310,8 @@ ompt_try_start_tool(unsigned int omp_version, const char *runtime_version) {
|
|||
OMPT_VERBOSE_INIT_PRINT("Opening %s... ", fname);
|
||||
HMODULE h = LoadLibrary(fname);
|
||||
if (!h) {
|
||||
OMPT_VERBOSE_INIT_CONTINUED_PRINT("Failed: Error %u\n", GetLastError());
|
||||
OMPT_VERBOSE_INIT_CONTINUED_PRINT("Failed: Error %u\n",
|
||||
(unsigned)GetLastError());
|
||||
} else {
|
||||
OMPT_VERBOSE_INIT_CONTINUED_PRINT("Success. \n");
|
||||
OMPT_VERBOSE_INIT_PRINT("Searching for ompt_start_tool in %s... ",
|
||||
|
@ -318,7 +319,7 @@ ompt_try_start_tool(unsigned int omp_version, const char *runtime_version) {
|
|||
start_tool = (ompt_start_tool_t)GetProcAddress(h, "ompt_start_tool");
|
||||
if (!start_tool) {
|
||||
OMPT_VERBOSE_INIT_CONTINUED_PRINT("Failed: Error %u\n",
|
||||
GetLastError());
|
||||
(unsigned)GetLastError());
|
||||
} else
|
||||
#else
|
||||
#error Activation of OMPT is not supported on this platform.
|
||||
|
|
|
@ -568,7 +568,8 @@ void __kmp_gtid_set_specific(int gtid) {
|
|||
if (__kmp_init_gtid) {
|
||||
KA_TRACE(50, ("__kmp_gtid_set_specific: T#%d key:%d\n", gtid,
|
||||
__kmp_gtid_threadprivate_key));
|
||||
if (!TlsSetValue(__kmp_gtid_threadprivate_key, (LPVOID)(gtid + 1)))
|
||||
kmp_intptr_t g = (kmp_intptr_t)gtid;
|
||||
if (!TlsSetValue(__kmp_gtid_threadprivate_key, (LPVOID)(g + 1)))
|
||||
KMP_FATAL(TLSSetValueFailed);
|
||||
} else {
|
||||
KA_TRACE(50, ("__kmp_gtid_set_specific: runtime shutdown, returning\n"));
|
||||
|
@ -934,9 +935,8 @@ void __kmp_terminate_thread(int gtid) {
|
|||
}
|
||||
|
||||
void __kmp_clear_system_time(void) {
|
||||
BOOL status;
|
||||
LARGE_INTEGER time;
|
||||
status = QueryPerformanceCounter(&time);
|
||||
QueryPerformanceCounter(&time);
|
||||
__kmp_win32_time = (kmp_int64)time.QuadPart;
|
||||
}
|
||||
|
||||
|
@ -960,9 +960,8 @@ void __kmp_initialize_system_tick(void) {
|
|||
/* Calculate the elapsed wall clock time for the user */
|
||||
|
||||
void __kmp_elapsed(double *t) {
|
||||
BOOL status;
|
||||
LARGE_INTEGER now;
|
||||
status = QueryPerformanceCounter(&now);
|
||||
QueryPerformanceCounter(&now);
|
||||
*t = ((double)now.QuadPart) * __kmp_win32_tick;
|
||||
}
|
||||
|
||||
|
@ -972,11 +971,8 @@ void __kmp_elapsed_tick(double *t) { *t = __kmp_win32_tick; }
|
|||
|
||||
void __kmp_read_system_time(double *delta) {
|
||||
if (delta != NULL) {
|
||||
BOOL status;
|
||||
LARGE_INTEGER now;
|
||||
|
||||
status = QueryPerformanceCounter(&now);
|
||||
|
||||
QueryPerformanceCounter(&now);
|
||||
*delta = ((double)(((kmp_int64)now.QuadPart) - __kmp_win32_time)) *
|
||||
__kmp_win32_tick;
|
||||
}
|
||||
|
@ -1020,6 +1016,7 @@ extern "C" void *__stdcall __kmp_launch_worker(void *arg) {
|
|||
|
||||
if (__kmp_stkoffset > 0 && gtid > 0) {
|
||||
padding = KMP_ALLOCA(gtid * __kmp_stkoffset);
|
||||
(void)padding;
|
||||
}
|
||||
|
||||
KMP_FSYNC_RELEASING(&this_thr->th.th_info.ds.ds_alive);
|
||||
|
@ -1354,9 +1351,10 @@ static void __kmp_reap_common(kmp_info_t *th) {
|
|||
/* NOTE: The ExitProcess(code) system call causes all threads to Terminate
|
||||
with a exit_val = code. Because of this we can not rely on exit_val having
|
||||
any particular value. */
|
||||
kmp_intptr_t e = (kmp_intptr_t)exit_val;
|
||||
if (exit_val == STILL_ACTIVE) {
|
||||
KA_TRACE(1, ("__kmp_reap_common: thread still active.\n"));
|
||||
} else if ((void *)exit_val != (void *)th) {
|
||||
} else if ((void *)e != (void *)th) {
|
||||
KA_TRACE(1, ("__kmp_reap_common: ExitProcess / TerminateThread used?\n"));
|
||||
}
|
||||
|
||||
|
@ -1519,13 +1517,12 @@ void __kmp_thread_sleep(int millis) {
|
|||
|
||||
// Determine whether the given address is mapped into the current address space.
|
||||
int __kmp_is_address_mapped(void *addr) {
|
||||
DWORD status;
|
||||
MEMORY_BASIC_INFORMATION lpBuffer;
|
||||
SIZE_T dwLength;
|
||||
|
||||
dwLength = sizeof(MEMORY_BASIC_INFORMATION);
|
||||
|
||||
status = VirtualQuery(addr, &lpBuffer, dwLength);
|
||||
VirtualQuery(addr, &lpBuffer, dwLength);
|
||||
|
||||
return !(((lpBuffer.State == MEM_RESERVE) || (lpBuffer.State == MEM_FREE)) ||
|
||||
((lpBuffer.Protect == PAGE_NOACCESS) ||
|
||||
|
|
|
@ -30,6 +30,7 @@ pythonize_bool(LIBOMP_OMPT_OPTIONAL)
|
|||
pythonize_bool(LIBOMP_HAVE_LIBM)
|
||||
pythonize_bool(LIBOMP_HAVE_LIBATOMIC)
|
||||
pythonize_bool(OPENMP_STANDALONE_BUILD)
|
||||
pythonize_bool(OPENMP_TEST_COMPILER_HAS_OMIT_FRAME_POINTER_FLAGS)
|
||||
|
||||
add_library(ompt-print-callback INTERFACE)
|
||||
target_include_directories(ompt-print-callback INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/ompt)
|
||||
|
|
|
@ -3,30 +3,22 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <omp.h>
|
||||
#include "omp_testsuite.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N");
|
||||
// should print all for first parallel
|
||||
omp_set_num_threads(4);
|
||||
#pragma omp parallel
|
||||
{ }
|
||||
go_parallel_nthreads(4);
|
||||
// should print all because of new threads
|
||||
omp_set_num_threads(8);
|
||||
#pragma omp parallel
|
||||
{ }
|
||||
go_parallel_nthreads(8);
|
||||
// should not print anything here
|
||||
omp_set_num_threads(6);
|
||||
#pragma omp parallel
|
||||
{ }
|
||||
go_parallel_nthreads(6);
|
||||
// should print all because of new thread
|
||||
omp_set_num_threads(9);
|
||||
#pragma omp parallel
|
||||
{ }
|
||||
go_parallel_nthreads(9);
|
||||
// should not print anything here
|
||||
omp_set_num_threads(2);
|
||||
#pragma omp parallel
|
||||
{ }
|
||||
return 0;
|
||||
go_parallel_nthreads(2);
|
||||
|
||||
return get_exit_value();
|
||||
}
|
||||
|
||||
// CHECK: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4
|
||||
|
|
|
@ -4,16 +4,16 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <omp.h>
|
||||
#include "omp_testsuite.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N");
|
||||
omp_set_nested(1);
|
||||
#pragma omp parallel num_threads(4)
|
||||
{
|
||||
#pragma omp parallel num_threads(3)
|
||||
{ }
|
||||
go_parallel_nthreads(3);
|
||||
}
|
||||
return 0;
|
||||
return get_exit_value();
|
||||
}
|
||||
|
||||
// CHECK: num_threads=4 TESTER: tl:1 at:0 tn:[0-3] nt:4
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <omp.h>
|
||||
#include "omp_testsuite.h"
|
||||
|
||||
// Currently, KMP_HOT_TEAMS_MAX_LEVEL has to be equal to the
|
||||
// nest depth for intuitive behavior
|
||||
|
@ -11,14 +12,11 @@ int main(int argc, char** argv) {
|
|||
omp_set_nested(1);
|
||||
#pragma omp parallel num_threads(4)
|
||||
{
|
||||
#pragma omp parallel num_threads(3)
|
||||
{ }
|
||||
#pragma omp parallel num_threads(3)
|
||||
{ }
|
||||
go_parallel_nthreads(3);
|
||||
go_parallel_nthreads(3);
|
||||
}
|
||||
#pragma omp parallel num_threads(4)
|
||||
{ }
|
||||
return 0;
|
||||
go_parallel_nthreads(4);
|
||||
return get_exit_value();
|
||||
}
|
||||
|
||||
// CHECK: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4
|
||||
|
|
|
@ -3,30 +3,26 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <omp.h>
|
||||
#include "omp_testsuite.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N");
|
||||
omp_set_nested(1);
|
||||
#pragma omp parallel num_threads(1)
|
||||
{
|
||||
#pragma omp parallel num_threads(2)
|
||||
{ }
|
||||
go_parallel_nthreads(2);
|
||||
#pragma omp parallel num_threads(2)
|
||||
{
|
||||
#pragma omp parallel num_threads(1)
|
||||
{
|
||||
#pragma omp parallel num_threads(2)
|
||||
{ }
|
||||
go_parallel_nthreads(2);
|
||||
}
|
||||
}
|
||||
#pragma omp parallel num_threads(1)
|
||||
{ }
|
||||
go_parallel_nthreads(1);
|
||||
}
|
||||
#pragma omp parallel num_threads(2)
|
||||
{ }
|
||||
#pragma omp parallel num_threads(1)
|
||||
{ }
|
||||
return 0;
|
||||
go_parallel_nthreads(2);
|
||||
go_parallel_nthreads(1);
|
||||
return get_exit_value();
|
||||
}
|
||||
|
||||
// CHECK: num_threads=1 TESTER: tl:1 at:0 tn:0 nt:1
|
||||
|
|
|
@ -3,29 +3,24 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <omp.h>
|
||||
#include "omp_testsuite.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N");
|
||||
omp_set_nested(1);
|
||||
#pragma omp parallel num_threads(1)
|
||||
{
|
||||
#pragma omp parallel num_threads(1)
|
||||
{ }
|
||||
#pragma omp parallel num_threads(1)
|
||||
{ }
|
||||
go_parallel_nthreads(1);
|
||||
go_parallel_nthreads(1);
|
||||
#pragma omp parallel num_threads(1)
|
||||
{
|
||||
#pragma omp parallel num_threads(1)
|
||||
{ }
|
||||
go_parallel_nthreads(1);
|
||||
}
|
||||
#pragma omp parallel num_threads(1)
|
||||
{ }
|
||||
go_parallel_nthreads(1);
|
||||
}
|
||||
#pragma omp parallel num_threads(1)
|
||||
{ }
|
||||
#pragma omp parallel num_threads(1)
|
||||
{ }
|
||||
return 0;
|
||||
go_parallel_nthreads(1);
|
||||
go_parallel_nthreads(1);
|
||||
return get_exit_value();
|
||||
}
|
||||
|
||||
// CHECK: num_threads=1 TESTER: tl:1 at:0 tn:0 nt:1
|
||||
|
|
|
@ -4,26 +4,21 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <omp.h>
|
||||
#include "omp_testsuite.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N aff:{%A}");
|
||||
omp_set_num_threads(8);
|
||||
// Initial parallel
|
||||
#pragma omp parallel proc_bind(spread)
|
||||
{ }
|
||||
#pragma omp parallel proc_bind(spread)
|
||||
{ }
|
||||
go_parallel_spread();
|
||||
go_parallel_spread();
|
||||
// Affinity changes here
|
||||
#pragma omp parallel proc_bind(close)
|
||||
{ }
|
||||
#pragma omp parallel proc_bind(close)
|
||||
{ }
|
||||
go_parallel_close();
|
||||
go_parallel_close();
|
||||
// Affinity changes here
|
||||
#pragma omp parallel proc_bind(master)
|
||||
{ }
|
||||
#pragma omp parallel proc_bind(master)
|
||||
{ }
|
||||
return 0;
|
||||
go_parallel_master();
|
||||
go_parallel_master();
|
||||
return get_exit_value();
|
||||
}
|
||||
|
||||
// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 aff:
|
||||
|
|
|
@ -9,14 +9,13 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <omp.h>
|
||||
#include "omp_testsuite.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N");
|
||||
#pragma omp parallel
|
||||
{ }
|
||||
#pragma omp parallel
|
||||
{ }
|
||||
return 0;
|
||||
go_parallel();
|
||||
go_parallel();
|
||||
return get_exit_value();
|
||||
}
|
||||
|
||||
// NOTHING: NO_OUTPUT
|
||||
|
|
|
@ -4,13 +4,12 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <omp.h>
|
||||
#include "omp_testsuite.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
#pragma omp parallel
|
||||
{ }
|
||||
#pragma omp parallel
|
||||
{ }
|
||||
return 0;
|
||||
go_parallel();
|
||||
go_parallel();
|
||||
return get_exit_value();
|
||||
}
|
||||
|
||||
// CHECK-8: num_threads=8 TESTER-ENV: tl:1 tn:[0-7] nt:8$
|
||||
|
|
|
@ -4,8 +4,9 @@
|
|||
// CHECK-SAME: cores
|
||||
// REQUIRES: affinity
|
||||
|
||||
#include "omp_testsuite.h"
|
||||
|
||||
int main() {
|
||||
#pragma omp parallel
|
||||
{}
|
||||
return 0;
|
||||
go_parallel();
|
||||
return get_exit_value();
|
||||
}
|
||||
|
|
|
@ -45,9 +45,13 @@ config.test_format = lit.formats.ShTest()
|
|||
flags = " -I " + config.test_source_root + \
|
||||
" -L " + config.library_dir + \
|
||||
" " + config.test_extra_flags
|
||||
if config.has_omit_frame_pointer_flag:
|
||||
flags += " -fno-omit-frame-pointer"
|
||||
|
||||
config.test_flags = " -I " + config.omp_header_directory + flags
|
||||
config.test_flags_use_compiler_omp_h = flags
|
||||
|
||||
|
||||
# extra libraries
|
||||
libs = ""
|
||||
if config.has_libm:
|
||||
|
|
|
@ -17,6 +17,7 @@ config.has_ompt = @LIBOMP_OMPT_SUPPORT@ and @LIBOMP_OMPT_OPTIONAL@
|
|||
config.has_libm = @LIBOMP_HAVE_LIBM@
|
||||
config.has_libatomic = @LIBOMP_HAVE_LIBATOMIC@
|
||||
config.is_standalone_build = @OPENMP_STANDALONE_BUILD@
|
||||
config.has_omit_frame_pointer_flag = @OPENMP_TEST_COMPILER_HAS_OMIT_FRAME_POINTER_FLAGS@
|
||||
|
||||
# Let the main config do the real work.
|
||||
lit_config.load_config(config, "@LIBOMP_BASE_DIR@/test/lit.cfg")
|
||||
|
|
|
@ -20,6 +20,60 @@
|
|||
#define NUM_TASKS 25
|
||||
#define MAX_TASKS_PER_THREAD 5
|
||||
|
||||
// Functions that call a parallel region that does very minimal work
|
||||
// Some compilers may optimize away an empty parallel region
|
||||
volatile int g_counter__;
|
||||
|
||||
// If nthreads == 0, then do not use num_threads() clause
|
||||
static void go_parallel() {
|
||||
g_counter__ = 0;
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp atomic
|
||||
g_counter__++;
|
||||
}
|
||||
}
|
||||
|
||||
static void go_parallel_nthreads(int nthreads) {
|
||||
g_counter__ = 0;
|
||||
#pragma omp parallel num_threads(nthreads)
|
||||
{
|
||||
#pragma omp atomic
|
||||
g_counter__++;
|
||||
}
|
||||
}
|
||||
|
||||
static void go_parallel_spread() {
|
||||
g_counter__ = 0;
|
||||
#pragma omp parallel proc_bind(spread)
|
||||
{
|
||||
#pragma omp atomic
|
||||
g_counter__++;
|
||||
}
|
||||
}
|
||||
|
||||
static void go_parallel_close() {
|
||||
g_counter__ = 0;
|
||||
#pragma omp parallel proc_bind(close)
|
||||
{
|
||||
#pragma omp atomic
|
||||
g_counter__++;
|
||||
}
|
||||
}
|
||||
|
||||
static void go_parallel_master() {
|
||||
g_counter__ = 0;
|
||||
#pragma omp parallel proc_bind(master)
|
||||
{
|
||||
#pragma omp atomic
|
||||
g_counter__++;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int get_exit_value() {
|
||||
return ((g_counter__ == -1) ? EXIT_FAILURE : EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
// Windows versions of pthread_create() and pthread_join()
|
||||
# include <windows.h>
|
||||
|
|
|
@ -1,16 +1,16 @@
|
|||
// RUN: %libomp-compile-and-run | FileCheck %s
|
||||
// REQUIRES: ompt
|
||||
#include "callback.h"
|
||||
#include "omp_testsuite.h"
|
||||
|
||||
int main() {
|
||||
#pragma omp parallel num_threads(2)
|
||||
{}
|
||||
go_parallel_nthreads(2);
|
||||
|
||||
printf("Before ompt_finalize_tool\n");
|
||||
ompt_finalize_tool();
|
||||
printf("After ompt_finalize_tool\n");
|
||||
|
||||
return 0;
|
||||
return get_exit_value();
|
||||
}
|
||||
|
||||
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
|
||||
|
|
|
@ -1,19 +1,14 @@
|
|||
// RUN: %libomp-compile-and-run | FileCheck %s
|
||||
// REQUIRES: ompt
|
||||
#include "callback.h"
|
||||
#include "omp_testsuite.h"
|
||||
#include <omp.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
#pragma omp parallel num_threads(1)
|
||||
{
|
||||
|
||||
}
|
||||
go_parallel_nthreads(1);
|
||||
ompt_set_callback(ompt_callback_parallel_begin, NULL);
|
||||
#pragma omp parallel num_threads(1)
|
||||
{
|
||||
|
||||
}
|
||||
go_parallel_nthreads(1);
|
||||
|
||||
// Check if libomp supports the callbacks for this test.
|
||||
// CHECK-NOT: {{^}}0: Could not register callback 'ompt_callback_idle'
|
||||
|
@ -25,5 +20,5 @@ int main()
|
|||
// CHECK-NOT: {{^}}[[THREAD_ID]]: ompt_event_parallel_begin:
|
||||
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_parallel_end:
|
||||
|
||||
return 0;
|
||||
return get_exit_value();
|
||||
}
|
||||
|
|
|
@ -8,8 +8,7 @@
|
|||
// XFAIL: icc
|
||||
|
||||
// support for taskwait with depend clause introduced in clang-14
|
||||
// UNSUPPORTED: clang-5, clang-6, clang-6, clang-8, clang-9, clang-10, clang-11,
|
||||
// clang-12, clang-13
|
||||
// UNSUPPORTED: clang-5, clang-6, clang-6, clang-8, clang-9, clang-10, clang-11, clang-12, clang-13
|
||||
|
||||
#include "callback.h"
|
||||
#include <omp.h>
|
||||
|
|
Loading…
Reference in New Issue