2014-08-05 17:32:28 +08:00
|
|
|
#
|
|
|
|
#//===----------------------------------------------------------------------===//
|
|
|
|
#//
|
|
|
|
#// The LLVM Compiler Infrastructure
|
|
|
|
#//
|
|
|
|
#// This file is dual licensed under the MIT and the University of Illinois Open
|
|
|
|
#// Source Licenses. See LICENSE.txt for details.
|
|
|
|
#//
|
|
|
|
#//===----------------------------------------------------------------------===//
|
|
|
|
#
|
2014-06-02 02:01:33 +08:00
|
|
|
|
2017-11-30 03:31:43 +08:00
|
|
|
# This directory is meant to be configured through its parent directory only.
# Abort right away when it is itself the top-level source directory.
if("${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}")
  message(FATAL_ERROR "Direct configuration not supported, please use parent directory!")
endif()
|
2014-08-05 17:32:28 +08:00
|
|
|
|
|
|
|
# Put this project's cmake/ directory at the front of the module search path
# so that the include() calls below can find the custom helper modules.
set(CMAKE_MODULE_PATH
  ${CMAKE_CURRENT_SOURCE_DIR}/cmake
  ${CMAKE_MODULE_PATH})

# libomp version (major / minor)
set(LIBOMP_VERSION_MAJOR 5)
set(LIBOMP_VERSION_MINOR 0)

# Helper modules; these files live in the cmake/ subdirectory
include(LibompUtils)
include(LibompGetArchitecture)
include(LibompHandleFlags)
include(LibompDefinitions)
|
2014-08-05 17:32:28 +08:00
|
|
|
|
2015-07-16 00:05:30 +08:00
|
|
|
# Determine the target architecture (LIBOMP_ARCH) and whether assertions are
# enabled (LIBOMP_ENABLE_ASSERTIONS).
#  - Standalone build: the architecture is auto-detected and exposed as a
#    cache entry so the user can override it; assertions default to on.
#  - Part of an LLVM build: the architecture is derived from LLVM_TARGET_ARCH
#    (or from the first component of LLVM_HOST_TRIPLE when the target is
#    "host"), and assertions follow LLVM_ENABLE_ASSERTIONS.
if(${OPENMP_STANDALONE_BUILD})
  # If adding a new architecture, take a look at cmake/LibompGetArchitecture.cmake
  libomp_get_architecture(LIBOMP_DETECTED_ARCH)
  set(LIBOMP_ARCH "${LIBOMP_DETECTED_ARCH}" CACHE STRING
    "The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64).")
  # Should assertions be enabled? They are on by default.
  set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL
    "enable assertions?")
else() # Part of LLVM build
  # Determine the native architecture from LLVM.
  string(TOLOWER "${LLVM_TARGET_ARCH}" LIBOMP_NATIVE_ARCH)
  if(LIBOMP_NATIVE_ARCH STREQUAL "host")
    # Keep everything up to the first '-' of the host triple.
    # The triple is quoted so that an empty/undefined LLVM_HOST_TRIPLE yields
    # an empty match (and falls through to the "last ditch" detection below)
    # instead of a string() argument-count error.
    string(REGEX MATCH "^[^-]*" LIBOMP_NATIVE_ARCH "${LLVM_HOST_TRIPLE}")
  endif()

  # NOTE: branch order matters for the unanchored MATCHES tests:
  # "powerpc64le" must be tested before "powerpc", and "arm64" before "arm".
  if(LIBOMP_NATIVE_ARCH MATCHES "i[2-6]86" OR LIBOMP_NATIVE_ARCH STREQUAL "x86")
    set(LIBOMP_ARCH i386)
  elseif(LIBOMP_NATIVE_ARCH STREQUAL "amd64" OR LIBOMP_NATIVE_ARCH STREQUAL "x86_64")
    set(LIBOMP_ARCH x86_64)
  elseif(LIBOMP_NATIVE_ARCH MATCHES "powerpc64le")
    set(LIBOMP_ARCH ppc64le)
  elseif(LIBOMP_NATIVE_ARCH MATCHES "powerpc")
    set(LIBOMP_ARCH ppc64)
  elseif(LIBOMP_NATIVE_ARCH MATCHES "aarch64" OR LIBOMP_NATIVE_ARCH MATCHES "arm64")
    set(LIBOMP_ARCH aarch64)
  elseif(LIBOMP_NATIVE_ARCH MATCHES "arm")
    set(LIBOMP_ARCH arm)
  else()
    # last ditch effort
    libomp_get_architecture(LIBOMP_ARCH)
  endif()
  set(LIBOMP_ENABLE_ASSERTIONS ${LLVM_ENABLE_ASSERTIONS})
endif()

# Check LIBOMP_ARCH against the list of accepted architecture names.
# NOTE(review): libomp_check_variable is defined outside this file;
# presumably it reports an error for values not in the list - confirm.
libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 mic mips mips64)
|
2014-08-05 17:32:28 +08:00
|
|
|
|
2015-05-27 01:27:01 +08:00
|
|
|
# Which flavour of the library to build: normal, profile or stubs.
set(LIBOMP_LIB_TYPE normal
  CACHE STRING "Performance,Profiling,Stubs library (normal/profile/stubs)")
libomp_check_variable(LIBOMP_LIB_TYPE normal profile stubs)

# Which OpenMP specification version to build for; 50 is the default.
set(LIBOMP_OMP_VERSION 50
  CACHE STRING "The OpenMP version (50/45/40/30)")
libomp_check_variable(LIBOMP_OMP_VERSION 50 45 40 30)
|
2015-08-29 02:42:10 +08:00
|
|
|
# Set the OpenMP year and month (YYYYMM) associated with the selected
# LIBOMP_OMP_VERSION. Thresholds are tested from newest to oldest, so the
# first branch whose version is <= LIBOMP_OMP_VERSION wins; anything below 30
# falls back to 200505.
# The variable is expanded inside quotes so that an empty value results in a
# well-formed (false) comparison rather than a malformed if() expression.
if("${LIBOMP_OMP_VERSION}" GREATER 50 OR "${LIBOMP_OMP_VERSION}" EQUAL 50)
  set(LIBOMP_OMP_YEAR_MONTH 201611)
elseif("${LIBOMP_OMP_VERSION}" GREATER 45 OR "${LIBOMP_OMP_VERSION}" EQUAL 45)
  set(LIBOMP_OMP_YEAR_MONTH 201511)
elseif("${LIBOMP_OMP_VERSION}" GREATER 40 OR "${LIBOMP_OMP_VERSION}" EQUAL 40)
  set(LIBOMP_OMP_YEAR_MONTH 201307)
elseif("${LIBOMP_OMP_VERSION}" GREATER 30 OR "${LIBOMP_OMP_VERSION}" EQUAL 30)
  set(LIBOMP_OMP_YEAR_MONTH 201107)
else()
  set(LIBOMP_OMP_YEAR_MONTH 200505)
endif()
|
2015-07-16 00:05:30 +08:00
|
|
|
# Intel(R) MIC sub-architecture selection. The value is only checked when the
# target architecture is "mic".
set(LIBOMP_MIC_ARCH knc
  CACHE STRING "Intel(R) Many Integrated Core Architecture (Intel(R) MIC Architecture) (knf/knc). Ignored if not Intel(R) MIC Architecture build.")
if("${LIBOMP_ARCH}" STREQUAL "mic")
  libomp_check_variable(LIBOMP_MIC_ARCH knf knc)
endif()

# Optional Fortran module generation (off by default).
set(LIBOMP_FORTRAN_MODULES FALSE
  CACHE BOOL "Create Fortran module files? (requires fortran compiler)")
|
2014-08-05 17:32:28 +08:00
|
|
|
|
2015-05-30 00:13:56 +08:00
|
|
|
# Universal (fat) binary builds on Mac.
# The dedicated LIBOMP_OSX_ARCHITECTURES entry lets people build this library
# as a universal library without forcing a universal build of the llvm/clang
# compiler. Its default is the current CMAKE_OSX_ARCHITECTURES, and its value
# is then copied back into CMAKE_OSX_ARCHITECTURES.
set(LIBOMP_OSX_ARCHITECTURES "${CMAKE_OSX_ARCHITECTURES}"
  CACHE STRING "For Mac builds, semicolon separated list of architectures to build for universal fat binary.")
set(CMAKE_OSX_ARCHITECTURES ${LIBOMP_OSX_ARCHITECTURES})

# Should @rpath be used for dynamic libraries on Mac?
# The if(NOT DEFINED) guard leaves any existing (e.g. cached) value of the
# variable alone so the user's choice is not interfered with. No cache entry
# is created either, so there are no inadvertent effects on other parts of
# LLVM.
if(NOT DEFINED CMAKE_MACOSX_RPATH)
  set(CMAKE_MACOSX_RPATH TRUE)
endif()
|
|
|
|
|
2015-07-16 00:05:30 +08:00
|
|
|
# User specified flags, all empty by default.
# These are appended to the configured flags.
set(LIBOMP_CFLAGS ""
  CACHE STRING "Appended user specified C compiler flags.")
set(LIBOMP_CXXFLAGS ""
  CACHE STRING "Appended user specified C++ compiler flags.")
set(LIBOMP_CPPFLAGS ""
  CACHE STRING "Appended user specified C preprocessor flags.")
set(LIBOMP_ASMFLAGS ""
  CACHE STRING "Appended user specified assembler flags.")
set(LIBOMP_LDFLAGS ""
  CACHE STRING "Appended user specified linker flags.")
set(LIBOMP_LIBFLAGS ""
  CACHE STRING "Appended user specified linked libs flags. (e.g., -lm)")
set(LIBOMP_FFLAGS ""
  CACHE STRING "Appended user specified Fortran compiler flags. These are only used if LIBOMP_FORTRAN_MODULES==TRUE.")
|
2014-08-05 17:32:28 +08:00
|
|
|
|
2015-06-01 11:05:13 +08:00
|
|
|
# Should the libomp library and generated headers be copied into the original
# source exports/ directory?
# Turning this to FALSE aids parallel builds to not interfere with each other.
# Currently, the testsuite module expects the just built OpenMP library to be
# located inside the exports/ directory.
# TODO: have testsuite run under llvm-lit directly. We can then get rid of
# copying to exports/
# The entry is an on/off switch, so it is declared with cache type BOOL (it
# was STRING); existing cache entries and -D values keep working unchanged.
set(LIBOMP_COPY_EXPORTS TRUE CACHE BOOL
  "Should exports be copied into source exports/ directory?")
|
2015-05-14 20:54:08 +08:00
|
|
|
|
2015-12-01 04:02:59 +08:00
|
|
|
# Hwloc support: off by default, with /usr/local as the default install
# prefix of the hwloc library.
set(LIBOMP_USE_HWLOC FALSE
  CACHE BOOL "Use Hwloc (http://www.open-mpi.org/projects/hwloc/) library for affinity?")
set(LIBOMP_HWLOC_INSTALL_DIR /usr/local
  CACHE PATH "Install path for hwloc library")
|
|
|
|
|
2016-12-15 06:39:11 +08:00
|
|
|
# Fetch the build number (from kmp_version.cpp) into LIBOMP_VERSION_BUILD,
# then split it arithmetically: the quotient by 10000 is stored as the build
# year and the remainder as the combined month/day part.
# NOTE(review): this split presumes a YYYYMMDD-style number - confirm against
# libomp_get_build_number.
libomp_get_build_number("${CMAKE_CURRENT_SOURCE_DIR}" LIBOMP_VERSION_BUILD)
math(EXPR LIBOMP_VERSION_BUILD_YEAR
  "${LIBOMP_VERSION_BUILD}/10000")
math(EXPR LIBOMP_VERSION_BUILD_MONTH_DAY
  "${LIBOMP_VERSION_BUILD}%10000")

# No timestamp is recorded at the moment; a fixed placeholder is used.
set(LIBOMP_BUILD_DATE "No_Timestamp")
|
2014-08-05 17:32:28 +08:00
|
|
|
|
|
|
|
# Architecture flags.
# Every flag starts out FALSE; the branch matching LIBOMP_ARCH then turns on
# the flag(s) for the selected architecture. Both ppc64 variants also set the
# common PPC64 flag.
foreach(libomp_arch_flag
    IA32 INTEL64 ARM AARCH64 PPC64BE PPC64LE PPC64 MIC MIPS64 MIPS)
  set(${libomp_arch_flag} FALSE)
endforeach()

if("${LIBOMP_ARCH}" STREQUAL "i386" OR "${LIBOMP_ARCH}" STREQUAL "32")
  # IA-32 architecture
  set(IA32 TRUE)
elseif("${LIBOMP_ARCH}" STREQUAL "x86_64" OR "${LIBOMP_ARCH}" STREQUAL "32e")
  # Intel(R) 64 architecture
  set(INTEL64 TRUE)
elseif("${LIBOMP_ARCH}" STREQUAL "arm")
  # ARM architecture
  set(ARM TRUE)
elseif("${LIBOMP_ARCH}" STREQUAL "ppc64")
  # PPC64BE architecture
  set(PPC64BE TRUE)
  set(PPC64 TRUE)
elseif("${LIBOMP_ARCH}" STREQUAL "ppc64le")
  # PPC64LE architecture
  set(PPC64LE TRUE)
  set(PPC64 TRUE)
elseif("${LIBOMP_ARCH}" STREQUAL "aarch64")
  # AARCH64 architecture
  set(AARCH64 TRUE)
elseif("${LIBOMP_ARCH}" STREQUAL "mic")
  # Intel(R) Many Integrated Core Architecture
  set(MIC TRUE)
elseif("${LIBOMP_ARCH}" STREQUAL "mips")
  # MIPS architecture
  set(MIPS TRUE)
elseif("${LIBOMP_ARCH}" STREQUAL "mips64")
  # MIPS64 architecture
  set(MIPS64 TRUE)
endif()
|
|
|
|
|
|
|
|
# Build-type flags.
# All four flags default to FALSE; the one matching CMAKE_BUILD_TYPE
# (compared case-insensitively) is switched to TRUE. An unrecognised or empty
# build type leaves all of them FALSE.
foreach(libomp_build_flag
    RELEASE_BUILD DEBUG_BUILD RELWITHDEBINFO_BUILD MINSIZEREL_BUILD)
  set(${libomp_build_flag} FALSE)
endforeach()

string(TOLOWER "${CMAKE_BUILD_TYPE}" libomp_build_type_lowercase)
if("${libomp_build_type_lowercase}" STREQUAL "release")
  set(RELEASE_BUILD TRUE)
elseif("${libomp_build_type_lowercase}" STREQUAL "debug")
  set(DEBUG_BUILD TRUE)
elseif("${libomp_build_type_lowercase}" STREQUAL "relwithdebinfo")
  set(RELWITHDEBINFO_BUILD TRUE)
elseif("${libomp_build_type_lowercase}" STREQUAL "minsizerel")
  set(MINSIZEREL_BUILD TRUE)
endif()
|
|
|
|
|
2016-05-27 02:19:10 +08:00
|
|
|
# ITT notify interface: enabled by default.
set(LIBOMP_USE_ITT_NOTIFY TRUE
  CACHE BOOL "Enable ITT notify?")
|
2014-08-05 17:32:28 +08:00
|
|
|
|
|
|
|
# Library-type flags (normal, profile, stubs).
# All start out FALSE; the one matching LIBOMP_LIB_TYPE becomes TRUE.
foreach(libomp_lib_flag NORMAL_LIBRARY STUBS_LIBRARY PROFILE_LIBRARY)
  set(${libomp_lib_flag} FALSE)
endforeach()

if("${LIBOMP_LIB_TYPE}" STREQUAL "normal")
  set(NORMAL_LIBRARY TRUE)
elseif("${LIBOMP_LIB_TYPE}" STREQUAL "profile")
  set(PROFILE_LIBRARY TRUE)
elseif("${LIBOMP_LIB_TYPE}" STREQUAL "stubs")
  set(STUBS_LIBRARY TRUE)
endif()
|
|
|
|
|
2015-07-16 00:05:30 +08:00
|
|
|
# Directory names used by the rest of the build.
#   LIBOMP_BASE_DIR   - this source directory
#   LIBOMP_SRC_DIR    - <base>/src
#   LIBOMP_TOOLS_DIR  - <base>/tools
#   LIBOMP_INC_DIR    - <src>/include/<LIBOMP_OMP_VERSION>
#   LIBOMP_BINARY_DIR - the binary directory corresponding to this directory
set(LIBOMP_BASE_DIR   ${CMAKE_CURRENT_SOURCE_DIR})
set(LIBOMP_SRC_DIR    ${LIBOMP_BASE_DIR}/src)
set(LIBOMP_TOOLS_DIR  ${LIBOMP_BASE_DIR}/tools)
set(LIBOMP_INC_DIR    ${LIBOMP_SRC_DIR}/include/${LIBOMP_OMP_VERSION})
set(LIBOMP_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
|
2014-08-05 17:32:28 +08:00
|
|
|
|
2015-07-16 00:05:30 +08:00
|
|
|
# Extra languages, enabled only when they are needed:
#  - Fortran, when Fortran module files were requested
if(${LIBOMP_FORTRAN_MODULES})
  enable_language(Fortran)
endif()
#  - the MASM assembler (Windows only)
if(WIN32)
  enable_language(ASM_MASM)
endif()
|
2014-08-05 17:32:28 +08:00
|
|
|
|
2015-07-16 00:05:30 +08:00
|
|
|
# Obtain the "legal" library type and architecture strings from the helper
# functions; results are stored in LIBOMP_LEGAL_TYPE and LIBOMP_LEGAL_ARCH.
libomp_get_legal_type(LIBOMP_LEGAL_TYPE)
libomp_get_legal_arch(LIBOMP_LEGAL_ARCH)

# Run the configuration checks: compiler flag checks, library checks,
# threading check, etc.
include(config-ix)
|
I apologise in advance for the size of this check-in. At Intel we do
understand that this is not friendly, and are working to change our
internal code-development to make it easier to make development
features available more frequently and in finer (more functional)
chunks. Unfortunately we haven't got that in place yet, and unpicking
this into multiple separate check-ins would be non-trivial, so please
bear with me on this one. We should be better in the future.
Apologies over, what do we have here?
GCC 4.9 compatibility
--------------------
* We have implemented the new entrypoints used by code compiled by GCC
4.9 to implement the same functionality in gcc 4.8. Therefore code
compiled with gcc 4.9 that used to work will continue to do so.
However, there are some other new entrypoints (associated with task
cancellation) which are not implemented. Therefore user code compiled
by gcc 4.9 that uses these new features will not link against the LLVM
runtime. (It remains unclear how to handle those entrypoints, since
the GCC interface has potentially unpleasant performance implications
for join barriers even when cancellation is not used)
--- new parallel entry points ---
new entry points that aren't OpenMP 4.0 related
These are implemented fully :-
GOMP_parallel_loop_dynamic()
GOMP_parallel_loop_guided()
GOMP_parallel_loop_runtime()
GOMP_parallel_loop_static()
GOMP_parallel_sections()
GOMP_parallel()
--- cancellation entry points ---
Currently, these only give a runtime error if OMP_CANCELLATION is true
because our plain barriers don't check for cancellation while waiting
GOMP_barrier_cancel()
GOMP_cancel()
GOMP_cancellation_point()
GOMP_loop_end_cancel()
GOMP_sections_end_cancel()
--- taskgroup entry points ---
These are implemented fully.
GOMP_taskgroup_start()
GOMP_taskgroup_end()
--- target entry points ---
These are empty (as they are in libgomp)
GOMP_target()
GOMP_target_data()
GOMP_target_end_data()
GOMP_target_update()
GOMP_teams()
Improvements in Barriers and Fork/Join
--------------------------------------
* Barrier and fork/join code is now in its own file (which makes it
easier to understand and modify).
* Wait/release code is now templated and in its own file; suspend/resume code is also templated
* There's a new, hierarchical, barrier, which exploits the
cache-hierarchy of the Intel(r) Xeon Phi(tm) coprocessor to improve
fork/join and barrier performance.
***BEWARE*** the new source files have *not* been added to the legacy
CMake build system. If you want to use that, fixes will be required.
Statistics Collection Code
--------------------------
* New code has been added to collect application statistics (if this
is enabled at library compile time; by default it is not). The
statistics code itself is generally useful, the lightweight timing
code uses the X86 rdtsc instruction, so will require changes for other
architectures.
The intent of this code is not for users to tune their codes but
rather
1) For timing code-paths inside the runtime
2) For gathering general properties of OpenMP codes to focus attention
on which OpenMP features are most used.
Nested Hot Teams
----------------
* The runtime now maintains more state to reduce the overhead of
creating and destroying inner parallel teams. This improves the
performance of code that repeatedly uses nested parallelism with the
same resource allocation. Set the new KMP_HOT_TEAMS_MAX_LEVEL
envirable to a depth to enable this (and, of course, OMP_NESTED=true
to enable nested parallelism at all).
Improved Intel(r) VTune(Tm) Amplifier support
---------------------------------------------
* The runtime provides additional information to Vtune via the
itt_notify interface to allow it to display better OpenMP specific
analyses of load-imbalance.
Support for OpenMP Composite Statements
---------------------------------------
* Implement new entrypoints required by some of the OpenMP 4.1
composite statements.
Improved ifdefs
---------------
* More separation of concepts ("Does this platform do X?") from
platforms ("Are we compiling for platform Y?"), which should simplify
future porting.
ScaleMP* contribution
---------------------
Stack padding to improve the performance in their environment where
cross-node coherency is managed at the page level.
Redesign of wait and release code
---------------------------------
The code is simplified and performance improved.
Bug Fixes
---------
*Fixes for Windows multiple processor groups.
*Fix Fortran module build on Linux: offload attribute added.
*Fix entry names for distribute-parallel-loop construct to be consistent with the compiler codegen.
*Fix an inconsistent error message for KMP_PLACE_THREADS environment variable.
llvm-svn: 219214
2014-10-08 00:25:50 +08:00
|
|
|
|
2015-07-16 00:05:30 +08:00
|
|
|
# 128-bit (quad precision) entry points.
# The option defaults to whatever LIBOMP_HAVE_QUAD_PRECISION reports;
# requesting the feature when it is not available is reported as an error.
# TODO: Make this a real feature check
set(LIBOMP_USE_QUAD_PRECISION "${LIBOMP_HAVE_QUAD_PRECISION}"
  CACHE BOOL "Should 128-bit precision entry points be built?")
if(LIBOMP_USE_QUAD_PRECISION AND NOT LIBOMP_HAVE_QUAD_PRECISION)
  libomp_error_say("128-bit quad precision functionality requested but not available")
endif()
|
|
|
|
|
2015-07-16 00:05:30 +08:00
|
|
|
# Versioned symbols, which libgomp drop-in compatibility requires.
# The option defaults to LIBOMP_HAVE_VERSION_SYMBOLS; asking for the feature
# when it is unavailable is reported as an error.
set(LIBOMP_USE_VERSION_SYMBOLS "${LIBOMP_HAVE_VERSION_SYMBOLS}"
  CACHE BOOL "Should version symbols be used? These provide binary compatibility with libgomp.")
if(LIBOMP_USE_VERSION_SYMBOLS AND NOT LIBOMP_HAVE_VERSION_SYMBOLS)
  libomp_error_say("Version symbols functionality requested but not available")
endif()
|
|
|
|
|
2015-07-16 00:05:30 +08:00
|
|
|
# Larger alignment, desirable on multinode systems to avoid false sharing
# (off by default).
set(LIBOMP_USE_INTERNODE_ALIGNMENT FALSE
  CACHE BOOL "Should larger alignment (4096 bytes) be used for some locks and data structures?")

# Code that lets the OpenMP library conveniently interface with debuggers
# (off by default).
set(LIBOMP_USE_DEBUGGER FALSE
  CACHE BOOL "Enable debugger interface code?")

# Linking against the C++ library (off by default).
set(LIBOMP_USE_STDCPPLIB FALSE
  CACHE BOOL "Should we link to C++ library?")
|
2014-08-05 17:32:28 +08:00
|
|
|
|
2017-07-29 03:05:17 +08:00
|
|
|
# Adaptive locks based on Intel(R) Transactional Synchronization Extensions
# (Intel(R) TSX). They contain __asm code which can be troublesome for some
# compilers, and the feature is x86 specific.
# The option defaults to LIBOMP_HAVE_ADAPTIVE_LOCKS; requesting it when it is
# unavailable is reported as an error.
# TODO: Make this a real feature check
set(LIBOMP_USE_ADAPTIVE_LOCKS "${LIBOMP_HAVE_ADAPTIVE_LOCKS}"
  CACHE BOOL "Should Intel(R) TSX lock be compiled (adaptive lock in kmp_lock.cpp). These are x86 specific.")
if(LIBOMP_USE_ADAPTIVE_LOCKS AND NOT LIBOMP_HAVE_ADAPTIVE_LOCKS)
  libomp_error_say("Adaptive locks (Intel(R) TSX) functionality is only supported on x86 Architecture")
endif()
|
|
|
|
|
2015-07-16 00:05:30 +08:00
|
|
|
# Stats-gathering records OpenMP statistics such as the number of parallel
# regions and the clock ticks spent in particular openmp regions.
# It is off by default.
set(LIBOMP_STATS FALSE
  CACHE BOOL "Stats-Gathering functionality?")
if(LIBOMP_STATS)
  if(NOT LIBOMP_HAVE_STATS)
    libomp_error_say("Stats-gathering functionality requested but not available")
  endif()
  # The stats functionality requires the std c++ library
  set(LIBOMP_USE_STDCPPLIB TRUE)
endif()
|
2015-07-16 00:05:30 +08:00
|
|
|
|
2016-02-05 03:29:35 +08:00
|
|
|
# The library is shared by default but can be switched to a static library.
set(LIBOMP_ENABLE_SHARED TRUE
  CACHE BOOL "Shared library instead of static library?")

# Consequences of asking for a static library:
#  - on Windows it is reported as an error,
#  - ITT Notify is forced off,
#  - version symbols are forced off.
if(NOT LIBOMP_ENABLE_SHARED)
  if(WIN32)
    libomp_error_say("Static libraries requested but not available on Windows")
  endif()

  if(LIBOMP_USE_ITT_NOTIFY)
    message(STATUS "ITT Notify not supported for static libraries - forcing ITT Notify off")
    set(LIBOMP_USE_ITT_NOTIFY FALSE)
  endif()

  if(LIBOMP_USE_VERSION_SYMBOLS)
    message(STATUS "Version symbols not supported for static libraries - forcing Version symbols functionality off")
    set(LIBOMP_USE_VERSION_SYMBOLS FALSE)
  endif()
endif()
|
|
|
|
|
2018-01-03 05:09:00 +08:00
|
|
|
# OMPT support.
# The default is ON only when all of the following hold: the OpenMP version is
# 5.0 or newer, the requirements checked in cmake/config-ix.cmake are
# fulfilled (LIBOMP_HAVE_OMPT_SUPPORT), and the platform is not Windows.
set(OMPT_DEFAULT FALSE)
if(LIBOMP_HAVE_OMPT_SUPPORT AND (NOT WIN32) AND (${LIBOMP_OMP_VERSION} GREATER 49))
  set(OMPT_DEFAULT TRUE)
endif()
set(LIBOMP_OMPT_SUPPORT ${OMPT_DEFAULT}
  CACHE BOOL "OMPT-support?")

# Additional OMPT switches
set(LIBOMP_OMPT_DEBUG FALSE
  CACHE BOOL "Trace OMPT initialization?")
set(LIBOMP_OMPT_OPTIONAL TRUE
  CACHE BOOL "OMPT-optional?")

# Report an OMPT request that cannot be honoured: either the implementation
# lacks support, or the selected OpenMP version is older than 5.0.
if(LIBOMP_OMPT_SUPPORT AND NOT LIBOMP_HAVE_OMPT_SUPPORT)
  libomp_error_say("OpenMP Tools Interface requested but not available in this implementation")
endif()
if(LIBOMP_OMPT_SUPPORT AND ${LIBOMP_OMP_VERSION} LESS 50)
  libomp_error_say("OpenMP Tools Interface only available with OpenMP 5.0, LIBOMP_OMP_VERSION is ${LIBOMP_OMP_VERSION}")
endif()
|
|
|
|
|
2016-11-07 23:58:36 +08:00
|
|
|
# TSAN support: off by default; requesting it when it is unavailable is
# reported as an error.
set(LIBOMP_TSAN_SUPPORT FALSE
  CACHE BOOL "TSAN-support?")
if(LIBOMP_TSAN_SUPPORT AND NOT LIBOMP_HAVE_TSAN_SUPPORT)
  libomp_error_say("TSAN functionality requested but not available")
endif()

# Hwloc can only be error-checked here, after config-ix has run.
if(LIBOMP_USE_HWLOC AND NOT LIBOMP_HAVE_HWLOC)
  libomp_error_say("Hwloc requested but not available")
endif()
|
|
|
|
|
[OpenMP] Introduce hierarchical scheduling
This patch introduces the logic implementing hierarchical scheduling.
First and foremost, hierarchical scheduling is off by default
To enable, use -DLIBOMP_USE_HIER_SCHED=On during CMake's configure stage.
This work is based off of the IWOMP paper:
"Workstealing and Nested Parallelism in SMP Systems"
Hierarchical scheduling is the layering of OpenMP schedules for different layers
of the memory hierarchy. One can have multiple layers between the threads and
the global iterations space. The threads will go up the hierarchy to grab
iterations, using possibly a different schedule & chunk for each layer.
[ Global iteration space (0-999) ]
(use static)
[ L1 | L1 | L1 | L1 ]
(use dynamic,1)
[ T0 T1 | T2 T3 | T4 T5 | T6 T7 ]
In the example shown above, there are 8 threads and 4 L1 caches being targeted.
If the topology indicates that there are two threads per core, then two
consecutive threads will share the data of one L1 cache unit. This example
would have the iteration space (0-999) split statically across the four L1
caches (so the first L1 would get (0-249), the second would get (250-499), etc).
Then the threads will use a dynamic,1 schedule to grab iterations from the L1
cache units. There are currently four supported layers: L1, L2, L3, NUMA
OMP_SCHEDULE can now read a hierarchical schedule with this syntax:
OMP_SCHEDULE='EXPERIMENTAL LAYER,SCHED[,CHUNK][:LAYER,SCHED[,CHUNK]...]:SCHED,CHUNK
And OMP_SCHEDULE can still read the normal SCHED,CHUNK syntax from before
I've kept most of the hierarchical scheduling logic inside kmp_dispatch_hier.h
to try to keep it separate from the rest of the code.
Differential Revision: https://reviews.llvm.org/D47962
llvm-svn: 336571
2018-07-10 01:51:13 +08:00
|
|
|
# Hierarchical scheduling support: off by default.
set(LIBOMP_USE_HIER_SCHED FALSE
  CACHE BOOL "Hierarchical scheduling support?")
|
|
|
|
|
2014-08-05 17:32:28 +08:00
|
|
|
# Final library name.
# The default base name is "libomp", with "prof" appended for the profile
# library and "stubs" appended for the stubs library. The result is only the
# default of the LIBOMP_LIB_NAME cache entry, which the user may override.
set(LIBOMP_DEFAULT_LIB_NAME libomp)
if(PROFILE_LIBRARY)
  set(LIBOMP_DEFAULT_LIB_NAME ${LIBOMP_DEFAULT_LIB_NAME}prof)
endif()
if(STUBS_LIBRARY)
  set(LIBOMP_DEFAULT_LIB_NAME ${LIBOMP_DEFAULT_LIB_NAME}stubs)
endif()
set(LIBOMP_LIB_NAME ${LIBOMP_DEFAULT_LIB_NAME}
  CACHE STRING "Base OMP library name")
|
2016-02-05 03:29:35 +08:00
|
|
|
|
|
|
|
# Pick the file suffix, library kind and install kind according to whether a
# static or a shared library is being built.
if(NOT LIBOMP_ENABLE_SHARED)
  set(LIBOMP_LIBRARY_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
  set(LIBOMP_LIBRARY_KIND STATIC)
  set(LIBOMP_INSTALL_KIND ARCHIVE)
else()
  set(LIBOMP_LIBRARY_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
  set(LIBOMP_LIBRARY_KIND SHARED)
  set(LIBOMP_INSTALL_KIND LIBRARY)
endif()

# Full library file name: base name followed by the chosen suffix.
set(LIBOMP_LIB_FILE ${LIBOMP_LIB_NAME}${LIBOMP_LIBRARY_SUFFIX})
|
2014-08-05 17:32:28 +08:00
|
|
|
|
2016-09-15 01:46:27 +08:00
|
|
|
# Backwards compatibility aliases (optional, on by default).
set(LIBOMP_INSTALL_ALIASES TRUE
  CACHE BOOL "Install libgomp and libiomp5 library aliases for backwards compatibility")
|
|
|
|
|
2014-08-05 17:32:28 +08:00
|
|
|
# Print the configuration summary after all variables are set.
# The summary is only emitted for standalone builds.
if(${OPENMP_STANDALONE_BUILD})
  libomp_say("Operating System -- ${CMAKE_SYSTEM_NAME}")
  libomp_say("Target Architecture -- ${LIBOMP_ARCH}")
  if(${MIC})
    libomp_say("Intel(R) MIC Architecture -- ${LIBOMP_MIC_ARCH}")
  endif()
  libomp_say("Build Type -- ${CMAKE_BUILD_TYPE}")
  libomp_say("OpenMP Version -- ${LIBOMP_OMP_VERSION}")
  libomp_say("Library Kind -- ${LIBOMP_LIBRARY_KIND}")
  libomp_say("Library Type -- ${LIBOMP_LIB_TYPE}")
  libomp_say("Fortran Modules -- ${LIBOMP_FORTRAN_MODULES}")
  # The build is reported as "Development" when the build number is all zeros.
  # Both operands are quoted so that the test is a plain string comparison
  # and stays well-formed even if the build number is empty.
  if("${LIBOMP_VERSION_BUILD}" STREQUAL "00000000")
    set(LIBOMP_BUILD Development)
  else()
    set(LIBOMP_BUILD ${LIBOMP_VERSION_BUILD})
  endif()
  libomp_say("Build -- ${LIBOMP_BUILD}")
  libomp_say("Use Stats-gathering -- ${LIBOMP_STATS}")
  libomp_say("Use Debugger-support -- ${LIBOMP_USE_DEBUGGER}")
  libomp_say("Use ITT notify -- ${LIBOMP_USE_ITT_NOTIFY}")
  libomp_say("Use OMPT-support -- ${LIBOMP_OMPT_SUPPORT}")
  # The OMPT-optional setting is only shown when OMPT support is on.
  if(${LIBOMP_OMPT_SUPPORT})
    libomp_say("Use OMPT-optional -- ${LIBOMP_OMPT_OPTIONAL}")
  endif()
  libomp_say("Use Adaptive locks -- ${LIBOMP_USE_ADAPTIVE_LOCKS}")
  libomp_say("Use quad precision -- ${LIBOMP_USE_QUAD_PRECISION}")
  libomp_say("Use TSAN-support -- ${LIBOMP_TSAN_SUPPORT}")
  libomp_say("Use Hwloc library -- ${LIBOMP_USE_HWLOC}")
endif()
|
|
|
|
|
2015-06-12 01:23:57 +08:00
|
|
|
# Descend into the library sources first, then into the tests.
add_subdirectory(src)
add_subdirectory(test)
|