Merge pull request #2783 from stanmoore1/kk_update_3.4.1

Update Kokkos library in LAMMPS to v3.4.1
This commit is contained in:
Axel Kohlmeyer 2021-05-27 15:10:34 -04:00 committed by GitHub
commit b4e5298bf8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
27 changed files with 255 additions and 157 deletions

View File

@ -37,8 +37,8 @@ if(DOWNLOAD_KOKKOS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject)
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.4.00.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "c2fdcedb6953e6160c765366f6045abb" CACHE STRING "MD5 checksum of KOKKOS tarball")
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.4.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "4c84698917c93a18985b311bb6caf84f" CACHE STRING "MD5 checksum of KOKKOS tarball")
mark_as_advanced(KOKKOS_URL)
mark_as_advanced(KOKKOS_MD5)
ExternalProject_Add(kokkos_build
@ -58,7 +58,7 @@ if(DOWNLOAD_KOKKOS)
target_link_libraries(lmp PRIVATE LAMMPS::KOKKOS)
add_dependencies(LAMMPS::KOKKOS kokkos_build)
elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 3.4.00 REQUIRED CONFIG)
find_package(Kokkos 3.4.01 REQUIRED CONFIG)
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
target_link_libraries(lmp PRIVATE Kokkos::kokkos)
else()

View File

@ -457,6 +457,9 @@ They must be specified in uppercase.
* - ZEN2
- HOST
- AMD Zen2 class CPU (AVX 2)
* - ZEN3
- HOST
- AMD Zen3 class CPU (AVX 2)
* - ARMV80
- HOST
- ARMv8.0 Compatible CPU
@ -560,7 +563,7 @@ They must be specified in uppercase.
- GPU
- Intel GPUs Gen9+
This list was last updated for version 3.4 of the Kokkos library.
This list was last updated for version 3.4.1 of the Kokkos library.
.. tabs::

View File

@ -262,6 +262,9 @@ Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_
* Kokkos_ARCH_ZEN2
* Whether to optimize for the Zen2 architecture
* BOOL Default: OFF
* Kokkos_ARCH_ZEN3
* Whether to optimize for the Zen3 architecture
* BOOL Default: OFF
* Kokkos_ARCH_HSW
* Whether to optimize for the HSW architecture
* BOOL Default: OFF

View File

@ -1,5 +1,20 @@
# Change Log
## [3.4.01](https://github.com/kokkos/kokkos/tree/3.4.01) (2021-05-19)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.4.00...3.4.01)
**Bug Fixes:**
- Windows: Remove atomic_compare_exchange_strong overload conflicts with Windows [\#4024](https://github.com/kokkos/kokkos/pull/4024)
- OpenMPTarget: Fixup allocation headers with OpenMPTarget backend [\#4020](https://github.com/kokkos/kokkos/pull/4020)
- OpenMPTarget: Add missing specialization for OMPT to Kokkos Random [\#4022](https://github.com/kokkos/kokkos/pull/4022)
- AMD: Add support for AMD Zen3 CPU architecture [\#4021](https://github.com/kokkos/kokkos/pull/4021)
- SYCL: Implement SYCL::print_configuration [\#4012](https://github.com/kokkos/kokkos/pull/4012)
- Containers: staticcsrgraph: use device type instead of execution space to construct views [\#3998](https://github.com/kokkos/kokkos/pull/3998)
- nvcc_wrapper: fix errors in argument handling, suppress duplicates of GPU architecture and RDC flags [\#4006](https://github.com/kokkos/kokkos/pull/4006)
- CI: Add icpx testing to intel container [\#4004](https://github.com/kokkos/kokkos/pull/4004)
- CMake/TRIBITS: Keep quoted compiler flags when passing to Trilinos [\#4007](https://github.com/kokkos/kokkos/pull/4007)
- CMake: Rename IntelClang to IntelLLVM [\#3945](https://github.com/kokkos/kokkos/pull/3945)
## [3.4.00](https://github.com/kokkos/kokkos/tree/3.4.00) (2021-04-25)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.3.01...3.4.00)

View File

@ -112,7 +112,7 @@ ENDIF()
set(Kokkos_VERSION_MAJOR 3)
set(Kokkos_VERSION_MINOR 4)
set(Kokkos_VERSION_PATCH 00)
set(Kokkos_VERSION_PATCH 01)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
@ -206,8 +206,13 @@ ENDIF()
IF (KOKKOS_HAS_TRILINOS)
# Overwrite the old flags at the top-level
# Because Tribits doesn't use lists, it uses spaces for the list of CXX flags
# we have to match the annoying behavior
STRING(REPLACE ";" " " KOKKOSCORE_COMPILE_OPTIONS "${KOKKOS_COMPILE_OPTIONS}")
# we have to match the annoying behavior, also we have to preserve quotes
# which needs another workaround.
# Build a scratch list in which every entry of KOKKOS_COMPILE_OPTIONS is
# wrapped in literal double quotes, then flatten that list into a single
# space-separated string stored in KOKKOSCORE_COMPILE_OPTIONS.
SET(KOKKOS_COMPILE_OPTIONS_TMP)
FOREACH(OPTION ${KOKKOS_COMPILE_OPTIONS})
LIST(APPEND KOKKOS_COMPILE_OPTIONS_TMP \"${OPTION}\")
ENDFOREACH()
# The list separator ";" is replaced by a blank to produce the flat string.
STRING(REPLACE ";" " " KOKKOSCORE_COMPILE_OPTIONS "${KOKKOS_COMPILE_OPTIONS_TMP}")
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_COMPILE_OPTIONS})
IF (KOKKOS_ENABLE_CUDA)
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_CUDA_OPTIONS})

View File

@ -12,7 +12,7 @@ endif
KOKKOS_VERSION_MAJOR = 3
KOKKOS_VERSION_MINOR = 4
KOKKOS_VERSION_PATCH = 00
KOKKOS_VERSION_PATCH = 01
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
# Options: Cuda,HIP,OpenMP,Pthread,Serial
@ -24,7 +24,7 @@ KOKKOS_DEVICES ?= "OpenMP"
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
# IBM: BGQ,Power7,Power8,Power9
# AMD-GPUS: Vega900,Vega906,Vega908
# AMD-CPUS: AMDAVX,Zen,Zen2
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
KOKKOS_ARCH ?= ""
# Options: yes,no
KOKKOS_DEBUG ?= "no"
@ -382,6 +382,7 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
# AMD based.
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
KOKKOS_INTERNAL_USE_ARCH_ZEN3 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen3)
KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2)
KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
KOKKOS_INTERNAL_USE_ARCH_VEGA900 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega900)
@ -391,12 +392,12 @@ KOKKOS_INTERNAL_USE_ARCH_VEGA908 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega
# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2))
# Count of selected AVX2-capable host architectures (HSW, BDW, ZEN, ZEN2, ZEN3).
# All terms must stay inside the one shell expr call: a closing parenthesis
# placed before the last term ends the call early and leaves the remainder as
# literal text, so the variable would no longer hold a plain integer.
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX))
# Decide what ISA level we are able to support.
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2))
# Count of selected x86-64 host architectures (WSM, SNB, HSW, BDW, KNL, SKX,
# ZEN, ZEN2, ZEN3). All terms must stay inside the one shell expr call: a
# closing parenthesis placed before the last term ends the call early and
# leaves the remainder as literal text instead of being summed.
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
KOKKOS_INTERNAL_USE_ISA_KNC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9))
KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7))
@ -790,6 +791,19 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 1)
endif
endif
# Zen3 selected: pass the KOKKOS_ARCH_AMD_ZEN3 and KOKKOS_ARCH_AMD_AVX2 define
# lines to kokkos_append_header, then extend the compile and link flags.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN3")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2")
# Intel compiler: only the generic -mavx2 flag is added.
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -mavx2
KOKKOS_LDFLAGS += -mavx2
else
# Any other compiler: target and tune for znver3.
KOKKOS_CXXFLAGS += -march=znver3 -mtune=znver3
KOKKOS_LDFLAGS += -march=znver3 -mtune=znver3
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX")
@ -1181,7 +1195,6 @@ endif
ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h)
endif
# Functions for generating config header file
kokkos_start_config_header = $(shell sed 's~@INCLUDE_NEXT_FILE@~~g' $(KOKKOS_PATH)/cmake/KokkosCore_Config_HeaderSet.in > $1)
@ -1232,6 +1245,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HBWSpace.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HBWSpace.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)

View File

@ -5,10 +5,12 @@ KOKKOS_SUBPACKAGE(Algorithms)
IF (NOT Kokkos_INSTALL_TESTING)
ADD_SUBDIRECTORY(src)
ENDIF()
KOKKOS_ADD_TEST_DIRECTORIES(unit_tests)
# Register the unit tests unless this is an OpenMPTarget build whose compiler
# ID is PGI or NVHPC.
IF(NOT KOKKOS_ENABLE_OPENMPTARGET
OR NOT (KOKKOS_CXX_COMPILER_ID STREQUAL PGI
OR KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC))
KOKKOS_ADD_TEST_DIRECTORIES(unit_tests)
ENDIF()
KOKKOS_SUBPACKAGE_POSTPROCESS()

View File

@ -687,6 +687,24 @@ struct Random_UniqueIndex<Kokkos::Experimental::SYCL> {
};
#endif
#ifdef KOKKOS_ENABLE_OPENMPTARGET
// Random_UniqueIndex specialization for the OpenMPTarget execution space.
template <>
struct Random_UniqueIndex<Kokkos::Experimental::OpenMPTarget> {
using locks_view_type = View<int*, Kokkos::Experimental::OpenMPTarget>;
// Picks an entry of `locks` for the calling thread and returns its index.
// The search starts at team_num * team_size + thread_num and steps forward
// circularly (modulo the view extent) for as long as the compare-exchange of
// 0 -> 1 on the current entry yields a non-zero result.
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type& locks) {
const int team_size = omp_get_num_threads();
int i = omp_get_team_num() * team_size + omp_get_thread_num();
const int lock_size = locks.extent_int(0);
// NOTE(review): the first locks(i) access uses the start index before any
// modulo is applied; presumably `locks` is sized to cover every thread
// index -- confirm against the code that allocates the view.
while (Kokkos::atomic_compare_exchange(&locks(i), 0, 1)) {
i = (i + 1) % lock_size;
}
return i;
}
};
#endif
} // namespace Impl
template <class DeviceType>

View File

@ -44,7 +44,7 @@ IF(Kokkos_ENABLE_OPENMP)
)
ENDIF()
foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL)
foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
# Because there is always an exception to the rule
if(Tag STREQUAL "Threads")
set(DEVICE "PTHREAD")

View File

@ -109,6 +109,16 @@ struct RandomProperties {
}
};
// FIXME_OPENMPTARGET: Needed for OpenMPTarget because, contrary to the
// standard, llvm requires the binary operator+ to be defined and not just
// operator+=.
// Returns a copy of `org` to which `add` has been applied with operator+=.
KOKKOS_INLINE_FUNCTION
RandomProperties operator+(const RandomProperties& org,
const RandomProperties& add) {
RandomProperties val = org;
val += add;
return val;
}
template <class GeneratorPool, class Scalar>
struct test_random_functor {
using rnd_type = typename GeneratorPool::generator_type;

View File

@ -370,7 +370,10 @@ template <class ExecutionSpace, typename KeyType>
void test_sort(unsigned int N) {
test_1D_sort<ExecutionSpace, KeyType>(N);
test_3D_sort<ExecutionSpace, KeyType>(N);
// FIXME_OPENMPTARGET: OpenMPTarget doesn't support DynamicView yet.
#ifndef KOKKOS_ENABLE_OPENMPTARGET
test_dynamic_view_sort<ExecutionSpace, KeyType>(N);
#endif
test_issue_1160_sort<ExecutionSpace>();
}
} // namespace Impl

View File

@ -67,6 +67,11 @@ shared_versioned_libraries=""
# Does the User set the architecture
arch_set=0
arch_flag=""
# Does the user set RDC?
rdc_set=0
rdc_flag=""
# Does the user overwrite the host compiler
ccbin_set=0
@ -190,8 +195,34 @@ do
host_only_args="$host_only_args $1 $2"
shift
;;
# Handle nvcc args controlling whether to generate relocatable device code
# Single-token RDC options (the value is attached with "=").
--relocatable-device-code=*|-rdc=*)
# First RDC option seen: remember its exact text and forward it via cuda_args.
if [ "$rdc_set" -eq 0 ]; then
rdc_set=1
rdc_flag="$1"
cuda_args="$cuda_args $rdc_flag"
# A later RDC option is compared as text against the remembered one: an
# identical repeat is silently dropped, anything else aborts the wrapper.
# The long and the short spelling therefore count as different flags.
elif [ "$rdc_flag" != "$1" ]; then
echo "RDC is being set twice with different flags, which is not handled"
echo "$rdc_flag"
echo "$1"
exit 1
fi
;;
# Two-token RDC option: "-rdc <value>", with the value in $2.
-rdc)
  if [ "$rdc_set" -eq 0 ]; then
    # First RDC option seen: remember both tokens and forward them.
    rdc_set=1
    rdc_flag="$1 $2"
    cuda_args="$cuda_args $rdc_flag"
  elif [ "$rdc_flag" != "$1 $2" ]; then
    echo "RDC is being set twice with different flags, which is not handled"
    echo "$rdc_flag"
    echo "$1 $2"
    exit 1
  fi
  # Always consume the value token. Shifting only on the first occurrence
  # would leave the value of an identical repeated "-rdc <value>" behind,
  # where it would be parsed as an unrelated stand-alone argument.
  shift
  ;;
#Handle known nvcc args
--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-Xptxas*|--fmad*|--use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this)
--dryrun|--verbose|--keep|--keep-dir*|-G|-lineinfo|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-Xptxas*|--fmad=*|--use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this)
cuda_args="$cuda_args $1"
;;
#Handle more known nvcc args
@ -199,13 +230,13 @@ do
cuda_args="$cuda_args $1"
;;
#Handle known nvcc args that have an argument
-rdc|-maxrregcount|--default-stream|-Xnvlink|--fmad|-cudart|--cudart|-include)
-maxrregcount=*|--maxrregcount=*)
cuda_args="$cuda_args $1"
;;
-maxrregcount|--default-stream|-Xnvlink|--fmad|-cudart|--cudart|-include)
cuda_args="$cuda_args $1 $2"
shift
;;
-rdc=*|-maxrregcount*|--maxrregcount*)
cuda_args="$cuda_args $1"
;;
#Handle unsupported standard flags
--std=c++1y|-std=c++1y|--std=gnu++1y|-std=gnu++1y|--std=c++1z|-std=c++1z|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a)
fallback_std_flag="-std=c++14"
@ -323,20 +354,36 @@ do
;;
#Handle -arch argument (if its not set use a default) this is the version with = sign
-arch*|-gencode*)
cuda_args="$cuda_args $1"
arch_set=1
# Single-token architecture options (the value is attached with "=").
-arch=*|-gencode=*)
# First architecture option seen: remember its exact text and forward it.
if [ "$arch_set" -eq 0 ]; then
arch_set=1
arch_flag="$1"
cuda_args="$cuda_args $arch_flag"
# A later architecture option is compared as text against the remembered
# one: an identical repeat is dropped, anything else aborts the wrapper.
elif [ "$arch_flag" != "$1" ]; then
echo "ARCH is being set twice with different flags, which is not handled"
echo "$arch_flag"
echo "$1"
exit 1
fi
;;
#Handle -arch argument (if its not set use a default) this is the version without = sign
# Two-token architecture option: "-arch <value>" or "-gencode <value>".
-arch|-gencode)
  if [ "$arch_set" -eq 0 ]; then
    # First architecture option seen: remember both tokens and forward them.
    arch_set=1
    arch_flag="$1 $2"
    cuda_args="$cuda_args $arch_flag"
  elif [ "$arch_flag" != "$1 $2" ]; then
    echo "ARCH is being set twice with different flags, which is not handled"
    echo "$arch_flag"
    echo "$1 $2"
    exit 1
  fi
  # Always consume the value token. Shifting only on the first occurrence
  # would leave the value of an identical repeated option behind, where it
  # would be parsed as an unrelated stand-alone argument.
  shift
  ;;
#Handle -code argument (if its not set use a default) this is the version with = sign
-code*)
cuda_args="$cuda_args $1"
;;
#Handle -arch argument (if its not set use a default) this is the version without = sign
-arch|-gencode)
cuda_args="$cuda_args $1 $2"
arch_set=1
shift
;;
#Handle -code argument (if its not set use a default) this is the version without = sign
-code)
cuda_args="$cuda_args $1 $2"

View File

@ -99,5 +99,6 @@
#cmakedefine KOKKOS_ARCH_AMPERE86
#cmakedefine KOKKOS_ARCH_AMD_ZEN
#cmakedefine KOKKOS_ARCH_AMD_ZEN2
#cmakedefine KOKKOS_ARCH_AMD_ZEN3
#cmakedefine KOKKOS_IMPL_DISABLE_SYCL_DEVICE_PRINTF

View File

@ -63,6 +63,7 @@ KOKKOS_ARCH_OPTION(AMPERE80 GPU "NVIDIA Ampere generation CC 8.0")
KOKKOS_ARCH_OPTION(AMPERE86 GPU "NVIDIA Ampere generation CC 8.6")
KOKKOS_ARCH_OPTION(ZEN HOST "AMD Zen architecture")
KOKKOS_ARCH_OPTION(ZEN2 HOST "AMD Zen2 architecture")
KOKKOS_ARCH_OPTION(ZEN3 HOST "AMD Zen3 architecture")
KOKKOS_ARCH_OPTION(VEGA900 GPU "AMD GPU MI25 GFX900")
KOKKOS_ARCH_OPTION(VEGA906 GPU "AMD GPU MI50/MI60 GFX906")
KOKKOS_ARCH_OPTION(VEGA908 GPU "AMD GPU MI100 GFX908")
@ -215,6 +216,15 @@ IF (KOKKOS_ARCH_ZEN2)
SET(KOKKOS_ARCH_AMD_AVX2 ON)
ENDIF()
# Zen3 host architecture: hand the per-compiler flags to
# COMPILER_SPECIFIC_FLAGS (-mavx2 for the Intel entry, -march=znver3
# -mtune=znver3 for the DEFAULT entry) and switch on the
# KOKKOS_ARCH_AMD_ZEN3 and KOKKOS_ARCH_AMD_AVX2 variables.
IF (KOKKOS_ARCH_ZEN3)
COMPILER_SPECIFIC_FLAGS(
Intel -mavx2
DEFAULT -march=znver3 -mtune=znver3
)
SET(KOKKOS_ARCH_AMD_ZEN3 ON)
SET(KOKKOS_ARCH_AMD_AVX2 ON)
ENDIF()
IF (KOKKOS_ARCH_WSM)
COMPILER_SPECIFIC_FLAGS(
Intel -xSSE4.2
@ -284,7 +294,7 @@ IF (KOKKOS_ARCH_SKX)
)
ENDIF()
IF (KOKKOS_ARCH_WSM OR KOKKOS_ARCH_SNB OR KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW OR KOKKOS_ARCH_KNL OR KOKKOS_ARCH_SKX OR KOKKOS_ARCH_ZEN OR KOKKOS_ARCH_ZEN2)
IF (KOKKOS_ARCH_WSM OR KOKKOS_ARCH_SNB OR KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW OR KOKKOS_ARCH_KNL OR KOKKOS_ARCH_SKX OR KOKKOS_ARCH_ZEN OR KOKKOS_ARCH_ZEN2 OR KOKKOS_ARCH_ZEN3)
SET(KOKKOS_USE_ISA_X86_64 ON)
ENDIF()
@ -457,7 +467,7 @@ IF (KOKKOS_ENABLE_OPENMPTARGET)
ENDIF()
IF (KOKKOS_ARCH_INTEL_GEN)
COMPILER_SPECIFIC_FLAGS(
IntelClang -fopenmp-targets=spir64 -D__STRICT_ANSI__
IntelLLVM -fopenmp-targets=spir64 -D__STRICT_ANSI__
)
ENDIF()
ENDIF()

View File

@ -101,7 +101,7 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
OUTPUT_STRIP_TRAILING_WHITESPACE)
IF (INTERNAL_HAVE_INTEL_COMPILER) #not actually Clang
SET(KOKKOS_CLANG_IS_INTEL TRUE)
SET(KOKKOS_CXX_COMPILER_ID IntelClang CACHE STRING INTERNAL FORCE)
SET(KOKKOS_CXX_COMPILER_ID IntelLLVM CACHE STRING INTERNAL FORCE)
ENDIF()
ENDIF()

View File

@ -61,7 +61,7 @@ IF(KOKKOS_ENABLE_OPENMP)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Clang -Xcompiler ${ClangOpenMPFlag}
IntelClang -Xcompiler -fiopenmp
IntelLLVM -Xcompiler -fiopenmp
PGI -Xcompiler -mp
Cray NO-VALUE-SPECIFIED
XL -Xcompiler -qsmp=omp
@ -70,7 +70,7 @@ IF(KOKKOS_ENABLE_OPENMP)
ELSE()
COMPILER_SPECIFIC_FLAGS(
Clang ${ClangOpenMPFlag}
IntelClang -fiopenmp
IntelLLVM -fiopenmp
AppleClang -Xpreprocessor -fopenmp
PGI -mp
Cray NO-VALUE-SPECIFIED
@ -92,7 +92,7 @@ IF (KOKKOS_ENABLE_OPENMPTARGET)
COMPILER_SPECIFIC_FLAGS(
Clang ${ClangOpenMPFlag} -Wno-openmp-mapping
IntelClang -fiopenmp -Wno-openmp-mapping
IntelLLVM -fiopenmp -Wno-openmp-mapping
XL -qsmp=omp -qoffload -qnoeh
PGI -mp=gpu
DEFAULT -fopenmp

View File

@ -773,7 +773,7 @@ FUNCTION(kokkos_link_tpl TARGET)
ENDFUNCTION()
FUNCTION(COMPILER_SPECIFIC_OPTIONS_HELPER)
SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang IntelClang GNU HIPCC Fujitsu)
SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang IntelLLVM GNU HIPCC Fujitsu)
CMAKE_PARSE_ARGUMENTS(
PARSE
"LINK_OPTIONS;COMPILE_OPTIONS;COMPILE_DEFINITIONS;LINK_LIBRARIES"

View File

@ -114,15 +114,11 @@ namespace Kokkos {
template <class StaticCrsGraphType, class InputSizeType>
inline typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph(
const std::string& label, const std::vector<InputSizeType>& input) {
using output_type = StaticCrsGraphType;
// using input_type = std::vector<InputSizeType>; // unused
using output_type = StaticCrsGraphType;
using entries_type = typename output_type::entries_type;
using work_type = View<typename output_type::size_type[],
typename output_type::array_layout,
typename output_type::execution_space,
typename output_type::memory_traits>;
using work_type = View<
typename output_type::size_type[], typename output_type::array_layout,
typename output_type::device_type, typename output_type::memory_traits>;
output_type output;
@ -161,10 +157,9 @@ inline typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph(
static_assert(entries_type::rank == 1, "Graph entries view must be rank one");
using work_type = View<typename output_type::size_type[],
typename output_type::array_layout,
typename output_type::execution_space,
typename output_type::memory_traits>;
using work_type = View<
typename output_type::size_type[], typename output_type::array_layout,
typename output_type::device_type, typename output_type::memory_traits>;
output_type output;

View File

@ -179,8 +179,6 @@ class SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>
const RecordBase::function_type arg_dealloc = &deallocate);
public:
std::string get_label() const;
KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate(
const Kokkos::Experimental::OpenMPTargetSpace& arg_space,
const std::string& arg_label, const size_t arg_alloc_size) {
@ -190,10 +188,6 @@ class SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>
return nullptr;
#endif
}
/**\brief Reallocate tracked memory in the space */
static void* reallocate_tracked(void* const arg_alloc_ptr,
const size_t arg_alloc_size);
};
} // namespace Impl

View File

@ -113,7 +113,7 @@ class SYCL {
void fence() const;
/// \brief Print configuration information to the given output stream.
static void print_configuration(std::ostream&, const bool detail = false);
void print_configuration(std::ostream&, const bool detail = false);
/// \brief Free any resources being consumed by the device.
static void impl_finalize();
@ -131,12 +131,10 @@ class SYCL {
sycl::device get_device() const;
friend std::ostream& operator<<(std::ostream& os, const SYCLDevice& that) {
return that.info(os);
return SYCL::impl_sycl_info(os, that.m_device);
}
private:
std::ostream& info(std::ostream& os) const;
sycl::device m_device;
};
@ -154,6 +152,9 @@ class SYCL {
}
private:
static std::ostream& impl_sycl_info(std::ostream& os,
const sycl::device& device);
Kokkos::Impl::HostSharedPtr<Impl::SYCLInternal> m_space_instance;
};

View File

@ -107,12 +107,6 @@ SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace,
SharedAllocationRecord<void, void>::m_alloc_size);
}
// TODO: Implement deep copy back see CudaSpace
std::string SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace,
void>::get_label() const {
return std::string("OpenMPTargetAllocation");
}
SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>::
SharedAllocationRecord(
const Kokkos::Experimental::OpenMPTargetSpace &arg_space,
@ -141,23 +135,6 @@ SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>::
//----------------------------------------------------------------------------
void *SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>::
reallocate_tracked(void *const arg_alloc_ptr, const size_t arg_alloc_size) {
SharedAllocationRecord *const r_old = get_record(arg_alloc_ptr);
SharedAllocationRecord *const r_new =
allocate(r_old->m_space, r_old->get_label(), arg_alloc_size);
// Kokkos::Impl::DeepCopy<OpenMPTargetSpace,OpenMPTargetSpace>( r_new->data()
// , r_old->data()
// , std::min( r_old->size() ,
// r_new->size() ) );
RecordBase::increment(r_new);
RecordBase::decrement(r_old);
return r_new->data();
}
} // namespace Impl
} // namespace Kokkos

View File

@ -105,6 +105,12 @@ bool SYCL::impl_is_initialized() {
void SYCL::impl_finalize() { Impl::SYCLInternal::singleton().finalize(); }
// Writes the SYCL backend banner line to `s`. When `detailed` is true the
// banner is followed by the impl_sycl_info() output for the device that backs
// this instance's queue.
void SYCL::print_configuration(std::ostream& s, const bool detailed) {
  s << "macro KOKKOS_ENABLE_SYCL : defined" << '\n';
  if (!detailed) {
    return;
  }
  SYCL::impl_sycl_info(s, m_space_instance->m_queue->get_device());
}
void SYCL::fence() const {
Impl::SYCLInternal::fence(*m_space_instance->m_queue);
}
@ -143,119 +149,118 @@ void SYCL::impl_initialize(SYCL::SYCLDevice d) {
Impl::SYCLInternal::singleton().initialize(d.get_device());
}
std::ostream& SYCL::SYCLDevice::info(std::ostream& os) const {
std::ostream& SYCL::impl_sycl_info(std::ostream& os,
const sycl::device& device) {
using namespace sycl::info;
return os << "Name: " << m_device.get_info<device::name>()
<< "\nDriver Version: "
<< m_device.get_info<device::driver_version>()
<< "\nIs Host: " << m_device.is_host()
<< "\nIs CPU: " << m_device.is_cpu()
<< "\nIs GPU: " << m_device.is_gpu()
<< "\nIs Accelerator: " << m_device.is_accelerator()
<< "\nVendor Id: " << m_device.get_info<device::vendor_id>()
return os << "Name: " << device.get_info<device::name>()
<< "\nDriver Version: " << device.get_info<device::driver_version>()
<< "\nIs Host: " << device.is_host()
<< "\nIs CPU: " << device.is_cpu()
<< "\nIs GPU: " << device.is_gpu()
<< "\nIs Accelerator: " << device.is_accelerator()
<< "\nVendor Id: " << device.get_info<device::vendor_id>()
<< "\nMax Compute Units: "
<< m_device.get_info<device::max_compute_units>()
<< device.get_info<device::max_compute_units>()
<< "\nMax Work Item Dimensions: "
<< m_device.get_info<device::max_work_item_dimensions>()
<< device.get_info<device::max_work_item_dimensions>()
<< "\nMax Work Group Size: "
<< m_device.get_info<device::max_work_group_size>()
<< device.get_info<device::max_work_group_size>()
<< "\nPreferred Vector Width Char: "
<< m_device.get_info<device::preferred_vector_width_char>()
<< device.get_info<device::preferred_vector_width_char>()
<< "\nPreferred Vector Width Short: "
<< m_device.get_info<device::preferred_vector_width_short>()
<< device.get_info<device::preferred_vector_width_short>()
<< "\nPreferred Vector Width Int: "
<< m_device.get_info<device::preferred_vector_width_int>()
<< device.get_info<device::preferred_vector_width_int>()
<< "\nPreferred Vector Width Long: "
<< m_device.get_info<device::preferred_vector_width_long>()
<< device.get_info<device::preferred_vector_width_long>()
<< "\nPreferred Vector Width Float: "
<< m_device.get_info<device::preferred_vector_width_float>()
<< device.get_info<device::preferred_vector_width_float>()
<< "\nPreferred Vector Width Double: "
<< m_device.get_info<device::preferred_vector_width_double>()
<< device.get_info<device::preferred_vector_width_double>()
<< "\nPreferred Vector Width Half: "
<< m_device.get_info<device::preferred_vector_width_half>()
<< device.get_info<device::preferred_vector_width_half>()
<< "\nNative Vector Width Char: "
<< m_device.get_info<device::native_vector_width_char>()
<< device.get_info<device::native_vector_width_char>()
<< "\nNative Vector Width Short: "
<< m_device.get_info<device::native_vector_width_short>()
<< device.get_info<device::native_vector_width_short>()
<< "\nNative Vector Width Int: "
<< m_device.get_info<device::native_vector_width_int>()
<< device.get_info<device::native_vector_width_int>()
<< "\nNative Vector Width Long: "
<< m_device.get_info<device::native_vector_width_long>()
<< device.get_info<device::native_vector_width_long>()
<< "\nNative Vector Width Float: "
<< m_device.get_info<device::native_vector_width_float>()
<< device.get_info<device::native_vector_width_float>()
<< "\nNative Vector Width Double: "
<< m_device.get_info<device::native_vector_width_double>()
<< device.get_info<device::native_vector_width_double>()
<< "\nNative Vector Width Half: "
<< m_device.get_info<device::native_vector_width_half>()
<< "\nAddress Bits: " << m_device.get_info<device::address_bits>()
<< "\nImage Support: " << m_device.get_info<device::image_support>()
<< device.get_info<device::native_vector_width_half>()
<< "\nAddress Bits: " << device.get_info<device::address_bits>()
<< "\nImage Support: " << device.get_info<device::image_support>()
<< "\nMax Mem Alloc Size: "
<< m_device.get_info<device::max_mem_alloc_size>()
<< device.get_info<device::max_mem_alloc_size>()
<< "\nMax Read Image Args: "
<< m_device.get_info<device::max_read_image_args>()
<< device.get_info<device::max_read_image_args>()
<< "\nImage2d Max Width: "
<< m_device.get_info<device::image2d_max_width>()
<< device.get_info<device::image2d_max_width>()
<< "\nImage2d Max Height: "
<< m_device.get_info<device::image2d_max_height>()
<< device.get_info<device::image2d_max_height>()
<< "\nImage3d Max Width: "
<< m_device.get_info<device::image3d_max_width>()
<< device.get_info<device::image3d_max_width>()
<< "\nImage3d Max Height: "
<< m_device.get_info<device::image3d_max_height>()
<< device.get_info<device::image3d_max_height>()
<< "\nImage3d Max Depth: "
<< m_device.get_info<device::image3d_max_depth>()
<< device.get_info<device::image3d_max_depth>()
<< "\nImage Max Buffer Size: "
<< m_device.get_info<device::image_max_buffer_size>()
<< device.get_info<device::image_max_buffer_size>()
<< "\nImage Max Array Size: "
<< m_device.get_info<device::image_max_array_size>()
<< "\nMax Samplers: " << m_device.get_info<device::max_samplers>()
<< device.get_info<device::image_max_array_size>()
<< "\nMax Samplers: " << device.get_info<device::max_samplers>()
<< "\nMax Parameter Size: "
<< m_device.get_info<device::max_parameter_size>()
<< device.get_info<device::max_parameter_size>()
<< "\nMem Base Addr Align: "
<< m_device.get_info<device::mem_base_addr_align>()
<< device.get_info<device::mem_base_addr_align>()
<< "\nGlobal Cache Mem Line Size: "
<< m_device.get_info<device::global_mem_cache_line_size>()
<< device.get_info<device::global_mem_cache_line_size>()
<< "\nGlobal Mem Cache Size: "
<< m_device.get_info<device::global_mem_cache_size>()
<< device.get_info<device::global_mem_cache_size>()
<< "\nGlobal Mem Size: "
<< m_device.get_info<device::global_mem_size>()
<< device.get_info<device::global_mem_size>()
<< "\nMax Constant Buffer Size: "
<< m_device.get_info<device::max_constant_buffer_size>()
<< device.get_info<device::max_constant_buffer_size>()
<< "\nMax Constant Args: "
<< m_device.get_info<device::max_constant_args>()
<< "\nLocal Mem Size: "
<< m_device.get_info<device::local_mem_size>()
<< device.get_info<device::max_constant_args>()
<< "\nLocal Mem Size: " << device.get_info<device::local_mem_size>()
<< "\nError Correction Support: "
<< m_device.get_info<device::error_correction_support>()
<< device.get_info<device::error_correction_support>()
<< "\nHost Unified Memory: "
<< m_device.get_info<device::host_unified_memory>()
<< device.get_info<device::host_unified_memory>()
<< "\nProfiling Timer Resolution: "
<< m_device.get_info<device::profiling_timer_resolution>()
<< device.get_info<device::profiling_timer_resolution>()
<< "\nIs Endian Little: "
<< m_device.get_info<device::is_endian_little>()
<< "\nIs Available: " << m_device.get_info<device::is_available>()
<< device.get_info<device::is_endian_little>()
<< "\nIs Available: " << device.get_info<device::is_available>()
<< "\nIs Compiler Available: "
<< m_device.get_info<device::is_compiler_available>()
<< device.get_info<device::is_compiler_available>()
<< "\nIs Linker Available: "
<< m_device.get_info<device::is_linker_available>()
<< device.get_info<device::is_linker_available>()
<< "\nQueue Profiling: "
<< m_device.get_info<device::queue_profiling>()
<< device.get_info<device::queue_profiling>()
<< "\nBuilt In Kernels: "
<< Container<std::vector<std::string>>(
m_device.get_info<device::built_in_kernels>())
<< "\nVendor: " << m_device.get_info<device::vendor>()
<< "\nProfile: " << m_device.get_info<device::profile>()
<< "\nVersion: " << m_device.get_info<device::version>()
device.get_info<device::built_in_kernels>())
<< "\nVendor: " << device.get_info<device::vendor>()
<< "\nProfile: " << device.get_info<device::profile>()
<< "\nVersion: " << device.get_info<device::version>()
<< "\nExtensions: "
<< Container<std::vector<std::string>>(
m_device.get_info<device::extensions>())
device.get_info<device::extensions>())
<< "\nPrintf Buffer Size: "
<< m_device.get_info<device::printf_buffer_size>()
<< device.get_info<device::printf_buffer_size>()
<< "\nPreferred Interop User Sync: "
<< m_device.get_info<device::preferred_interop_user_sync>()
<< device.get_info<device::preferred_interop_user_sync>()
<< "\nPartition Max Sub Devices: "
<< m_device.get_info<device::partition_max_sub_devices>()
<< device.get_info<device::partition_max_sub_devices>()
<< "\nReference Count: "
<< m_device.get_info<device::reference_count>() << '\n';
<< device.get_info<device::reference_count>() << '\n';
}
namespace Impl {
@ -293,15 +298,13 @@ void SYCLSpaceInitializer::fence() {
}
void SYCLSpaceInitializer::print_configuration(std::ostream& msg,
const bool /*detail*/) {
const bool detail) {
msg << "Devices:" << std::endl;
msg << " KOKKOS_ENABLE_SYCL: ";
msg << "yes" << std::endl;
msg << "\nRuntime Configuration:" << std::endl;
// FIXME_SYCL not implemented
std::abort();
// Experimental::SYCL::print_configuration(msg, detail);
Experimental::SYCL{}.print_configuration(msg, detail);
}
} // namespace Impl

View File

@ -152,12 +152,6 @@ inline T atomic_compare_exchange(
((LONGLONG*)&compare_and_result));
return compare_and_result;
}
template <typename T>
inline T atomic_compare_exchange_strong(volatile T* const dest,
const T& compare, const T& val) {
return atomic_compare_exchange(dest, compare, val);
}
#endif
} // namespace Kokkos

View File

@ -4,7 +4,7 @@ HostArch=(SNB HSW SKX KNL)
DeviceArch=(Kepler35 Kepler37 Pascal60 Pascal61 Volta70)
if [ ! -z "$KOKKOS_HOST_ARCH_TEST" ]; then
export KOKKOS_ARCH_TEST=1
HostArch=(WSM SNB HSW SKX WSM AMDAVX ARMv80 ARMv81 BDW KNC KNL BGQ Power7 Power8 Power9 Zen Zen2 ARMv8_ThunderX ARMv8_ThunderX2)
HostArch=(WSM SNB HSW SKX WSM AMDAVX ARMv80 ARMv81 BDW KNC KNL BGQ Power7 Power8 Power9 Zen Zen2 Zen3 ARMv8_ThunderX ARMv8_ThunderX2)
DeviceArch=()
fi

View File

@ -157,6 +157,7 @@ display_help_text() {
echo " AMDAVX = AMD CPU"
echo " ZEN = AMD Zen-Core CPU"
echo " ZEN2 = AMD Zen2-Core CPU"
echo " ZEN3 = AMD Zen3-Core CPU"
echo " [AMD: GPU]"
echo " VEGA900 = AMD GPU MI25 GFX900"
echo " VEGA906 = AMD GPU MI50/MI60 GFX906"

View File

@ -137,6 +137,7 @@ do
echo " AMDAVX = AMD CPU"
echo " ZEN = AMD Zen-Core CPU"
echo " ZEN2 = AMD Zen2-Core CPU"
echo " ZEN3 = AMD Zen3-Core CPU"
echo " [ARM]"
echo " ARMv80 = ARMv8.0 Compatible CPU"
echo " ARMv81 = ARMv8.1 Compatible CPU"

View File

@ -24,3 +24,4 @@ tag: 3.2.00 date: 08:19:2020 master: 3b2fdc7e release: 5dc6d303
tag: 3.3.00 date: 12:16:2020 master: 734f577a release: 1535ba5c
tag: 3.3.01 date: 01:06:2021 master: 6d65b5a3 release: 4d23839c
tag: 3.4.00 date: 04:26:2021 master: 1fb0c284 release: 5d7738d6
tag: 3.4.01 date: 05:20:2021 master: 4b97a22f release: 410b15c8