mirror of https://github.com/lammps/lammps.git
Merge pull request #2783 from stanmoore1/kk_update_3.4.1
Update Kokkos library in LAMMPS to v3.4.1
This commit is contained in:
commit
b4e5298bf8
|
@ -37,8 +37,8 @@ if(DOWNLOAD_KOKKOS)
|
|||
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
|
||||
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
|
||||
include(ExternalProject)
|
||||
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.4.00.tar.gz" CACHE STRING "URL for KOKKOS tarball")
|
||||
set(KOKKOS_MD5 "c2fdcedb6953e6160c765366f6045abb" CACHE STRING "MD5 checksum of KOKKOS tarball")
|
||||
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.4.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
|
||||
set(KOKKOS_MD5 "4c84698917c93a18985b311bb6caf84f" CACHE STRING "MD5 checksum of KOKKOS tarball")
|
||||
mark_as_advanced(KOKKOS_URL)
|
||||
mark_as_advanced(KOKKOS_MD5)
|
||||
ExternalProject_Add(kokkos_build
|
||||
|
@ -58,7 +58,7 @@ if(DOWNLOAD_KOKKOS)
|
|||
target_link_libraries(lmp PRIVATE LAMMPS::KOKKOS)
|
||||
add_dependencies(LAMMPS::KOKKOS kokkos_build)
|
||||
elseif(EXTERNAL_KOKKOS)
|
||||
find_package(Kokkos 3.4.00 REQUIRED CONFIG)
|
||||
find_package(Kokkos 3.4.01 REQUIRED CONFIG)
|
||||
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
|
||||
target_link_libraries(lmp PRIVATE Kokkos::kokkos)
|
||||
else()
|
||||
|
|
|
@ -457,6 +457,9 @@ They must be specified in uppercase.
|
|||
* - ZEN2
|
||||
- HOST
|
||||
- AMD Zen2 class CPU (AVX 2)
|
||||
* - ZEN3
|
||||
- HOST
|
||||
- AMD Zen3 class CPU (AVX 2)
|
||||
* - ARMV80
|
||||
- HOST
|
||||
- ARMv8.0 Compatible CPU
|
||||
|
@ -560,7 +563,7 @@ They must be specified in uppercase.
|
|||
- GPU
|
||||
- Intel GPUs Gen9+
|
||||
|
||||
This list was last updated for version 3.4 of the Kokkos library.
|
||||
This list was last updated for version 3.4.1 of the Kokkos library.
|
||||
|
||||
.. tabs::
|
||||
|
||||
|
|
|
@ -262,6 +262,9 @@ Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_
|
|||
* Kokkos_ARCH_ZEN2
|
||||
* Whether to optimize for the Zen2 architecture
|
||||
* BOOL Default: OFF
|
||||
* Kokkos_ARCH_ZEN3
|
||||
* Whether to optimize for the Zen3 architecture
|
||||
* BOOL Default: OFF
|
||||
* Kokkos_ARCH_HSW
|
||||
* Whether to optimize for the HSW architecture
|
||||
* BOOL Default: OFF
|
||||
|
|
|
@ -1,5 +1,20 @@
|
|||
# Change Log
|
||||
|
||||
## [3.4.01](https://github.com/kokkos/kokkos/tree/3.4.01) (2021-05-19)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.4.00...3.4.01)
|
||||
|
||||
**Bug Fixes:**
|
||||
- Windows: Remove atomic_compare_exchange_strong overload conflicts with Windows [\#4024](https://github.com/kokkos/kokkos/pull/4024)
|
||||
- OpenMPTarget: Fixup allocation headers with OpenMPTarget backend [\#4020](https://github.com/kokkos/kokkos/pull/4020)
|
||||
- OpenMPTarget: Add missing specailization for OMPT to Kokkos Random [\#4022](https://github.com/kokkos/kokkos/pull/4022)
|
||||
- AMD: Add support for AMD Zen3 CPU architecture [\#4021](https://github.com/kokkos/kokkos/pull/4021)
|
||||
- SYCL: Implement SYCL::print_configuration [\#4012](https://github.com/kokkos/kokkos/pull/4012)
|
||||
- Containers: staticcsrgraph: use device type instead of execution space to construct views [\#3998](https://github.com/kokkos/kokkos/pull/3998)
|
||||
- nvcc_wrapper: fix errors in argument handling, suppress duplicates of GPU architecture and RDC flags [\#4006](https://github.com/kokkos/kokkos/pull/4006)
|
||||
- CI: Add icpx testing to intel container [\#4004](https://github.com/kokkos/kokkos/pull/4004)
|
||||
- CMake/TRIBITS: Keep quoted compiler flags when passing to Trilinos [\#4007](https://github.com/kokkos/kokkos/pull/4007)
|
||||
- CMake: Rename IntelClang to IntelLLVM [\#3945](https://github.com/kokkos/kokkos/pull/3945)
|
||||
|
||||
## [3.4.00](https://github.com/kokkos/kokkos/tree/3.4.00) (2021-04-25)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.3.01...3.4.00)
|
||||
|
||||
|
|
|
@ -112,7 +112,7 @@ ENDIF()
|
|||
|
||||
set(Kokkos_VERSION_MAJOR 3)
|
||||
set(Kokkos_VERSION_MINOR 4)
|
||||
set(Kokkos_VERSION_PATCH 00)
|
||||
set(Kokkos_VERSION_PATCH 01)
|
||||
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
|
||||
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
|
||||
|
||||
|
@ -206,8 +206,13 @@ ENDIF()
|
|||
IF (KOKKOS_HAS_TRILINOS)
|
||||
# Overwrite the old flags at the top-level
|
||||
# Because Tribits doesn't use lists, it uses spaces for the list of CXX flags
|
||||
# we have to match the annoying behavior
|
||||
STRING(REPLACE ";" " " KOKKOSCORE_COMPILE_OPTIONS "${KOKKOS_COMPILE_OPTIONS}")
|
||||
# we have to match the annoying behavior, also we have to preserve quotes
|
||||
# which needs another workaround.
|
||||
SET(KOKKOS_COMPILE_OPTIONS_TMP)
|
||||
FOREACH(OPTION ${KOKKOS_COMPILE_OPTIONS})
|
||||
LIST(APPEND KOKKOS_COMPILE_OPTIONS_TMP \"${OPTION}\")
|
||||
ENDFOREACH()
|
||||
STRING(REPLACE ";" " " KOKKOSCORE_COMPILE_OPTIONS "${KOKKOS_COMPILE_OPTIONS_TMP}")
|
||||
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_COMPILE_OPTIONS})
|
||||
IF (KOKKOS_ENABLE_CUDA)
|
||||
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_CUDA_OPTIONS})
|
||||
|
|
|
@ -12,7 +12,7 @@ endif
|
|||
|
||||
KOKKOS_VERSION_MAJOR = 3
|
||||
KOKKOS_VERSION_MINOR = 4
|
||||
KOKKOS_VERSION_PATCH = 00
|
||||
KOKKOS_VERSION_PATCH = 01
|
||||
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
|
||||
|
||||
# Options: Cuda,HIP,OpenMP,Pthread,Serial
|
||||
|
@ -24,7 +24,7 @@ KOKKOS_DEVICES ?= "OpenMP"
|
|||
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
|
||||
# IBM: BGQ,Power7,Power8,Power9
|
||||
# AMD-GPUS: Vega900,Vega906,Vega908
|
||||
# AMD-CPUS: AMDAVX,Zen,Zen2
|
||||
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
|
||||
KOKKOS_ARCH ?= ""
|
||||
# Options: yes,no
|
||||
KOKKOS_DEBUG ?= "no"
|
||||
|
@ -382,6 +382,7 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
|
|||
|
||||
# AMD based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ZEN3 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen3)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
|
||||
KOKKOS_INTERNAL_USE_ARCH_VEGA900 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega900)
|
||||
|
@ -391,12 +392,12 @@ KOKKOS_INTERNAL_USE_ARCH_VEGA908 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega
|
|||
# Any AVX?
|
||||
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2)) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX))
|
||||
|
||||
# Decide what ISA level we are able to support.
|
||||
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2))
|
||||
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2)) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
|
||||
KOKKOS_INTERNAL_USE_ISA_KNC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC))
|
||||
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9))
|
||||
KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7))
|
||||
|
@ -790,6 +791,19 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 1)
|
|||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN3")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -mavx2
|
||||
KOKKOS_LDFLAGS += -mavx2
|
||||
else
|
||||
KOKKOS_CXXFLAGS += -march=znver3 -mtune=znver3
|
||||
KOKKOS_LDFLAGS += -march=znver3 -mtune=znver3
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX")
|
||||
|
@ -1181,7 +1195,6 @@ endif
|
|||
|
||||
ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
|
||||
tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h)
|
||||
endif
|
||||
|
||||
# Functions for generating config header file
|
||||
kokkos_start_config_header = $(shell sed 's~@INCLUDE_NEXT_FILE@~~g' $(KOKKOS_PATH)/cmake/KokkosCore_Config_HeaderSet.in > $1)
|
||||
|
@ -1232,6 +1245,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
|
|||
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HBWSpace.hpp>","KokkosCore_Config_FwdBackend.hpp")
|
||||
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HBWSpace.hpp>","KokkosCore_Config_DeclareBackend.hpp")
|
||||
endif
|
||||
endif
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)
|
||||
|
|
|
@ -5,10 +5,12 @@ KOKKOS_SUBPACKAGE(Algorithms)
|
|||
IF (NOT Kokkos_INSTALL_TESTING)
|
||||
ADD_SUBDIRECTORY(src)
|
||||
ENDIF()
|
||||
|
||||
KOKKOS_ADD_TEST_DIRECTORIES(unit_tests)
|
||||
IF(NOT (KOKKOS_ENABLE_OPENMPTARGET
|
||||
AND (KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR
|
||||
KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)))
|
||||
KOKKOS_ADD_TEST_DIRECTORIES(unit_tests)
|
||||
ENDIF()
|
||||
|
||||
KOKKOS_SUBPACKAGE_POSTPROCESS()
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -687,6 +687,24 @@ struct Random_UniqueIndex<Kokkos::Experimental::SYCL> {
|
|||
};
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ENABLE_OPENMPTARGET
|
||||
template <>
|
||||
struct Random_UniqueIndex<Kokkos::Experimental::OpenMPTarget> {
|
||||
using locks_view_type = View<int*, Kokkos::Experimental::OpenMPTarget>;
|
||||
KOKKOS_FUNCTION
|
||||
static int get_state_idx(const locks_view_type& locks) {
|
||||
const int team_size = omp_get_num_threads();
|
||||
int i = omp_get_team_num() * team_size + omp_get_thread_num();
|
||||
const int lock_size = locks.extent_int(0);
|
||||
|
||||
while (Kokkos::atomic_compare_exchange(&locks(i), 0, 1)) {
|
||||
i = (i + 1) % lock_size;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
} // namespace Impl
|
||||
|
||||
template <class DeviceType>
|
||||
|
|
|
@ -44,7 +44,7 @@ IF(Kokkos_ENABLE_OPENMP)
|
|||
)
|
||||
ENDIF()
|
||||
|
||||
foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL)
|
||||
foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
# Because there is always an exception to the rule
|
||||
if(Tag STREQUAL "Threads")
|
||||
set(DEVICE "PTHREAD")
|
||||
|
|
|
@ -109,6 +109,16 @@ struct RandomProperties {
|
|||
}
|
||||
};
|
||||
|
||||
// FIXME_OPENMPTARGET: Need this for OpenMPTarget because contra to the standard
|
||||
// llvm requires the binary operator defined not just the +=
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
RandomProperties operator+(const RandomProperties& org,
|
||||
const RandomProperties& add) {
|
||||
RandomProperties val = org;
|
||||
val += add;
|
||||
return val;
|
||||
}
|
||||
|
||||
template <class GeneratorPool, class Scalar>
|
||||
struct test_random_functor {
|
||||
using rnd_type = typename GeneratorPool::generator_type;
|
||||
|
|
|
@ -370,7 +370,10 @@ template <class ExecutionSpace, typename KeyType>
|
|||
void test_sort(unsigned int N) {
|
||||
test_1D_sort<ExecutionSpace, KeyType>(N);
|
||||
test_3D_sort<ExecutionSpace, KeyType>(N);
|
||||
// FIXME_OPENMPTARGET: OpenMPTarget doesn't support DynamicView yet.
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
test_dynamic_view_sort<ExecutionSpace, KeyType>(N);
|
||||
#endif
|
||||
test_issue_1160_sort<ExecutionSpace>();
|
||||
}
|
||||
} // namespace Impl
|
||||
|
|
|
@ -67,6 +67,11 @@ shared_versioned_libraries=""
|
|||
|
||||
# Does the User set the architecture
|
||||
arch_set=0
|
||||
arch_flag=""
|
||||
|
||||
# Does the user set RDC?
|
||||
rdc_set=0
|
||||
rdc_flag=""
|
||||
|
||||
# Does the user overwrite the host compiler
|
||||
ccbin_set=0
|
||||
|
@ -190,8 +195,34 @@ do
|
|||
host_only_args="$host_only_args $1 $2"
|
||||
shift
|
||||
;;
|
||||
# Handle nvcc args controlling whether to generated relocatable device code
|
||||
--relocatable-device-code=*|-rdc=*)
|
||||
if [ "$rdc_set" -eq 0 ]; then
|
||||
rdc_set=1
|
||||
rdc_flag="$1"
|
||||
cuda_args="$cuda_args $rdc_flag"
|
||||
elif [ "$rdc_flag" != "$1" ]; then
|
||||
echo "RDC is being set twice with different flags, which is not handled"
|
||||
echo "$rdc_flag"
|
||||
echo "$1"
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
-rdc)
|
||||
if [ "$rdc_set" -eq 0 ]; then
|
||||
rdc_set=1
|
||||
rdc_flag="$1 $2"
|
||||
cuda_args="$cuda_args $rdc_flag"
|
||||
shift
|
||||
elif [ "$rdc_flag" != "$1 $2" ]; then
|
||||
echo "RDC is being set twice with different flags, which is not handled"
|
||||
echo "$rdc_flag"
|
||||
echo "$1 $2"
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
#Handle known nvcc args
|
||||
--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-Xptxas*|--fmad*|--use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this)
|
||||
--dryrun|--verbose|--keep|--keep-dir*|-G|-lineinfo|-expt-extended-lambda|-expt-relaxed-constexpr|--resource-usage|-Xptxas*|--fmad=*|--use_fast_math|--Wext-lambda-captures-this|-Wext-lambda-captures-this)
|
||||
cuda_args="$cuda_args $1"
|
||||
;;
|
||||
#Handle more known nvcc args
|
||||
|
@ -199,13 +230,13 @@ do
|
|||
cuda_args="$cuda_args $1"
|
||||
;;
|
||||
#Handle known nvcc args that have an argument
|
||||
-rdc|-maxrregcount|--default-stream|-Xnvlink|--fmad|-cudart|--cudart|-include)
|
||||
-maxrregcount=*|--maxrregcount=*)
|
||||
cuda_args="$cuda_args $1"
|
||||
;;
|
||||
-maxrregcount|--default-stream|-Xnvlink|--fmad|-cudart|--cudart|-include)
|
||||
cuda_args="$cuda_args $1 $2"
|
||||
shift
|
||||
;;
|
||||
-rdc=*|-maxrregcount*|--maxrregcount*)
|
||||
cuda_args="$cuda_args $1"
|
||||
;;
|
||||
#Handle unsupported standard flags
|
||||
--std=c++1y|-std=c++1y|--std=gnu++1y|-std=gnu++1y|--std=c++1z|-std=c++1z|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a)
|
||||
fallback_std_flag="-std=c++14"
|
||||
|
@ -323,20 +354,36 @@ do
|
|||
;;
|
||||
|
||||
#Handle -arch argument (if its not set use a default) this is the version with = sign
|
||||
-arch*|-gencode*)
|
||||
cuda_args="$cuda_args $1"
|
||||
arch_set=1
|
||||
-arch=*|-gencode=*)
|
||||
if [ "$arch_set" -eq 0 ]; then
|
||||
arch_set=1
|
||||
arch_flag="$1"
|
||||
cuda_args="$cuda_args $arch_flag"
|
||||
elif [ "$arch_flag" != "$1" ]; then
|
||||
echo "ARCH is being set twice with different flags, which is not handled"
|
||||
echo "$arch_flag"
|
||||
echo "$1"
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
#Handle -arch argument (if its not set use a default) this is the version without = sign
|
||||
-arch|-gencode)
|
||||
if [ "$arch_set" -eq 0 ]; then
|
||||
arch_set=1
|
||||
arch_flag="$1 $2"
|
||||
cuda_args="$cuda_args $arch_flag"
|
||||
shift
|
||||
elif [ "$arch_flag" != "$1 $2" ]; then
|
||||
echo "ARCH is being set twice with different flags, which is not handled"
|
||||
echo "$arch_flag"
|
||||
echo "$1 $2"
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
#Handle -code argument (if its not set use a default) this is the version with = sign
|
||||
-code*)
|
||||
cuda_args="$cuda_args $1"
|
||||
;;
|
||||
#Handle -arch argument (if its not set use a default) this is the version without = sign
|
||||
-arch|-gencode)
|
||||
cuda_args="$cuda_args $1 $2"
|
||||
arch_set=1
|
||||
shift
|
||||
;;
|
||||
#Handle -code argument (if its not set use a default) this is the version without = sign
|
||||
-code)
|
||||
cuda_args="$cuda_args $1 $2"
|
||||
|
|
|
@ -99,5 +99,6 @@
|
|||
#cmakedefine KOKKOS_ARCH_AMPERE86
|
||||
#cmakedefine KOKKOS_ARCH_AMD_ZEN
|
||||
#cmakedefine KOKKOS_ARCH_AMD_ZEN2
|
||||
#cmakedefine KOKKOS_ARCH_AMD_ZEN3
|
||||
|
||||
#cmakedefine KOKKOS_IMPL_DISABLE_SYCL_DEVICE_PRINTF
|
||||
|
|
|
@ -63,6 +63,7 @@ KOKKOS_ARCH_OPTION(AMPERE80 GPU "NVIDIA Ampere generation CC 8.0")
|
|||
KOKKOS_ARCH_OPTION(AMPERE86 GPU "NVIDIA Ampere generation CC 8.6")
|
||||
KOKKOS_ARCH_OPTION(ZEN HOST "AMD Zen architecture")
|
||||
KOKKOS_ARCH_OPTION(ZEN2 HOST "AMD Zen2 architecture")
|
||||
KOKKOS_ARCH_OPTION(ZEN3 HOST "AMD Zen3 architecture")
|
||||
KOKKOS_ARCH_OPTION(VEGA900 GPU "AMD GPU MI25 GFX900")
|
||||
KOKKOS_ARCH_OPTION(VEGA906 GPU "AMD GPU MI50/MI60 GFX906")
|
||||
KOKKOS_ARCH_OPTION(VEGA908 GPU "AMD GPU MI100 GFX908")
|
||||
|
@ -215,6 +216,15 @@ IF (KOKKOS_ARCH_ZEN2)
|
|||
SET(KOKKOS_ARCH_AMD_AVX2 ON)
|
||||
ENDIF()
|
||||
|
||||
IF (KOKKOS_ARCH_ZEN3)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
Intel -mavx2
|
||||
DEFAULT -march=znver3 -mtune=znver3
|
||||
)
|
||||
SET(KOKKOS_ARCH_AMD_ZEN3 ON)
|
||||
SET(KOKKOS_ARCH_AMD_AVX2 ON)
|
||||
ENDIF()
|
||||
|
||||
IF (KOKKOS_ARCH_WSM)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
Intel -xSSE4.2
|
||||
|
@ -284,7 +294,7 @@ IF (KOKKOS_ARCH_SKX)
|
|||
)
|
||||
ENDIF()
|
||||
|
||||
IF (KOKKOS_ARCH_WSM OR KOKKOS_ARCH_SNB OR KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW OR KOKKOS_ARCH_KNL OR KOKKOS_ARCH_SKX OR KOKKOS_ARCH_ZEN OR KOKKOS_ARCH_ZEN2)
|
||||
IF (KOKKOS_ARCH_WSM OR KOKKOS_ARCH_SNB OR KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW OR KOKKOS_ARCH_KNL OR KOKKOS_ARCH_SKX OR KOKKOS_ARCH_ZEN OR KOKKOS_ARCH_ZEN2 OR KOKKOS_ARCH_ZEN3)
|
||||
SET(KOKKOS_USE_ISA_X86_64 ON)
|
||||
ENDIF()
|
||||
|
||||
|
@ -457,7 +467,7 @@ IF (KOKKOS_ENABLE_OPENMPTARGET)
|
|||
ENDIF()
|
||||
IF (KOKKOS_ARCH_INTEL_GEN)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
IntelClang -fopenmp-targets=spir64 -D__STRICT_ANSI__
|
||||
IntelLLVM -fopenmp-targets=spir64 -D__STRICT_ANSI__
|
||||
)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
|
|
@ -101,7 +101,7 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
|
|||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
IF (INTERNAL_HAVE_INTEL_COMPILER) #not actually Clang
|
||||
SET(KOKKOS_CLANG_IS_INTEL TRUE)
|
||||
SET(KOKKOS_CXX_COMPILER_ID IntelClang CACHE STRING INTERNAL FORCE)
|
||||
SET(KOKKOS_CXX_COMPILER_ID IntelLLVM CACHE STRING INTERNAL FORCE)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ IF(KOKKOS_ENABLE_OPENMP)
|
|||
COMPILER_SPECIFIC_FLAGS(
|
||||
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
|
||||
Clang -Xcompiler ${ClangOpenMPFlag}
|
||||
IntelClang -Xcompiler -fiopenmp
|
||||
IntelLLVM -Xcompiler -fiopenmp
|
||||
PGI -Xcompiler -mp
|
||||
Cray NO-VALUE-SPECIFIED
|
||||
XL -Xcompiler -qsmp=omp
|
||||
|
@ -70,7 +70,7 @@ IF(KOKKOS_ENABLE_OPENMP)
|
|||
ELSE()
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
Clang ${ClangOpenMPFlag}
|
||||
IntelClang -fiopenmp
|
||||
IntelLLVM -fiopenmp
|
||||
AppleClang -Xpreprocessor -fopenmp
|
||||
PGI -mp
|
||||
Cray NO-VALUE-SPECIFIED
|
||||
|
@ -92,7 +92,7 @@ IF (KOKKOS_ENABLE_OPENMPTARGET)
|
|||
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
Clang ${ClangOpenMPFlag} -Wno-openmp-mapping
|
||||
IntelClang -fiopenmp -Wno-openmp-mapping
|
||||
IntelLLVM -fiopenmp -Wno-openmp-mapping
|
||||
XL -qsmp=omp -qoffload -qnoeh
|
||||
PGI -mp=gpu
|
||||
DEFAULT -fopenmp
|
||||
|
|
|
@ -773,7 +773,7 @@ FUNCTION(kokkos_link_tpl TARGET)
|
|||
ENDFUNCTION()
|
||||
|
||||
FUNCTION(COMPILER_SPECIFIC_OPTIONS_HELPER)
|
||||
SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang IntelClang GNU HIPCC Fujitsu)
|
||||
SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang IntelLLVM GNU HIPCC Fujitsu)
|
||||
CMAKE_PARSE_ARGUMENTS(
|
||||
PARSE
|
||||
"LINK_OPTIONS;COMPILE_OPTIONS;COMPILE_DEFINITIONS;LINK_LIBRARIES"
|
||||
|
|
|
@ -114,15 +114,11 @@ namespace Kokkos {
|
|||
template <class StaticCrsGraphType, class InputSizeType>
|
||||
inline typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph(
|
||||
const std::string& label, const std::vector<InputSizeType>& input) {
|
||||
using output_type = StaticCrsGraphType;
|
||||
// using input_type = std::vector<InputSizeType>; // unused
|
||||
|
||||
using output_type = StaticCrsGraphType;
|
||||
using entries_type = typename output_type::entries_type;
|
||||
|
||||
using work_type = View<typename output_type::size_type[],
|
||||
typename output_type::array_layout,
|
||||
typename output_type::execution_space,
|
||||
typename output_type::memory_traits>;
|
||||
using work_type = View<
|
||||
typename output_type::size_type[], typename output_type::array_layout,
|
||||
typename output_type::device_type, typename output_type::memory_traits>;
|
||||
|
||||
output_type output;
|
||||
|
||||
|
@ -161,10 +157,9 @@ inline typename StaticCrsGraphType::staticcrsgraph_type create_staticcrsgraph(
|
|||
|
||||
static_assert(entries_type::rank == 1, "Graph entries view must be rank one");
|
||||
|
||||
using work_type = View<typename output_type::size_type[],
|
||||
typename output_type::array_layout,
|
||||
typename output_type::execution_space,
|
||||
typename output_type::memory_traits>;
|
||||
using work_type = View<
|
||||
typename output_type::size_type[], typename output_type::array_layout,
|
||||
typename output_type::device_type, typename output_type::memory_traits>;
|
||||
|
||||
output_type output;
|
||||
|
||||
|
|
|
@ -179,8 +179,6 @@ class SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>
|
|||
const RecordBase::function_type arg_dealloc = &deallocate);
|
||||
|
||||
public:
|
||||
std::string get_label() const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate(
|
||||
const Kokkos::Experimental::OpenMPTargetSpace& arg_space,
|
||||
const std::string& arg_label, const size_t arg_alloc_size) {
|
||||
|
@ -190,10 +188,6 @@ class SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>
|
|||
return nullptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**\brief Reallocate tracked memory in the space */
|
||||
static void* reallocate_tracked(void* const arg_alloc_ptr,
|
||||
const size_t arg_alloc_size);
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
|
|
|
@ -113,7 +113,7 @@ class SYCL {
|
|||
void fence() const;
|
||||
|
||||
/// \brief Print configuration information to the given output stream.
|
||||
static void print_configuration(std::ostream&, const bool detail = false);
|
||||
void print_configuration(std::ostream&, const bool detail = false);
|
||||
|
||||
/// \brief Free any resources being consumed by the device.
|
||||
static void impl_finalize();
|
||||
|
@ -131,12 +131,10 @@ class SYCL {
|
|||
sycl::device get_device() const;
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const SYCLDevice& that) {
|
||||
return that.info(os);
|
||||
return SYCL::impl_sycl_info(os, that.m_device);
|
||||
}
|
||||
|
||||
private:
|
||||
std::ostream& info(std::ostream& os) const;
|
||||
|
||||
sycl::device m_device;
|
||||
};
|
||||
|
||||
|
@ -154,6 +152,9 @@ class SYCL {
|
|||
}
|
||||
|
||||
private:
|
||||
static std::ostream& impl_sycl_info(std::ostream& os,
|
||||
const sycl::device& device);
|
||||
|
||||
Kokkos::Impl::HostSharedPtr<Impl::SYCLInternal> m_space_instance;
|
||||
};
|
||||
|
||||
|
|
|
@ -107,12 +107,6 @@ SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace,
|
|||
SharedAllocationRecord<void, void>::m_alloc_size);
|
||||
}
|
||||
|
||||
// TODO: Implement deep copy back see CudaSpace
|
||||
std::string SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace,
|
||||
void>::get_label() const {
|
||||
return std::string("OpenMPTargetAllocation");
|
||||
}
|
||||
|
||||
SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>::
|
||||
SharedAllocationRecord(
|
||||
const Kokkos::Experimental::OpenMPTargetSpace &arg_space,
|
||||
|
@ -141,23 +135,6 @@ SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>::
|
|||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void *SharedAllocationRecord<Kokkos::Experimental::OpenMPTargetSpace, void>::
|
||||
reallocate_tracked(void *const arg_alloc_ptr, const size_t arg_alloc_size) {
|
||||
SharedAllocationRecord *const r_old = get_record(arg_alloc_ptr);
|
||||
SharedAllocationRecord *const r_new =
|
||||
allocate(r_old->m_space, r_old->get_label(), arg_alloc_size);
|
||||
|
||||
// Kokkos::Impl::DeepCopy<OpenMPTargetSpace,OpenMPTargetSpace>( r_new->data()
|
||||
// , r_old->data()
|
||||
// , std::min( r_old->size() ,
|
||||
// r_new->size() ) );
|
||||
|
||||
RecordBase::increment(r_new);
|
||||
RecordBase::decrement(r_old);
|
||||
|
||||
return r_new->data();
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
|
|
|
@ -105,6 +105,12 @@ bool SYCL::impl_is_initialized() {
|
|||
|
||||
void SYCL::impl_finalize() { Impl::SYCLInternal::singleton().finalize(); }
|
||||
|
||||
void SYCL::print_configuration(std::ostream& s, const bool detailed) {
|
||||
s << "macro KOKKOS_ENABLE_SYCL : defined" << '\n';
|
||||
if (detailed)
|
||||
SYCL::impl_sycl_info(s, m_space_instance->m_queue->get_device());
|
||||
}
|
||||
|
||||
void SYCL::fence() const {
|
||||
Impl::SYCLInternal::fence(*m_space_instance->m_queue);
|
||||
}
|
||||
|
@ -143,119 +149,118 @@ void SYCL::impl_initialize(SYCL::SYCLDevice d) {
|
|||
Impl::SYCLInternal::singleton().initialize(d.get_device());
|
||||
}
|
||||
|
||||
std::ostream& SYCL::SYCLDevice::info(std::ostream& os) const {
|
||||
std::ostream& SYCL::impl_sycl_info(std::ostream& os,
|
||||
const sycl::device& device) {
|
||||
using namespace sycl::info;
|
||||
return os << "Name: " << m_device.get_info<device::name>()
|
||||
<< "\nDriver Version: "
|
||||
<< m_device.get_info<device::driver_version>()
|
||||
<< "\nIs Host: " << m_device.is_host()
|
||||
<< "\nIs CPU: " << m_device.is_cpu()
|
||||
<< "\nIs GPU: " << m_device.is_gpu()
|
||||
<< "\nIs Accelerator: " << m_device.is_accelerator()
|
||||
<< "\nVendor Id: " << m_device.get_info<device::vendor_id>()
|
||||
return os << "Name: " << device.get_info<device::name>()
|
||||
<< "\nDriver Version: " << device.get_info<device::driver_version>()
|
||||
<< "\nIs Host: " << device.is_host()
|
||||
<< "\nIs CPU: " << device.is_cpu()
|
||||
<< "\nIs GPU: " << device.is_gpu()
|
||||
<< "\nIs Accelerator: " << device.is_accelerator()
|
||||
<< "\nVendor Id: " << device.get_info<device::vendor_id>()
|
||||
<< "\nMax Compute Units: "
|
||||
<< m_device.get_info<device::max_compute_units>()
|
||||
<< device.get_info<device::max_compute_units>()
|
||||
<< "\nMax Work Item Dimensions: "
|
||||
<< m_device.get_info<device::max_work_item_dimensions>()
|
||||
<< device.get_info<device::max_work_item_dimensions>()
|
||||
<< "\nMax Work Group Size: "
|
||||
<< m_device.get_info<device::max_work_group_size>()
|
||||
<< device.get_info<device::max_work_group_size>()
|
||||
<< "\nPreferred Vector Width Char: "
|
||||
<< m_device.get_info<device::preferred_vector_width_char>()
|
||||
<< device.get_info<device::preferred_vector_width_char>()
|
||||
<< "\nPreferred Vector Width Short: "
|
||||
<< m_device.get_info<device::preferred_vector_width_short>()
|
||||
<< device.get_info<device::preferred_vector_width_short>()
|
||||
<< "\nPreferred Vector Width Int: "
|
||||
<< m_device.get_info<device::preferred_vector_width_int>()
|
||||
<< device.get_info<device::preferred_vector_width_int>()
|
||||
<< "\nPreferred Vector Width Long: "
|
||||
<< m_device.get_info<device::preferred_vector_width_long>()
|
||||
<< device.get_info<device::preferred_vector_width_long>()
|
||||
<< "\nPreferred Vector Width Float: "
|
||||
<< m_device.get_info<device::preferred_vector_width_float>()
|
||||
<< device.get_info<device::preferred_vector_width_float>()
|
||||
<< "\nPreferred Vector Width Double: "
|
||||
<< m_device.get_info<device::preferred_vector_width_double>()
|
||||
<< device.get_info<device::preferred_vector_width_double>()
|
||||
<< "\nPreferred Vector Width Half: "
|
||||
<< m_device.get_info<device::preferred_vector_width_half>()
|
||||
<< device.get_info<device::preferred_vector_width_half>()
|
||||
<< "\nNative Vector Width Char: "
|
||||
<< m_device.get_info<device::native_vector_width_char>()
|
||||
<< device.get_info<device::native_vector_width_char>()
|
||||
<< "\nNative Vector Width Short: "
|
||||
<< m_device.get_info<device::native_vector_width_short>()
|
||||
<< device.get_info<device::native_vector_width_short>()
|
||||
<< "\nNative Vector Width Int: "
|
||||
<< m_device.get_info<device::native_vector_width_int>()
|
||||
<< device.get_info<device::native_vector_width_int>()
|
||||
<< "\nNative Vector Width Long: "
|
||||
<< m_device.get_info<device::native_vector_width_long>()
|
||||
<< device.get_info<device::native_vector_width_long>()
|
||||
<< "\nNative Vector Width Float: "
|
||||
<< m_device.get_info<device::native_vector_width_float>()
|
||||
<< device.get_info<device::native_vector_width_float>()
|
||||
<< "\nNative Vector Width Double: "
|
||||
<< m_device.get_info<device::native_vector_width_double>()
|
||||
<< device.get_info<device::native_vector_width_double>()
|
||||
<< "\nNative Vector Width Half: "
|
||||
<< m_device.get_info<device::native_vector_width_half>()
|
||||
<< "\nAddress Bits: " << m_device.get_info<device::address_bits>()
|
||||
<< "\nImage Support: " << m_device.get_info<device::image_support>()
|
||||
<< device.get_info<device::native_vector_width_half>()
|
||||
<< "\nAddress Bits: " << device.get_info<device::address_bits>()
|
||||
<< "\nImage Support: " << device.get_info<device::image_support>()
|
||||
<< "\nMax Mem Alloc Size: "
|
||||
<< m_device.get_info<device::max_mem_alloc_size>()
|
||||
<< device.get_info<device::max_mem_alloc_size>()
|
||||
<< "\nMax Read Image Args: "
|
||||
<< m_device.get_info<device::max_read_image_args>()
|
||||
<< device.get_info<device::max_read_image_args>()
|
||||
<< "\nImage2d Max Width: "
|
||||
<< m_device.get_info<device::image2d_max_width>()
|
||||
<< device.get_info<device::image2d_max_width>()
|
||||
<< "\nImage2d Max Height: "
|
||||
<< m_device.get_info<device::image2d_max_height>()
|
||||
<< device.get_info<device::image2d_max_height>()
|
||||
<< "\nImage3d Max Width: "
|
||||
<< m_device.get_info<device::image3d_max_width>()
|
||||
<< device.get_info<device::image3d_max_width>()
|
||||
<< "\nImage3d Max Height: "
|
||||
<< m_device.get_info<device::image3d_max_height>()
|
||||
<< device.get_info<device::image3d_max_height>()
|
||||
<< "\nImage3d Max Depth: "
|
||||
<< m_device.get_info<device::image3d_max_depth>()
|
||||
<< device.get_info<device::image3d_max_depth>()
|
||||
<< "\nImage Max Buffer Size: "
|
||||
<< m_device.get_info<device::image_max_buffer_size>()
|
||||
<< device.get_info<device::image_max_buffer_size>()
|
||||
<< "\nImage Max Array Size: "
|
||||
<< m_device.get_info<device::image_max_array_size>()
|
||||
<< "\nMax Samplers: " << m_device.get_info<device::max_samplers>()
|
||||
<< device.get_info<device::image_max_array_size>()
|
||||
<< "\nMax Samplers: " << device.get_info<device::max_samplers>()
|
||||
<< "\nMax Parameter Size: "
|
||||
<< m_device.get_info<device::max_parameter_size>()
|
||||
<< device.get_info<device::max_parameter_size>()
|
||||
<< "\nMem Base Addr Align: "
|
||||
<< m_device.get_info<device::mem_base_addr_align>()
|
||||
<< device.get_info<device::mem_base_addr_align>()
|
||||
<< "\nGlobal Cache Mem Line Size: "
|
||||
<< m_device.get_info<device::global_mem_cache_line_size>()
|
||||
<< device.get_info<device::global_mem_cache_line_size>()
|
||||
<< "\nGlobal Mem Cache Size: "
|
||||
<< m_device.get_info<device::global_mem_cache_size>()
|
||||
<< device.get_info<device::global_mem_cache_size>()
|
||||
<< "\nGlobal Mem Size: "
|
||||
<< m_device.get_info<device::global_mem_size>()
|
||||
<< device.get_info<device::global_mem_size>()
|
||||
<< "\nMax Constant Buffer Size: "
|
||||
<< m_device.get_info<device::max_constant_buffer_size>()
|
||||
<< device.get_info<device::max_constant_buffer_size>()
|
||||
<< "\nMax Constant Args: "
|
||||
<< m_device.get_info<device::max_constant_args>()
|
||||
<< "\nLocal Mem Size: "
|
||||
<< m_device.get_info<device::local_mem_size>()
|
||||
<< device.get_info<device::max_constant_args>()
|
||||
<< "\nLocal Mem Size: " << device.get_info<device::local_mem_size>()
|
||||
<< "\nError Correction Support: "
|
||||
<< m_device.get_info<device::error_correction_support>()
|
||||
<< device.get_info<device::error_correction_support>()
|
||||
<< "\nHost Unified Memory: "
|
||||
<< m_device.get_info<device::host_unified_memory>()
|
||||
<< device.get_info<device::host_unified_memory>()
|
||||
<< "\nProfiling Timer Resolution: "
|
||||
<< m_device.get_info<device::profiling_timer_resolution>()
|
||||
<< device.get_info<device::profiling_timer_resolution>()
|
||||
<< "\nIs Endian Little: "
|
||||
<< m_device.get_info<device::is_endian_little>()
|
||||
<< "\nIs Available: " << m_device.get_info<device::is_available>()
|
||||
<< device.get_info<device::is_endian_little>()
|
||||
<< "\nIs Available: " << device.get_info<device::is_available>()
|
||||
<< "\nIs Compiler Available: "
|
||||
<< m_device.get_info<device::is_compiler_available>()
|
||||
<< device.get_info<device::is_compiler_available>()
|
||||
<< "\nIs Linker Available: "
|
||||
<< m_device.get_info<device::is_linker_available>()
|
||||
<< device.get_info<device::is_linker_available>()
|
||||
<< "\nQueue Profiling: "
|
||||
<< m_device.get_info<device::queue_profiling>()
|
||||
<< device.get_info<device::queue_profiling>()
|
||||
<< "\nBuilt In Kernels: "
|
||||
<< Container<std::vector<std::string>>(
|
||||
m_device.get_info<device::built_in_kernels>())
|
||||
<< "\nVendor: " << m_device.get_info<device::vendor>()
|
||||
<< "\nProfile: " << m_device.get_info<device::profile>()
|
||||
<< "\nVersion: " << m_device.get_info<device::version>()
|
||||
device.get_info<device::built_in_kernels>())
|
||||
<< "\nVendor: " << device.get_info<device::vendor>()
|
||||
<< "\nProfile: " << device.get_info<device::profile>()
|
||||
<< "\nVersion: " << device.get_info<device::version>()
|
||||
<< "\nExtensions: "
|
||||
<< Container<std::vector<std::string>>(
|
||||
m_device.get_info<device::extensions>())
|
||||
device.get_info<device::extensions>())
|
||||
<< "\nPrintf Buffer Size: "
|
||||
<< m_device.get_info<device::printf_buffer_size>()
|
||||
<< device.get_info<device::printf_buffer_size>()
|
||||
<< "\nPreferred Interop User Sync: "
|
||||
<< m_device.get_info<device::preferred_interop_user_sync>()
|
||||
<< device.get_info<device::preferred_interop_user_sync>()
|
||||
<< "\nPartition Max Sub Devices: "
|
||||
<< m_device.get_info<device::partition_max_sub_devices>()
|
||||
<< device.get_info<device::partition_max_sub_devices>()
|
||||
<< "\nReference Count: "
|
||||
<< m_device.get_info<device::reference_count>() << '\n';
|
||||
<< device.get_info<device::reference_count>() << '\n';
|
||||
}
|
||||
|
||||
namespace Impl {
|
||||
|
@ -293,15 +298,13 @@ void SYCLSpaceInitializer::fence() {
|
|||
}
|
||||
|
||||
void SYCLSpaceInitializer::print_configuration(std::ostream& msg,
|
||||
const bool /*detail*/) {
|
||||
const bool detail) {
|
||||
msg << "Devices:" << std::endl;
|
||||
msg << " KOKKOS_ENABLE_SYCL: ";
|
||||
msg << "yes" << std::endl;
|
||||
|
||||
msg << "\nRuntime Configuration:" << std::endl;
|
||||
// FIXME_SYCL not implemented
|
||||
std::abort();
|
||||
// Experimental::SYCL::print_configuration(msg, detail);
|
||||
Experimental::SYCL{}.print_configuration(msg, detail);
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
|
|
|
@ -152,12 +152,6 @@ inline T atomic_compare_exchange(
|
|||
((LONGLONG*)&compare_and_result));
|
||||
return compare_and_result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T atomic_compare_exchange_strong(volatile T* const dest,
|
||||
const T& compare, const T& val) {
|
||||
return atomic_compare_exchange(dest, compare, val);
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace Kokkos
|
||||
|
|
|
@ -4,7 +4,7 @@ HostArch=(SNB HSW SKX KNL)
|
|||
DeviceArch=(Kepler35 Kepler37 Pascal60 Pascal61 Volta70)
|
||||
if [ ! -z "$KOKKOS_HOST_ARCH_TEST" ]; then
|
||||
export KOKKOS_ARCH_TEST=1
|
||||
HostArch=(WSM SNB HSW SKX WSM AMDAVX ARMv80 ARMv81 BDW KNC KNL BGQ Power7 Power8 Power9 Zen Zen2 ARMv8_ThunderX ARMv8_ThunderX2)
|
||||
HostArch=(WSM SNB HSW SKX WSM AMDAVX ARMv80 ARMv81 BDW KNC KNL BGQ Power7 Power8 Power9 Zen Zen2 Zen3 ARMv8_ThunderX ARMv8_ThunderX2)
|
||||
DeviceArch=()
|
||||
fi
|
||||
|
||||
|
|
|
@ -157,6 +157,7 @@ display_help_text() {
|
|||
echo " AMDAVX = AMD CPU"
|
||||
echo " ZEN = AMD Zen-Core CPU"
|
||||
echo " ZEN2 = AMD Zen2-Core CPU"
|
||||
echo " ZEN3 = AMD Zen3-Core CPU"
|
||||
echo " [AMD: GPU]"
|
||||
echo " VEGA900 = AMD GPU MI25 GFX900"
|
||||
echo " VEGA906 = AMD GPU MI50/MI60 GFX906"
|
||||
|
|
|
@ -137,6 +137,7 @@ do
|
|||
echo " AMDAVX = AMD CPU"
|
||||
echo " ZEN = AMD Zen-Core CPU"
|
||||
echo " ZEN2 = AMD Zen2-Core CPU"
|
||||
echo " ZEN3 = AMD Zen3-Core CPU"
|
||||
echo " [ARM]"
|
||||
echo " ARMv80 = ARMv8.0 Compatible CPU"
|
||||
echo " ARMv81 = ARMv8.1 Compatible CPU"
|
||||
|
|
|
@ -24,3 +24,4 @@ tag: 3.2.00 date: 08:19:2020 master: 3b2fdc7e release: 5dc6d303
|
|||
tag: 3.3.00 date: 12:16:2020 master: 734f577a release: 1535ba5c
|
||||
tag: 3.3.01 date: 01:06:2021 master: 6d65b5a3 release: 4d23839c
|
||||
tag: 3.4.00 date: 04:26:2021 master: 1fb0c284 release: 5d7738d6
|
||||
tag: 3.4.01 date: 05:20:2021 master: 4b97a22f release: 410b15c8
|
||||
|
|
Loading…
Reference in New Issue