diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos
index 84ce007639..a90e86b9f8 100644
--- a/lib/kokkos/Makefile.kokkos
+++ b/lib/kokkos/Makefile.kokkos
@@ -6,16 +6,16 @@ ifndef KOKKOS_PATH
endif
CXXFLAGS=$(CCFLAGS)
-# Options: Cuda,ROCm,OpenMP,Pthread,Qthreads,Serial
+# Options: Cuda,ROCm,OpenMP,Pthreads,Qthreads,Serial
KOKKOS_DEVICES ?= "OpenMP"
-#KOKKOS_DEVICES ?= "Pthread"
+#KOKKOS_DEVICES ?= "Pthreads"
# Options:
# Intel: KNC,KNL,SNB,HSW,BDW,SKX
-# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72
+# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2
# IBM: BGQ,Power7,Power8,Power9
# AMD-GPUS: Kaveri,Carrizo,Fiji,Vega
-# AMD-CPUS: AMDAVX,Ryzen,Epyc
+# AMD-CPUS: AMDAVX,Ryzen,EPYC
KOKKOS_ARCH ?= ""
# Options: yes,no
KOKKOS_DEBUG ?= "no"
@@ -224,7 +224,7 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11
- #KOKKOS_INTERNAL_CXX14_FLAG := -std=c++14
+ KOKKOS_INTERNAL_CXX14_FLAG := -std=c++14
KOKKOS_INTERNAL_CXX1Y_FLAG := -std=c++1y
#KOKKOS_INTERNAL_CXX17_FLAG := -std=c++17
#KOKKOS_INTERNAL_CXX1Z_FLAG := -std=c++1Z
@@ -276,6 +276,7 @@ KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pas
KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal60)
KOKKOS_INTERNAL_USE_ARCH_VOLTA70 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta70)
KOKKOS_INTERNAL_USE_ARCH_VOLTA72 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta72)
+KOKKOS_INTERNAL_USE_ARCH_TURING75 := $(call kokkos_has_string,$(KOKKOS_ARCH),Turing75)
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
@@ -284,6 +285,7 @@ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLE
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \
+ + $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
@@ -300,6 +302,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \
+ + $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
@@ -331,7 +334,7 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
# AMD based.
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Ryzen)
-KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),Epyc)
+KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),EPYC)
KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(call kokkos_has_string,$(KOKKOS_ARCH),Kaveri)
KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(call kokkos_has_string,$(KOKKOS_ARCH),Carrizo)
KOKKOS_INTERNAL_USE_ARCH_FIJI := $(call kokkos_has_string,$(KOKKOS_ARCH),Fiji)
@@ -341,12 +344,12 @@ KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(call kokkos_has_string,$(KOKKOS_ARCH),gfx90
# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
-KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW))
+KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_EPYC))
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX))
# Decide what ISA level we are able to support.
-KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX))
+KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_EPYC))
KOKKOS_INTERNAL_USE_ISA_KNC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9))
KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7))
@@ -658,6 +661,19 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
endif
endif
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_EPYC), 1)
+ tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AMD_EPYC")
+ tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AMD_AVX2")
+
+ ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
+ KOKKOS_CXXFLAGS += -mavx2
+ KOKKOS_LDFLAGS += -mavx2
+ else
+ KOKKOS_CXXFLAGS += -march=znver1 -mtune=znver1
+ KOKKOS_LDFLAGS += -march=znver1 -mtune=znver1
+ endif
+endif
+
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX")
@@ -950,6 +966,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA72")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72
endif
+ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1)
+ tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_TURING")
+ tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_TURING75")
+ KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75
+ endif
ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
diff --git a/lib/kokkos/README b/lib/kokkos/README
index 4b6d4170e0..cb6ceb5581 100644
--- a/lib/kokkos/README
+++ b/lib/kokkos/README
@@ -73,6 +73,8 @@ For specifics see the LICENSE file contained in the repository or distribution.
* NVCC 7.5 for CUDA (with gcc 4.8.4)
* NVCC 8.0.44 for CUDA (with gcc 5.3.0)
* NVCC 9.1 for CUDA (with gcc 6.1.0)
+ * NVCC 9.2 for CUDA (with gcc 7.2.0)
+ * NVCC 10.0 for CUDA (with gcc 7.4.0)
### Primary tested compilers on Power 8 are:
* GCC 6.4.0 (OpenMP,Serial)
@@ -109,7 +111,7 @@ GCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits
-Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
Intel: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
Clang: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
-NVCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
+NVCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
Other compilers are tested occasionally, in particular when pushing from develop to
master branch, without -Werror and only for a select set of backends.
diff --git a/lib/kokkos/bin/nvcc_wrapper b/lib/kokkos/bin/nvcc_wrapper
index f926ae024c..94bc72854e 100755
--- a/lib/kokkos/bin/nvcc_wrapper
+++ b/lib/kokkos/bin/nvcc_wrapper
@@ -308,6 +308,16 @@ do
shift
done
+#Check if nvcc exists
+if [ $host_only -ne 1 ]; then
+ var=$(which nvcc )
+ if [ $? -gt 0 ]; then
+ echo "Could not find nvcc in PATH"
+ exit $?
+ fi
+fi
+
+
# Only print host compiler version
if [ $get_host_version -eq 1 ]; then
$host_compiler --version
diff --git a/lib/kokkos/cmake/kokkos_options.cmake b/lib/kokkos/cmake/kokkos_options.cmake
index 580d1d322b..be494e5df0 100644
--- a/lib/kokkos/cmake/kokkos_options.cmake
+++ b/lib/kokkos/cmake/kokkos_options.cmake
@@ -104,6 +104,7 @@ list(APPEND KOKKOS_ARCH_LIST
Pascal61 # (GPU) NVIDIA Pascal generation CC 6.1
Volta70 # (GPU) NVIDIA Volta generation CC 7.0
Volta72 # (GPU) NVIDIA Volta generation CC 7.2
+ Turing75 # (GPU) NVIDIA Turing generation CC 7.5
)
# List of possible device architectures.
diff --git a/lib/kokkos/containers/src/Kokkos_DualView.hpp b/lib/kokkos/containers/src/Kokkos_DualView.hpp
index adba0c4158..f6631a4149 100644
--- a/lib/kokkos/containers/src/Kokkos_DualView.hpp
+++ b/lib/kokkos/containers/src/Kokkos_DualView.hpp
@@ -832,16 +832,14 @@ void
deep_copy (DualView
dst, // trust me, this must not be a reference
const DualView& src )
{
- if(src.modified_flags.data()==NULL || dst.modified_flags.data()==NULL) {
- return deep_copy(dst.d_view, src.d_view);
- }
- if (src.modified_flags(1) >= src.modified_flags(0)) {
- deep_copy (dst.d_view, src.d_view);
- dst.template modify::device_type> ();
- } else {
+ if ( src.need_sync_device() ) {
deep_copy (dst.h_view, src.h_view);
- dst.template modify::host_mirror_space> ();
+ dst.modify_host();
}
+ else {
+ deep_copy (dst.d_view, src.d_view);
+ dst.modify_device();
+ }
}
template< class ExecutionSpace ,
@@ -852,15 +850,12 @@ deep_copy (const ExecutionSpace& exec ,
DualView dst, // trust me, this must not be a reference
const DualView& src )
{
- if(src.modified_flags.data()==NULL || dst.modified_flags.data()==NULL) {
- return deep_copy(exec, dst.d_view, src.d_view);
- }
- if (src.modified_flags(1) >= src.modified_flags(0)) {
- deep_copy (exec, dst.d_view, src.d_view);
- dst.template modify::device_type> ();
- } else {
+ if ( src.need_sync_device() ) {
deep_copy (exec, dst.h_view, src.h_view);
- dst.template modify::host_mirror_space> ();
+ dst.modify_host();
+ } else {
+ deep_copy (exec, dst.d_view, src.d_view);
+ dst.modify_device();
}
}
diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp
index 8be2c49a31..3f284e6a8d 100644
--- a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp
+++ b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp
@@ -368,8 +368,8 @@ public:
enum { is_assignable = is_assignable_value_type &&
is_assignable_layout };
- typedef ViewMapping< DstTraits , void > DstType ;
- typedef ViewMapping< SrcTraits , void > SrcType ;
+ typedef ViewMapping< DstTraits , typename DstTraits::specialize > DstType ;
+ typedef ViewMapping< SrcTraits , typename SrcTraits::specialize > SrcType ;
template < typename DT , typename ... DP , typename ST , typename ... SP >
KOKKOS_INLINE_FUNCTION
@@ -432,7 +432,7 @@ public:
private:
- typedef Kokkos::Impl::ViewMapping< traits , void > map_type ;
+ typedef Kokkos::Impl::ViewMapping< traits , typename traits::specialize > map_type ;
typedef Kokkos::Impl::SharedAllocationTracker track_type ;
track_type m_track ;
@@ -567,11 +567,11 @@ public:
// Allow specializations to query their specialized map
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
KOKKOS_INLINE_FUNCTION
- const Kokkos::Impl::ViewMapping< traits , void > &
+ const Kokkos::Impl::ViewMapping< traits , typename traits::specialize > &
implementation_map() const { return m_map ; }
#endif
KOKKOS_INLINE_FUNCTION
- const Kokkos::Impl::ViewMapping< traits , void > &
+ const Kokkos::Impl::ViewMapping< traits , typename traits::specialize > &
impl_map() const { return m_map ; }
//----------------------------------------
@@ -952,7 +952,7 @@ public:
, m_rank(rhs.m_rank)
{
typedef typename DynRankView