Updating Kokkos lib

git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@15556 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
stamoor 2016-09-06 23:06:32 +00:00
parent 1ad033ec0c
commit 39be4185c4
502 changed files with 157510 additions and 0 deletions

8
lib/kokkos/.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
# Standard ignores
*~
*.pyc
\#*#
.#*
.*.swp
.cproject
.project

184
lib/kokkos/CMakeLists.txt Normal file
View File

@ -0,0 +1,184 @@
# Record whether the TriBITS package macros are already available.
# KOKKOS_HAS_TRILINOS is cached as ON when TRIBITS_PACKAGE_DECL exists as a
# command and as OFF otherwise.
IF(NOT COMMAND TRIBITS_PACKAGE_DECL)
  SET(KOKKOS_HAS_TRILINOS OFF CACHE BOOL "")
ELSE()
  SET(KOKKOS_HAS_TRILINOS ON CACHE BOOL "")
ENDIF()

# Without the TriBITS commands, set the minimum CMake version here and pull in
# cmake/tribits.cmake (presumably a stand-in for the TRIBITS_* commands used
# below -- TODO confirm against that file).
IF(NOT KOKKOS_HAS_TRILINOS)
  CMAKE_MINIMUM_REQUIRED(VERSION 2.8.11 FATAL_ERROR)
  INCLUDE(cmake/tribits.cmake)
ENDIF()

#
# A) Forward declare the package so that certain options are also defined for
# subpackages
#
TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS)
#------------------------------------------------------------------------------
#
# B) Define the common options for Kokkos first so they can be used by
# subpackages as well.
#
# mfh 01 Aug 2016: See Issue #61:
#
# https://github.com/kokkos/kokkos/issues/61
#
# Don't use TRIBITS_ADD_DEBUG_OPTION() here, because that defines
# HAVE_KOKKOS_DEBUG. We define KOKKOS_HAVE_DEBUG here instead,
# for compatibility with Kokkos' Makefile build system.
#
# The default value is quoted, like the defaults of the other options in this
# file, so that an empty or undefined ${PROJECT_NAME}_ENABLE_DEBUG is still
# passed as a (false) fourth argument instead of vanishing from the call.
TRIBITS_ADD_OPTION_AND_DEFINE(
  ${PACKAGE_NAME}_ENABLE_DEBUG
  ${PACKAGE_NAME_UC}_HAVE_DEBUG
  "Enable run-time debug checks. These checks may be expensive, so they are disabled by default in a release build."
  "${${PROJECT_NAME}_ENABLE_DEBUG}"
)
# Each call below passes four arguments: the user-facing option name, the
# preprocessor macro name, the help text, and the option's default value.

# Sierra build-system integration (off by default).
TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_SIERRA_BUILD
  KOKKOS_FOR_SIERRA
  "Configure Kokkos for building within the Sierra build system."
  OFF
)

# CUDA backend; the default follows TPL_ENABLE_CUDA.
TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_Cuda
  KOKKOS_HAVE_CUDA
  "Enable CUDA support in Kokkos."
  "${TPL_ENABLE_CUDA}"
)

# CUDA UVM (off by default).
TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_Cuda_UVM
  KOKKOS_USE_CUDA_UVM
  "Enable CUDA Unified Virtual Memory support in Kokkos."
  OFF
)

# Pthread backend (off by default).
TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_Pthread
  KOKKOS_HAVE_PTHREAD
  "Enable Pthread support in Kokkos."
  OFF
)

# Enabling the Pthread backend while TPL_ENABLE_Pthread is off is a fatal
# configuration error.
ASSERT_DEFINED(TPL_ENABLE_Pthread)
IF(Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread)
  MESSAGE(FATAL_ERROR "You set Kokkos_ENABLE_Pthread=ON, but Trilinos' support for Pthread(s) is not enabled (TPL_ENABLE_Pthread=OFF). This is not allowed. Please enable Pthreads in Trilinos before attempting to enable Kokkos' support for Pthreads.")
ENDIF()

# OpenMP backend; the default follows the project-wide OpenMP setting.
TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_OpenMP
  KOKKOS_HAVE_OPENMP
  "Enable OpenMP support in Kokkos."
  "${${PROJECT_NAME}_ENABLE_OpenMP}"
)

# QTHREAD backend; the default follows TPL_ENABLE_QTHREAD.
TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_QTHREAD
  KOKKOS_HAVE_QTHREAD
  "Enable QTHREAD support in Kokkos."
  "${TPL_ENABLE_QTHREAD}"
)

# C++11 support; the default follows the project-wide C++11 setting.
TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_CXX11
  KOKKOS_HAVE_CXX11
  "Enable C++11 support in Kokkos."
  "${${PROJECT_NAME}_ENABLE_CXX11}"
)

# HWLOC support; the default follows TPL_ENABLE_HWLOC.
TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_HWLOC
  KOKKOS_HAVE_HWLOC
  "Enable HWLOC support in Kokkos."
  "${TPL_ENABLE_HWLOC}"
)

# MPI support; the default follows TPL_ENABLE_MPI.
TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_MPI
  KOKKOS_HAVE_MPI
  "Enable MPI support in Kokkos."
  "${TPL_ENABLE_MPI}"
)
# Default value of the Kokkos_ENABLE_Debug_Bounds_Check option
#
# CMake is case sensitive, and the Kokkos_ENABLE_Debug_Bounds_Check option
# (defined below) is not all caps; it stays that way for backwards
# compatibility. Users who set the all-caps Kokkos_ENABLE_DEBUG_BOUNDS_CHECK
# to a true value get ON as the default of the mixed-case option. In every
# other case (all-caps variable unset, or set to a false value) the default
# comes from Kokkos_ENABLE_DEBUG (see Issue #367).
ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_DEBUG)

# Start from the package debug setting, then override it when the all-caps
# variable is true. IF(<variable>) is false for an undefined variable, so no
# separate DEFINED test is needed.
SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT "${${PACKAGE_NAME}_ENABLE_DEBUG}")
IF(Kokkos_ENABLE_DEBUG_BOUNDS_CHECK)
  SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT ON)
ENDIF()
ASSERT_DEFINED(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT)

TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_Debug_Bounds_Check
  KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
  "Enable Kokkos::View run-time bounds checking."
  "${Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT}"
)
# KokkosP profiling hooks; the default follows TPL_ENABLE_DLlib.
TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_Profiling
  KOKKOS_ENABLE_PROFILING_INTERNAL
  "Enable KokkosP profiling support for kernel data collections."
  "${TPL_ENABLE_DLlib}"
)

# Placeholder for a future device; the default follows TPL_ENABLE_Winthread.
TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_Winthread
  KOKKOS_HAVE_WINTHREAD
  "Enable Winthread support in Kokkos."
  "${TPL_ENABLE_Winthread}"
)

# Select the new or the old (deprecated) Kokkos::View; the new one is the
# default.
TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_USING_DEPRECATED_VIEW
  KOKKOS_USING_DEPRECATED_VIEW
  "Choose whether to use the old, deprecated Kokkos::View"
  OFF
)
#------------------------------------------------------------------------------
#
# C) Process the subpackages for Kokkos
#
# This runs after all options above are defined, so the subpackages can use
# them (see section B).
TRIBITS_PROCESS_SUBPACKAGES()
#
# D) If Kokkos itself is enabled, process the Kokkos package
#
TRIBITS_PACKAGE_DEF()
# Exclusion lists handed to TriBITS; presumably these keep the named files
# out of generated source distributions -- TODO confirm against TriBITS docs.
TRIBITS_EXCLUDE_AUTOTOOLS_FILES()
TRIBITS_EXCLUDE_FILES(
classic/doc
classic/LinAlg/doc/CrsRefactorNotesMay2012
)
# Final TriBITS call for the package; it comes after every other TRIBITS_*
# call in this file.
TRIBITS_PACKAGE_POSTPROCESS()

40
lib/kokkos/Copyright.txt Normal file
View File

@ -0,0 +1,40 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -0,0 +1,73 @@
Developers of Kokkos (those who commit modifications to Kokkos)
must maintain the snapshot of Kokkos in the Trilinos repository.
This file contains instructions for how to
snapshot Kokkos from github.com/kokkos to Trilinos.
------------------------------------------------------------------------
*** EVERYTHING GOES RIGHT WORKFLOW ***
1) Given a 'git clone' of Kokkos and of Trilinos repositories.
1.1) Let ${KOKKOS} be the absolute path to the Kokkos clone.
This path *must* terminate with the directory name 'kokkos';
e.g., ${HOME}/kokkos .
1.2) Let ${TRILINOS} be the absolute path to the Trilinos directory.
2) Given that the Kokkos build & test is clean and
changes are committed to the Kokkos clone.
3) Snapshot the current commit in the Kokkos clone into the Trilinos clone.
This overwrites ${TRILINOS}/packages/kokkos with the content of ${KOKKOS}:
${KOKKOS}/config/snapshot.py --verbose ${KOKKOS} ${TRILINOS}/packages
4) Verify the snapshot commit happened as expected
cd ${TRILINOS}/packages/kokkos
git log -1 --name-only
5) Modify, build, and test Trilinos with the Kokkos snapshot.
6) Given that the Trilinos build & test is clean and
changes are committed to the Trilinos clone.
7) Attempt push to the Kokkos repository.
If push fails then you must 'remove the Kokkos snapshot'
from your Trilinos clone.
See below.
8) Attempt to push to the Trilinos repository.
If updating for a failed push requires you to change Kokkos you must
'remove the Kokkos snapshot' from your Trilinos clone.
See below.
------------------------------------------------------------------------
*** WHEN SOMETHING GOES WRONG AND YOU MUST ***
*** REMOVE THE KOKKOS SNAPSHOT FROM YOUR TRILINOS CLONE ***
1) Query the Trilinos clone commit log.
git log --oneline
2) Note the <SHA1> of the commit to the Trilinos clone
immediately BEFORE the Kokkos snapshot commit.
Copy this <SHA1> for use in the next command.
3) IF more than one outstanding commit then you can remove just the
Kokkos snapshot commit with 'git rebase -i'. Edit the rebase file.
Remove or comment out the Kokkos snapshot commit entry.
git rebase -i <SHA1>
4) IF the Kokkos snapshot commit is the one and only
outstanding commit then remove just that commit.
git reset --hard HEAD~1
------------------------------------------------------------------------
*** REGARDING 'snapshot.py' TOOL ***
The 'snapshot.py' tool is developed and maintained by the
Center for Computing Research (CCR)
Software Engineering, Maintenance, and Support (SEMS) team.
Contact Brent Perschbacher <bmpersc@sandia.gov> for questions.
------------------------------------------------------------------------

40
lib/kokkos/LICENSE Normal file
View File

@ -0,0 +1,40 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER

480
lib/kokkos/Makefile.kokkos Normal file
View File

@ -0,0 +1,480 @@
# Default settings common options
#
# Every setting assigned with ?= below only takes effect when the variable is
# not already defined, so a value from the command line, the environment or
# an including makefile wins. The double quotes are part of the value as far
# as make is concerned; the settings are only ever passed through a shell
# `echo`, which removes them.
#LAMMPS specific settings:
# Location of the Kokkos tree relative to the directory make runs in.
KOKKOS_PATH=../../lib/kokkos
# Recursively expanded: CXXFLAGS follows whatever CCFLAGS is when it is used.
CXXFLAGS=$(CCFLAGS)
#Options: OpenMP,Serial,Pthreads,Cuda
KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Pthreads"
#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv8,BGQ,Power7,Power8,KNL,BDW
KOKKOS_ARCH ?= ""
#Options: yes,no
KOKKOS_DEBUG ?= "no"
#Options: hwloc,librt,experimental_memkind
KOKKOS_USE_TPLS ?= ""
#Options: c++11
KOKKOS_CXX_STANDARD ?= "c++11"
#Options: aggressive_vectorization,disable_profiling
KOKKOS_OPTIONS ?= ""
#Default settings specific options
#Options: force_uvm,use_ldg,rdc,enable_lambda
KOKKOS_CUDA_OPTIONS ?= ""
# Check for general settings
#
# Each probe echoes one setting on a single line, greps it for a keyword and
# counts the matching lines, so the result is 1 when the keyword occurs
# anywhere in the setting and 0 otherwise. $(strip ...) removes any padding
# that `wc -l` adds around the number. The match is a substring match, not a
# whole-word match.
KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l))
KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l))
# Check for external libraries
KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l))
KOKKOS_INTERNAL_USE_LIBRT := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "librt" | wc -l))
KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "experimental_memkind" | wc -l))
# Check for advanced settings
KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l))
KOKKOS_INTERNAL_DISABLE_PROFILING := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_profiling" | wc -l))
# CUDA-specific options, taken from KOKKOS_CUDA_OPTIONS.
KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "force_uvm" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l))
# Host execution spaces requested through KOKKOS_DEVICES; at least one of
# them has to end up enabled. Each flag is 1 when the name occurs in
# KOKKOS_DEVICES and 0 otherwise.
KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMP | wc -l))
KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l))
KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l))
KOKKOS_INTERNAL_USE_QTHREAD := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthread | wc -l))

# Fall back to Serial when neither OpenMP nor Pthreads was requested. The two
# flags are concatenated, so "00" means both are off. Qthread does not take
# part in this test.
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP)$(KOKKOS_INTERNAL_USE_PTHREADS), 00)
  KOKKOS_INTERNAL_USE_SERIAL := 1
endif
# Compiler and OS detection.
#
# Each probe runs the compiler (or uname), greps the combined stdout/stderr
# for an identifying string and counts the matching lines. The counts are
# wrapped in $(strip ...) like every other probe in this file: some `wc`
# implementations pad the number with blanks, and a padded value never
# compares equal to 1 in the ifeq tests that consume these variables.
KOKKOS_INTERNAL_COMPILER_INTEL := $(strip $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l))
KOKKOS_INTERNAL_COMPILER_PGI := $(strip $(shell $(CXX) --version 2>&1 | grep PGI | wc -l))
KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l))
KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l))
KOKKOS_INTERNAL_OS_CYGWIN := $(strip $(shell uname | grep CYGWIN | wc -l))
# Pick the compiler-specific spellings of the OpenMP and C++11 switches.
# The compilers are tested in the order PGI, XL, Cray; anything else gets the
# generic -fopenmp / --std=c++11 pair.
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
  KOKKOS_INTERNAL_OPENMP_FLAG := -mp
  KOKKOS_INTERNAL_CXX11_FLAG := --c++11
else ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
  KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
  KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
  # OpenMP is turned on by default in Cray compiler environment
  KOKKOS_INTERNAL_OPENMP_FLAG :=
  KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11
else
  KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
  KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
endif
# Check for other Execution Spaces
# 1 when "Cuda" occurs in KOKKOS_DEVICES, 0 otherwise.
KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
# Check for Kokkos Architecture settings
# Each flag below is 1 when its keyword occurs in KOKKOS_ARCH, 0 otherwise.
#Intel based
KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l))
KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l))
KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l))
KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l))
#NVIDIA based
# Path of the nvcc wrapper script shipped in the Kokkos tree.
NVCC_WRAPPER := $(KOKKOS_PATH)/config/nvcc_wrapper
KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler37 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell50 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal61 | wc -l))
# Number of explicitly numbered NVIDIA architectures that were requested
# (the individual flags are summed with bc).
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
# No numbered architecture matched: accept the generic family names instead.
# A bare "Maxwell" selects Maxwell50 and a bare "Kepler" selects Kepler35;
# the sum is then recomputed with those two flags updated.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l))
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
endif
# Each *_ARCH_* flag below is 1 when its keyword occurs in KOKKOS_ARCH and 0
# otherwise; the combined values are sums of those flags computed with bc.
#ARM based
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8 | wc -l))
#IBM based
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l))
# Number of IBM architectures requested (BGQ + Power7 + Power8).
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc))
#AMD based
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
#Any AVX?
# AVX counts SNB and AMDAVX, AVX2 counts HSW and BDW, AVX512MIC mirrors KNL.
KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc ))
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
# Decide what ISA level we are able to support
# X86_64 counts SNB, HSW, BDW and KNL (AMDAVX is not part of this sum).
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc ))
#Incompatible flags?
#
# MULTIHOST is 1 when more than one host architecture family was requested,
# MULTIGPU is 1 when more than one NVIDIA architecture was requested; either
# condition aborts the build.
#
# KOKKOS_INTERNAL_USE_ARCH_AMDAVX is intentionally not a separate term of the
# host sum: it is already included in KOKKOS_INTERNAL_USE_ARCH_AVX
# (SNB + AMDAVX), so adding it again made KOKKOS_ARCH=AMDAVX on its own count
# as two host architectures and fail.
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)>1" | bc ))
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
  $(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIGPU), 1)
  $(error Defined Multiple GPU architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) )
endif
#Generating the list of Flags
# Starting values of the flag, library and file lists; later sections of
# this makefile append to them with +=.
# Include paths: the current directory plus the core, containers and
# algorithms source trees.
KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
# No warnings:
KOKKOS_CXXFLAGS =
# INTEL and CLANG warnings:
#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
# GCC warnings:
#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized -Wignored-qualifiers -Wempty-body -Wclobbered
KOKKOS_LIBS = -lkokkos -ldl
# Recursively expanded (=), so `pwd` is run each time KOKKOS_LDFLAGS is
# expanded; the library search path is the directory make is running in.
KOKKOS_LDFLAGS = -L$(shell pwd)
KOKKOS_SRC =
KOKKOS_HEADERS =
#Generating the KokkosCore_config.h file
#
# The configuration header is first written to KokkosCore_config.tmp, one
# shell `echo` per line, while make parses this file. Sections that enable a
# feature also extend KOKKOS_CPPFLAGS / KOKKOS_CXXFLAGS / KOKKOS_LDFLAGS /
# KOKKOS_LIBS as needed.
#
# The hash sign of every emitted define line comes from KOKKOS_INTERNAL_HASH
# rather than from a backslash-escaped hash inside $(shell ...). GNU make 4.3
# and newer keep that backslash inside a function call, which would put a
# stray backslash in front of every define in the generated header; older
# versions drop it. Expanding the hash from a variable gives the same output
# with both.
KOKKOS_INTERNAL_HASH := \#

tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp)
tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp)
tmp := $(shell date >> KokkosCore_config.tmp)
tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp)

tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp)
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_HAVE_OPENMP 1" >> KokkosCore_config.tmp)
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
  KOKKOS_CPPFLAGS += -I$(QTHREAD_PATH)/include
  KOKKOS_LDFLAGS += -L$(QTHREAD_PATH)/lib
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_HAVE_QTHREAD 1" >> KokkosCore_config.tmp )
endif

tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp)
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
endif
# A debug build adds -g (plus -G when CUDA is enabled) and turns on both the
# bounds-check and the general debug defines.
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
  ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
    KOKKOS_CXXFLAGS += -G
  endif
  KOKKOS_CXXFLAGS += -g
  KOKKOS_LDFLAGS += -g -ldl
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp )
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
  KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
  KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
  KOKKOS_LIBS += -lhwloc
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp )
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define PREC_TIMER 1" >> KokkosCore_config.tmp )
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOSP_ENABLE_RTLIB 1" >> KokkosCore_config.tmp )
  KOKKOS_LIBS += -lrt
endif
ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
  KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include
  KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib
  KOKKOS_LIBS += -lmemkind
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ENABLE_PROFILING 0" >> KokkosCore_config.tmp )
endif

tmp := $(shell echo "/* Optimization Settings */" >> KokkosCore_config.tmp)
ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION 1" >> KokkosCore_config.tmp )
endif

tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp)
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp )
  KOKKOS_CXXFLAGS += --relocatable-device-code=true
  KOKKOS_LDFLAGS += --relocatable-device-code=true
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
  KOKKOS_CXXFLAGS += -expt-extended-lambda
endif

#Add Architecture flags
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp )
  ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    # No extra flags are added for the Cray compiler.
    KOKKOS_CXXFLAGS +=
    KOKKOS_LDFLAGS +=
  else
    KOKKOS_CXXFLAGS += -mavx
    KOKKOS_LDFLAGS += -mavx
  endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp )
  KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
  KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp )
  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xCORE-AVX2
    KOKKOS_LDFLAGS += -xCORE-AVX2
  else
    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    else
      ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
      else
        # Assume that this is really a GNU compiler
        KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2
        KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2
      endif
    endif
  endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp )
  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
    KOKKOS_CXXFLAGS += -xMIC-AVX512
    KOKKOS_LDFLAGS += -xMIC-AVX512
  else
    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
    else
      ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
      else
        # Assume that this is really a GNU compiler
        KOKKOS_CXXFLAGS += -march=knl
        KOKKOS_LDFLAGS += -march=knl
      endif
    endif
  endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
  tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp )
  KOKKOS_CXXFLAGS += -mmic
  KOKKOS_LDFLAGS += -mmic
endif

# GPU architecture defines and -arch flags are only emitted when the Cuda
# device is enabled.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp )
    KOKKOS_CXXFLAGS += -arch=sm_30
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp )
    KOKKOS_CXXFLAGS += -arch=sm_32
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp )
    KOKKOS_CXXFLAGS += -arch=sm_35
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp )
    KOKKOS_CXXFLAGS += -arch=sm_37
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp )
    KOKKOS_CXXFLAGS += -arch=sm_50
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp )
    KOKKOS_CXXFLAGS += -arch=sm_52
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp )
    KOKKOS_CXXFLAGS += -arch=sm_53
  endif
  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "$(KOKKOS_INTERNAL_HASH)define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp )
    KOKKOS_CXXFLAGS += -arch=sm_61
  endif
endif
# Install the freshly generated configuration only when it differs from the
# existing KokkosCore_config.h in at least one line containing "define", so
# an unchanged configuration keeps the old header (and its timestamp).
#
# $(wildcard ...) yields the file name when KokkosCore_config.h exists and an
# empty string otherwise -- the same value `ls` printed, but without spawning
# a shell and without an error message on the first build.
KOKKOS_INTERNAL_LS_CONFIG := $(wildcard KokkosCore_config.h)
ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
  # Count the differing lines that mention "define"; the date stamp in the
  # header comment is therefore ignored.
  KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l))
else
  # No header yet: always install the new one.
  KOKKOS_INTERNAL_NEW_CONFIG := 1
endif
ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
  tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h)
endif
# Collect the headers and sources that are always part of the library:
# core, containers and algorithms headers, plus the core and containers
# implementation sources.
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.cpp)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
# Backend-specific files and link settings follow, one section per enabled
# execution space.
# Cuda: backend sources/headers plus the CUDA runtime and driver libraries
# from $(CUDA_PATH)/lib64.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
KOKKOS_LIBS += -lcudart -lcuda
endif
# Pthreads: the backend lives in the Threads directory.
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
KOKKOS_LIBS += -lpthread
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
KOKKOS_LIBS += -lqthread
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.hpp)
endif
# OpenMP: when Cuda is enabled as well, the OpenMP switch is passed to the
# compile step behind -Xcompiler; the link flags get it unprefixed either way.
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG)
else
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
endif
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
endif
#With Cygwin functions such as fdopen and fileno are not defined
#when strict ansi is enabled. strict ansi gets enabled with --std=c++11
#though. So we hard undefine it here. Not sure if that has any bad side effects
#This is needed for gtest actually, not for Kokkos itself!
ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1)
KOKKOS_CXXFLAGS += -U__STRICT_ANSI__
endif
# Setting up dependencies

# KokkosCore_config.h is written while this makefile is parsed, so the rule
# has neither prerequisites nor a recipe; it only makes the header a known
# target for the rules that list it as a prerequisite.
KokkosCore_config.h:

# Every Kokkos object depends on the generated header and on all collected
# Kokkos headers.
KOKKOS_CPP_DEPENDS := KokkosCore_config.h $(KOKKOS_HEADERS)

# Objects are named after their sources; KOKKOS_OBJ_LINK drops the directory
# part, so the object names refer to the directory make is running in.
KOKKOS_OBJ = $(KOKKOS_SRC:.cpp=.o)
KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ))

# Per-object compile rules.
include $(KOKKOS_PATH)/Makefile.targets

# kokkos-clean produces no file of that name; declaring it phony keeps the
# target working even if a file called "kokkos-clean" exists.
.PHONY: kokkos-clean
kokkos-clean:
	rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a

# Static library built from all Kokkos objects.
libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
	ar cr libkokkos.a $(KOKKOS_OBJ_LINK)
	ranlib libkokkos.a

# Name of the library file, for makefiles that include this one and need a
# link-time prerequisite.
KOKKOS_LINK_DEPENDS=libkokkos.a

View File

@ -0,0 +1,72 @@
Kokkos_UnorderedMap_impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp
Kokkos_Core.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp
Kokkos_CPUDiscovery.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_CPUDiscovery.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_CPUDiscovery.cpp
Kokkos_Error.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp
Kokkos_ExecPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp
Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
Kokkos_Serial_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp
Kokkos_Shape.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
KokkosExp_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
Kokkos_Cuda_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp
Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
Kokkos_Cuda_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
Kokkos_Threads_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
Kokkos_QthreadExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp
Kokkos_Qthread_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
Kokkos_OpenMPexec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp
Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
endif
Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
Kokkos_HBWAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp

152
lib/kokkos/README Normal file
View File

@ -0,0 +1,152 @@
Kokkos implements a programming model in C++ for writing performance portable
applications targeting all major HPC platforms. For that purpose it provides
abstractions for both parallel execution of code and data management.
Kokkos is designed to target complex node architectures with N-level memory
hierarchies and multiple types of execution resources. It currently can use
OpenMP, Pthreads and CUDA as backend programming models.
The core developers of Kokkos are Carter Edwards and Christian Trott
at the Computer Science Research Institute of the Sandia National
Laboratories.
The KokkosP interface and associated tools are developed by the Application
Performance Team and Kokkos core developers at Sandia National Laboratories.
To learn more about Kokkos consider watching one of our presentations:
GTC 2015:
http://on-demand.gputechconf.com/gtc/2015/video/S5166.html
http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf
A programming guide can be found under doc/Kokkos_PG.pdf. This is an initial version
and feedback is greatly appreciated.
A separate repository with extensive tutorial material can be found under
https://github.com/kokkos/kokkos-tutorials.
If you have a patch to contribute please feel free to issue a pull request against
the develop branch. For major contributions it is better to contact us first
for guidance.
For questions please send an email to
kokkos-users@software.sandia.gov
For non-public questions send an email to
hcedwar(at)sandia.gov and crtrott(at)sandia.gov
============================================================================
====Requirements============================================================
============================================================================
Primary tested compilers on X86 are:
GCC 4.7.2
GCC 4.8.4
GCC 4.9.2
GCC 5.1.0
Intel 14.0.4
Intel 15.0.2
Intel 16.0.1
Clang 3.5.2
Clang 3.6.1
Primary tested compilers on Power 8 are:
IBM XL 13.1.3 (OpenMP,Serial)
GCC 4.9.2 (OpenMP,Serial)
GCC 5.3.0 (OpenMP,Serial)
Secondary tested compilers are:
CUDA 6.5 (with gcc 4.7.2)
CUDA 7.0 (with gcc 4.7.2)
CUDA 7.5 (with gcc 4.8.4)
Other compilers working:
X86:
Intel 17.0.042 (the FENL example causes internal compiler error)
PGI 15.4
Cygwin 2.1.0 64bit with gcc 4.9.3
KNL:
Intel 16.2.181 (the FENL example causes internal compiler error)
Intel 17.0.042 (the FENL example causes internal compiler error)
Known non-working combinations:
Power8:
GCC 6.1.0
Pthreads backend
Primary tested compilers are passing in release mode
with warnings as errors. They are also tested with a comprehensive set of
backend combinations (i.e. OpenMP, Pthreads, Serial, OpenMP+Serial, ...).
We are using the following set of flags:
GCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits
-Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
Intel: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
Clang: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized
Secondary compilers are passing without -Werror.
Other compilers are tested occasionally, in particular when pushing from develop to
master branch, without -Werror and only for a select set of backends.
============================================================================
====Getting started=========================================================
============================================================================
In the 'example/tutorial' directory you will find step by step tutorial
examples which explain many of the features of Kokkos. They work with
simple Makefiles. To build with g++ and OpenMP simply type 'make openmp'
in the 'example/tutorial' directory. This will build all examples in the
subfolders.
============================================================================
====Running Unit Tests======================================================
============================================================================
To run the unit tests create a build directory and run the following commands
KOKKOS_PATH/generate_makefile.bash
make build-test
make test
Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as
changing the device type for which to build.
============================================================================
====Install the library=====================================================
============================================================================
To install Kokkos as a library create a build directory and run the following
KOKKOS_PATH/generate_makefile.bash --prefix=INSTALL_PATH
make lib
make install
Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as
changing the device type for which to build.
============================================================================
====CMakeFiles==============================================================
============================================================================
The CMake files contained in this repository require Tribits and are used
for integration with Trilinos. They do not currently support a standalone
CMake build.
===========================================================================
====Kokkos and CUDA UVM====================================================
===========================================================================
Kokkos does support UVM as a specific memory space called CudaUVMSpace.
Allocations made with that space are accessible from host and device.
You can tell Kokkos to use that as the default space for Cuda allocations.
In either case UVM comes with a number of restrictions:
(i) You can't access allocations on the host while a kernel is potentially
running. This will lead to segfaults. To avoid that you either need to
call Kokkos::Cuda::fence() (or just Kokkos::fence()), after kernels, or
you can set the environment variable CUDA_LAUNCH_BLOCKING=1.
Furthermore in multi socket multi GPU machines, UVM defaults to using
zero copy allocations for technical reasons related to using multiple
GPUs from the same process. If an executable doesn't do that (e.g. each
MPI rank of an application uses a single GPU [can be the same GPU for
multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1.
This will enforce proper UVM allocations, but can lead to errors if
more than a single GPU is used by a single process.

View File

@ -0,0 +1,10 @@
TRIBITS_SUBPACKAGE(Algorithms)
ADD_SUBDIRECTORY(src)
TRIBITS_ADD_TEST_DIRECTORIES(unit_tests)
#TRIBITS_ADD_TEST_DIRECTORIES(performance_tests)
TRIBITS_SUBPACKAGE_POSTPROCESS()

View File

@ -0,0 +1,5 @@
TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
LIB_REQUIRED_PACKAGES KokkosCore
LIB_OPTIONAL_TPLS Pthread CUDA HWLOC
TEST_OPTIONAL_TPLS CUSPARSE
)

View File

@ -0,0 +1,4 @@
#ifndef KOKKOS_ALGORITHMS_CONFIG_H
#define KOKKOS_ALGORITHMS_CONFIG_H
#endif

View File

@ -0,0 +1,21 @@
# Build description for the kokkosalgorithms library.
#
# NOTE(review): TRIBITS_CONFIGURE_FILE presumably generates
# ${PACKAGE_NAME}_config.h in the binary directory -- that is where the header
# is picked up from when it is appended to HEADERS below; confirm against TriBITS.
TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h)
# Make both the generated header (binary dir) and the sources in this
# directory visible on the include path.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
#-----------------------------------------------------------------------------
# Collect every header and source file of this directory by glob.
# Files added later are only picked up after CMake is re-run.
FILE(GLOB HEADERS *.hpp)
FILE(GLOB SOURCES *.cpp)
# The configured header lives in the binary dir, so the glob above cannot see it.
LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
#-----------------------------------------------------------------------------
# Declare the library; DEPLIBS is intentionally left empty here.
TRIBITS_ADD_LIBRARY(
kokkosalgorithms
HEADERS ${HEADERS}
SOURCES ${SOURCES}
DEPLIBS
)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,496 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SORT_HPP_
#define KOKKOS_SORT_HPP_
#include <Kokkos_Core.hpp>
#include <algorithm>
namespace Kokkos {
namespace SortImpl {

// CopyOp<ValuesViewType, Rank>::copy(dst, i_dst, src, i_src) copies the
// entry (rank 1), row (rank 2) or slab (rank 3) with leading index i_src of
// src to leading index i_dst of dst. The trailing extents are taken from dst.
// Only ranks 1, 2 and 3 are specialized; the primary template is left
// undefined so that other ranks fail at compile time.
template<class ValuesViewType, int Rank=ValuesViewType::Rank>
struct CopyOp;

// Rank 1: a single element per leading index.
template<class ValuesViewType>
struct CopyOp<ValuesViewType,1> {
  template<class DstType, class SrcType>
  KOKKOS_INLINE_FUNCTION
  static void copy(DstType& dst, size_t i_dst,
                   SrcType& src, size_t i_src ) {
    dst(i_dst) = src(i_src);
  }
};

// Rank 2: one row of dst.dimension_1() elements per leading index.
template<class ValuesViewType>
struct CopyOp<ValuesViewType,2> {
  template<class DstType, class SrcType>
  KOKKOS_INLINE_FUNCTION
  static void copy(DstType& dst, size_t i_dst,
                   SrcType& src, size_t i_src ) {
    const int n1 = (int) dst.dimension_1();
    for(int j = 0; j < n1; j++)
      dst(i_dst,j) = src(i_src,j);
  }
};

// Rank 3: a dimension_1() x dimension_2() slab per leading index.
template<class ValuesViewType>
struct CopyOp<ValuesViewType,3> {
  template<class DstType, class SrcType>
  KOKKOS_INLINE_FUNCTION
  static void copy(DstType& dst, size_t i_dst,
                   SrcType& src, size_t i_src ) {
    // Extents are converted to int once, as in the rank-2 case, so the loop
    // conditions do not compare signed with unsigned values.
    const int n1 = (int) dst.dimension_1();
    const int n2 = (int) dst.dimension_2();
    for(int j = 0; j < n1; j++)
      for(int k = 0; k < n2; k++)
        dst(i_dst,j,k) = src(i_src,j,k);
  }
};
}
// BinSort builds a permutation that orders keys by bin and can then apply
// that permutation to value views.
//
// Template parameters:
//   KeyViewType    - view type holding the keys.
//   BinSortOp      - binning operator; this class calls bin_op.max_bins(),
//                    bin_op.bin(keys,i) and, when sorting inside bins,
//                    bin_op(keys,i1,i2).
//   ExecutionSpace - space in which the kernels are run.
//   SizeType       - integer type used for offsets and the permutation.
//
// Typical use: construct, call create_permute_vector(), then call sort() for
// every view that should be reordered.
template<class KeyViewType, class BinSortOp, class ExecutionSpace = typename KeyViewType::execution_space,
         class SizeType = typename KeyViewType::memory_space::size_type>
class BinSort {

public:
  // Functor applying a permutation: entry i of sorted_values receives entry
  // sort_order(i) of values. CopyOp performs the rank-dependent copy.
  template<class ValuesViewType, class PermuteViewType, class CopyOp>
  struct bin_sort_sort_functor {
    typedef ExecutionSpace execution_space;
    typedef typename ValuesViewType::non_const_type values_view_type;
    typedef typename ValuesViewType::const_type const_values_view_type;
    Kokkos::View<typename values_view_type::const_data_type,typename values_view_type::array_layout,
                 typename values_view_type::memory_space,Kokkos::MemoryTraits<Kokkos::RandomAccess> > values;
    values_view_type sorted_values;
    typename PermuteViewType::const_type sort_order;
    bin_sort_sort_functor(const_values_view_type values_, values_view_type sorted_values_, PermuteViewType sort_order_):
      values(values_),sorted_values(sorted_values_),sort_order(sort_order_) {}

    KOKKOS_INLINE_FUNCTION
    void operator() (const int& i) const {
      CopyOp::copy(sorted_values,i,values,sort_order(i));
    }
  };

  typedef ExecutionSpace execution_space;
  typedef BinSortOp bin_op_type;

  // Tags selecting which operator() overload a kernel launch invokes.
  struct bin_count_tag {};
  struct bin_offset_tag {};
  struct bin_binning_tag {};
  struct bin_sort_bins_tag {};

public:
  typedef SizeType size_type;
  typedef size_type value_type;

  typedef Kokkos::View<size_type*, execution_space> offset_type;
  typedef Kokkos::View<const int*, execution_space> bin_count_type;

  typedef Kokkos::View<typename KeyViewType::const_data_type,
                       typename KeyViewType::array_layout,
                       typename KeyViewType::memory_space> const_key_view_type;
  typedef Kokkos::View<typename KeyViewType::const_data_type,
                       typename KeyViewType::array_layout,
                       typename KeyViewType::memory_space,
                       Kokkos::MemoryTraits<Kokkos::RandomAccess> > const_rnd_key_view_type;

  typedef typename KeyViewType::non_const_value_type non_const_key_scalar;
  typedef typename KeyViewType::const_value_type const_key_scalar;

private:
  const_key_view_type keys;          // keys as passed to the constructor
  const_rnd_key_view_type keys_rnd;  // same keys, RandomAccess trait (used when sorting inside bins)

public:
  BinSortOp bin_op;
  offset_type bin_offsets;           // start offset of every bin inside sort_order
  Kokkos::View<int*, ExecutionSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > bin_count_atomic;
  bin_count_type bin_count_const;    // const view assigned from bin_count_atomic in the constructor
  offset_type sort_order;            // the permutation vector

  bool sort_within_bins;

public:

  // Constructor: takes the keys, the binning_operator and optionally whether to sort within bins (default false)
  BinSort(const_key_view_type keys_, BinSortOp bin_op_,
          bool sort_within_bins_ = false)
    :keys(keys_),keys_rnd(keys_), bin_op(bin_op_) {

    bin_count_atomic = Kokkos::View<int*, ExecutionSpace >("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins());
    bin_count_const = bin_count_atomic;
    bin_offsets = offset_type("Kokkos::SortImpl::BinSortFunctor::bin_offsets",bin_op.max_bins());
    sort_order = offset_type("PermutationVector",keys.dimension_0());
    sort_within_bins = sort_within_bins_;
  }

  // Create the permutation vector, the bin_offset array and the bin_count array. Can be called again if keys changed
  void create_permute_vector() {
    // 1) count the keys falling into every bin
    Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_count_tag> (0,keys.dimension_0()),*this);
    // 2) prefix-sum the counts into bin_offsets
    Kokkos::parallel_scan(Kokkos::RangePolicy<ExecutionSpace,bin_offset_tag> (0,bin_op.max_bins()) ,*this);

    // 3) reset the counters; the binning pass re-uses them to hand out slots
    Kokkos::deep_copy(bin_count_atomic,0);
    // 4) write every key index into a slot of its bin
    Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_binning_tag> (0,keys.dimension_0()),*this);

    // 5) optionally order the indices inside every bin
    if(sort_within_bins)
      Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_sort_bins_tag>(0,bin_op.max_bins()) ,*this);
  }

  // Sort a view with respect to the first dimension using the permutation array.
  // The permuted data is assembled in a temporary view and copied back into values.
  template<class ValuesViewType>
  void sort(ValuesViewType values) {
    ValuesViewType sorted_values = ValuesViewType("Copy",
           values.dimension_0(),
           values.dimension_1(),
           values.dimension_2(),
           values.dimension_3(),
           values.dimension_4(),
           values.dimension_5(),
           values.dimension_6(),
           values.dimension_7());

    parallel_for(values.dimension_0(),
        bin_sort_sort_functor<ValuesViewType, offset_type,
                              SortImpl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order));

    deep_copy(values,sorted_values);
  }

  // Get the permutation vector
  KOKKOS_INLINE_FUNCTION
  offset_type get_permute_vector() const { return sort_order;}

  // Get the start offsets for each bin
  KOKKOS_INLINE_FUNCTION
  offset_type get_bin_offsets() const { return bin_offsets;}

  // Get the count for each bin
  KOKKOS_INLINE_FUNCTION
  bin_count_type get_bin_count() const {return bin_count_const;}

public:
  // Counting pass: increment the counter of the bin key i belongs to.
  KOKKOS_INLINE_FUNCTION
  void operator() (const bin_count_tag& tag, const int& i) const {
    bin_count_atomic(bin_op.bin(keys,i))++;
  }

  // Scan pass: bin_offsets(i) receives the sum of the counts of all bins before i.
  KOKKOS_INLINE_FUNCTION
  void operator() (const bin_offset_tag& tag, const int& i, value_type& offset, const bool& final) const {
    if(final) {
      bin_offsets(i) = offset;
    }
    offset+=bin_count_const(i);
  }

  // Binning pass: claim the next free slot of the key's bin and store the key index there.
  KOKKOS_INLINE_FUNCTION
  void operator() (const bin_binning_tag& tag, const int& i) const {
    const int bin = bin_op.bin(keys,i);
    const int count = bin_count_atomic(bin)++;

    sort_order(bin_offsets(bin) + count) = i;
  }

  // Sort pass: bubble-sort the indices stored in bin i. Two neighbouring
  // indices are swapped whenever bin_op(keys_rnd, left, right) returns false.
  KOKKOS_INLINE_FUNCTION
  void operator() (const bin_sort_bins_tag& tag, const int&i ) const {
    const int bin_begin = bin_offsets(i);
    const int bin_size = bin_count_const(i);
    // Nothing to order for empty or single-element bins. For an empty bin
    // bin_begin may equal the length of sort_order, so it must not be read.
    if(bin_size <= 1) return;

    bool sorted = false;
    int upper_bound = bin_begin+bin_size;
    while(!sorted) {
      sorted = true;
      int old_idx = sort_order(bin_begin);
      int new_idx;
      for(int k=bin_begin+1; k<upper_bound; k++) {
        new_idx = sort_order(k);

        if(!bin_op(keys_rnd,old_idx,new_idx)) {
          sort_order(k-1) = new_idx;
          sort_order(k) = old_idx;
          sorted = false;
        } else {
          old_idx = new_idx;
        }
      }
      // The last slot examined in this sweep is settled; shrink the range.
      upper_bound--;
    }
  }
};
namespace SortImpl {
// Default binning operator for one-dimensional keys: maps the value range
// [min,max] linearly onto the bin indices 0 .. max_bins__.
template<class KeyViewType>
struct DefaultBinOp1D {
  const int max_bins_;   // number of bins (requested count + 1)
  const double mul_;     // scale factor from (key - min_) to bin index
  typename KeyViewType::const_value_type range_;
  typename KeyViewType::const_value_type min_;

  //Construct BinOp with number of bins, minimum value and maximum value
  DefaultBinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
                                 typename KeyViewType::const_value_type max )
    :max_bins_(max_bins__+1),
     // A zero-width range (max == min) would divide by zero and make bin()
     // convert inf/NaN to int; use a scale of 0 so every key lands in bin 0.
     mul_((max != min) ? 1.0*max_bins__/(max-min) : 0.0),
     range_(max-min),min_(min) {}

  //Determine bin index from key value
  template<class ViewType>
  KOKKOS_INLINE_FUNCTION
  int bin(ViewType& keys, const int& i) const {
    return int(mul_*(keys(i)-min_));
  }

  //Return maximum bin index + 1
  KOKKOS_INLINE_FUNCTION
  int max_bins() const {
    return max_bins_;
  }

  //Compare two keys within a bin: returns true if keys(i1) is less than keys(i2)
  template<class ViewType, typename iType1, typename iType2>
  KOKKOS_INLINE_FUNCTION
  bool operator()(ViewType& keys, iType1& i1, iType2& i2) const {
    return keys(i1)<keys(i2);
  }
};
// Default binning operator for keys with three components per entry
// (accessed as keys(i,0), keys(i,1), keys(i,2)). Every component is binned
// linearly on its own range and the three bin indices are combined into one.
template<class KeyViewType>
struct DefaultBinOp3D {
  int max_bins_[3];   // number of bins per component (requested count + 1)
  double mul_[3];     // scale factor from (key - min_) to bin index, per component
  typename KeyViewType::non_const_value_type range_[3];
  typename KeyViewType::non_const_value_type min_[3];

  // Construct BinOp with the number of bins, minimum value and maximum value
  // of each of the three components.
  DefaultBinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
                                   typename KeyViewType::const_value_type max[] )
  {
    for(int d = 0; d < 3; d++) {
      max_bins_[d] = max_bins__[d]+1;
      range_[d] = max[d]-min[d];
      // A zero-width range would divide by zero; use a scale of 0 so that
      // this component always contributes bin index 0.
      mul_[d] = (max[d] != min[d]) ? 1.0*max_bins__[d]/(max[d]-min[d]) : 0.0;
      min_[d] = min[d];
    }
  }

  // Determine the combined bin index of key i; component 0 is the most
  // significant part of the index, component 2 the least significant.
  template<class ViewType>
  KOKKOS_INLINE_FUNCTION
  int bin(ViewType& keys, const int& i) const {
    return int( (((int(mul_[0]*(keys(i,0)-min_[0]))*max_bins_[1]) +
                   int(mul_[1]*(keys(i,1)-min_[1])))*max_bins_[2]) +
                   int(mul_[2]*(keys(i,2)-min_[2])));
  }

  // Return the total number of bins
  KOKKOS_INLINE_FUNCTION
  int max_bins() const {
    return max_bins_[0]*max_bins_[1]*max_bins_[2];
  }

  // Compare two keys within a bin: returns true if key i1 is greater than
  // key i2 when comparing component 0 first, then component 1, then component 2.
  template<class ViewType, typename iType1, typename iType2>
  KOKKOS_INLINE_FUNCTION
  bool operator()(ViewType& keys, iType1& i1 , iType2& i2) const {
    if (keys(i1,0)>keys(i2,0)) return true;
    else if (keys(i1,0)==keys(i2,0)) {
      if (keys(i1,1)>keys(i2,1)) return true;
      // Tie on component 1 (both sides must index component 1) falls
      // through to component 2.
      else if (keys(i1,1)==keys(i2,1)) {
        if (keys(i1,2)>keys(i2,2)) return true;
      }
    }
    return false;
  }
};
// Reduction value that tracks the smallest and largest Scalar seen so far.
// 'init' records whether any value has been folded in yet; while it is false
// min and max are both 0 and carry no information.
template<typename Scalar>
struct min_max {
  Scalar min;
  Scalar max;
  bool init;

  // Start empty: no value seen yet.
  KOKKOS_INLINE_FUNCTION
  min_max() {
    min = 0;
    max = 0;
    init = false;
  }

  KOKKOS_INLINE_FUNCTION
  min_max (const min_max& val) {
    min = val.min;
    max = val.max;
    init = val.init;
  }

  // Returns a reference to *this instead of a temporary copy.
  KOKKOS_INLINE_FUNCTION
  min_max& operator = (const min_max& val) {
    min = val.min;
    max = val.max;
    init = val.init;
    return *this;
  }

  // Fold a single value into the running minimum/maximum.
  KOKKOS_INLINE_FUNCTION
  void operator+= (const Scalar& val) {
    if(init) {
      min = min<val?min:val;
      max = max>val?max:val;
    } else {
      min = val;
      max = val;
      init = true;
    }
  }

  // Merge another partial result; an uninitialized operand contributes nothing.
  KOKKOS_INLINE_FUNCTION
  void operator+= (const min_max& val) {
    if(init && val.init) {
      min = min<val.min?min:val.min;
      max = max>val.max?max:val.max;
    } else {
      if(val.init) {
        min = val.min;
        max = val.max;
        init = true;
      }
    }
  }

  // Volatile overload of the single-value fold.
  KOKKOS_INLINE_FUNCTION
  void operator+= (volatile const Scalar& val) volatile {
    if(init) {
      min = min<val?min:val;
      max = max>val?max:val;
    } else {
      min = val;
      max = val;
      init = true;
    }
  }

  // Volatile overload of the partial-result merge.
  KOKKOS_INLINE_FUNCTION
  void operator+= (volatile const min_max& val) volatile {
    if(init && val.init) {
      min = min<val.min?min:val.min;
      max = max>val.max?max:val.max;
    } else {
      if(val.init) {
        min = val.min;
        max = val.max;
        init = true;
      }
    }
  }
};
// Reduction functor feeding every entry of a view into a min_max accumulator.
template<class ViewType>
struct min_max_functor {
  typedef typename ViewType::execution_space execution_space;
  typedef min_max<typename ViewType::non_const_value_type> value_type;

  ViewType view;   // the data being scanned

  min_max_functor (const ViewType source_view)
    : view(source_view)
  {}

  // Fold entry i into the partial result.
  KOKKOS_INLINE_FUNCTION
  void operator()(const size_t& i, value_type& val) const
  {
    val += view(i);
  }
};
// Sorts the view with std::sort when that is applicable and reports whether
// it did. std::sort is used only if the view's memory space is HostSpace,
// its rank is 1 and its first stride is 1; otherwise the view is left
// untouched and false is returned.
template<class ViewType>
bool try_std_sort(ViewType view) {
#if ! KOKKOS_USING_EXP_VIEW
  size_t stride[8];
  view.stride(stride);
#else
  size_t stride[8] = { view.stride_0()
                     , view.stride_1()
                     , view.stride_2()
                     , view.stride_3()
                     , view.stride_4()
                     , view.stride_5()
                     , view.stride_6()
                     , view.stride_7()
                     };
#endif
  const bool in_host_space = Impl::is_same<typename ViewType::memory_space, HostSpace>::value;
  const bool is_rank_one   = (ViewType::Rank == 1);
  const bool unit_stride   = (stride[0] == 1);

  if(!(in_host_space && is_rank_one && unit_stride)) {
    return false;
  }

  std::sort(view.ptr_on_device(),view.ptr_on_device()+view.dimension_0());
  return true;
}
}
template<class ViewType>
void sort(ViewType view, bool always_use_kokkos_sort = false) {
if(!always_use_kokkos_sort) {
if(SortImpl::try_std_sort(view)) return;
}
typedef SortImpl::DefaultBinOp1D<ViewType> CompType;
SortImpl::min_max<typename ViewType::non_const_value_type> val;
parallel_reduce(view.dimension_0(),SortImpl::min_max_functor<ViewType>(view),val);
BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,val.min,val.max),true);
bin_sort.create_permute_vector();
bin_sort.sort(view);
}
/*template<class ViewType, class Comparator>
void sort(ViewType view, Comparator comp, bool always_use_kokkos_sort = false) {
}*/
}
#endif

View File

@ -0,0 +1,38 @@
# Unit-test executable for the Kokkos algorithms subpackage.

# Headers are looked up in the build dir, in this directory and in ../src.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
# Sources that are always compiled. TestCuda.cpp is listed unconditionally,
# unlike the other backend tests below, which are added per enabled backend.
SET(SOURCES
UnitTestMain.cpp
TestCuda.cpp
)
# NOTE(review): LIBRARIES is not referenced again in this file -- confirm
# whether it is still needed.
SET(LIBRARIES kokkoscore)
# Add the test source of every enabled host backend.
IF(Kokkos_ENABLE_OpenMP)
LIST( APPEND SOURCES
TestOpenMP.cpp
)
ENDIF()
IF(Kokkos_ENABLE_Serial)
LIST( APPEND SOURCES
TestSerial.cpp
)
ENDIF()
IF(Kokkos_ENABLE_Pthread)
LIST( APPEND SOURCES
TestThreads.cpp
)
ENDIF()
# Build the executable and register it as a test run with one MPI process.
# Any output containing " FAILED " marks the test as failed.
TRIBITS_ADD_EXECUTABLE_AND_TEST(
UnitTest
SOURCES ${SOURCES}
COMM serial mpi
NUM_MPI_PROCS 1
FAIL_REGULAR_EXPRESSION " FAILED "
TESTONLYLIBS kokkos_gtest
)

View File

@ -0,0 +1,92 @@
KOKKOS_PATH = ../..
GTEST_PATH = ../../TPL/gtest
vpath %.cpp ${KOKKOS_PATH}/algorithms/unit_tests
default: build_all
echo "End Build"
include $(KOKKOS_PATH)/Makefile.kokkos
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
CXX = $(NVCC_WRAPPER)
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests
TEST_TARGETS =
TARGETS =
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_Cuda
TEST_TARGETS += test-cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_Threads
TEST_TARGETS += test-threads
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_OpenMP
TEST_TARGETS += test-openmp
endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_Serial
TEST_TARGETS += test-serial
endif
KokkosAlgorithms_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Cuda
KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Threads
KokkosAlgorithms_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_OpenMP
KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Serial
test-cuda: KokkosAlgorithms_UnitTest_Cuda
./KokkosAlgorithms_UnitTest_Cuda
test-threads: KokkosAlgorithms_UnitTest_Threads
./KokkosAlgorithms_UnitTest_Threads
test-openmp: KokkosAlgorithms_UnitTest_OpenMP
./KokkosAlgorithms_UnitTest_OpenMP
test-serial: KokkosAlgorithms_UnitTest_Serial
./KokkosAlgorithms_UnitTest_Serial
build_all: $(TARGETS)
test: $(TEST_TARGETS)
clean: kokkos-clean
rm -f *.o $(TARGETS)
# Compilation rules
# Pattern rule: compile any .cpp into the matching object file. Every object
# also depends on $(KOKKOS_CPP_DEPENDS), so it is rebuilt when that changes.
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
# gtest is built from its single amalgamated source file under $(GTEST_PATH).
gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc

View File

@ -0,0 +1,110 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdint.h>
#include <iostream>
#include <iomanip>
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#ifdef KOKKOS_HAVE_CUDA
#include <TestRandom.hpp>
#include <TestSort.hpp>
namespace Test {
// GoogleTest fixture shared by the CUDA algorithm tests. The execution spaces
// are initialized once before the first test of the fixture and finalized once
// after the last one.
class cuda : public ::testing::Test {
protected:
  // Configures the output format of std::cout, then initializes the host
  // execution space followed by CUDA device 0.
  static void SetUpTestCase()
  {
    std::cout << std::setprecision(5);
    std::cout << std::scientific;

    Kokkos::HostSpace::execution_space::initialize();
    Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
  }

  // Finalizes in the reverse order of initialization: CUDA first, host last.
  static void TearDownTestCase()
  {
    Kokkos::Cuda::finalize();
    Kokkos::HostSpace::execution_space::finalize();
  }
};
void cuda_test_random_xorshift64( int num_draws )
{
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Cuda> >(num_draws);
}
void cuda_test_random_xorshift1024( int num_draws )
{
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Cuda> >(num_draws);
}
// The three CUDA test cases. Each one is defined directly: the test name is
// fixed, so every case can only be instantiated once anyway.

// Statistical checks of the XorShift64 pool.
TEST_F( cuda, Random_XorShift64 ) {
  cuda_test_random_xorshift64( 132141141 );
}

// Statistical checks of the XorShift1024 pool.
TEST_F( cuda, Random_XorShift1024 ) {
  cuda_test_random_xorshift1024( 52428813 );
}

// Sort tests on unsigned keys; the argument is the edge length N that
// Impl::test_sort turns into N*N*N keys.
TEST_F( cuda, SortUnsigned ) {
  Impl::test_sort< Kokkos::Cuda, unsigned >( 171 );
}
}
#endif /* #ifdef KOKKOS_HAVE_CUDA */

View File

@ -0,0 +1,102 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
namespace Test {
#ifdef KOKKOS_HAVE_OPENMP
// GoogleTest fixture shared by the OpenMP algorithm tests. Kokkos::OpenMP is
// initialized once before the first test and finalized once after the last.
class openmp : public ::testing::Test {
protected:
  // Configures the output format of std::cout and starts the OpenMP backend.
  // The thread count is what OpenMP reports, unless hwloc is available, in
  // which case it is (available NUMA regions) x (available cores per region).
  static void SetUpTestCase()
  {
    std::cout << std::setprecision(5);
    std::cout << std::scientific;

    const unsigned omp_threads = omp_get_max_threads();
    const unsigned threads_count =
      Kokkos::hwloc::available()
        ? Kokkos::hwloc::get_available_numa_count() *
          Kokkos::hwloc::get_available_cores_per_numa()
        : omp_threads;

    Kokkos::OpenMP::initialize( threads_count );
  }

  static void TearDownTestCase()
  {
    Kokkos::OpenMP::finalize();
  }
};
// The three OpenMP test cases. Each one is defined directly: the test name is
// fixed, so every case can only be instantiated once anyway.

// Statistical checks of the XorShift64 pool.
TEST_F( openmp, Random_XorShift64 ) {
  typedef Kokkos::Random_XorShift64_Pool<Kokkos::OpenMP> pool_type;
  Impl::test_random<pool_type>( 10240000 );
}

// Statistical checks of the XorShift1024 pool.
TEST_F( openmp, Random_XorShift1024 ) {
  typedef Kokkos::Random_XorShift1024_Pool<Kokkos::OpenMP> pool_type;
  Impl::test_random<pool_type>( 10130144 );
}

// Sort tests on unsigned keys; the argument is the edge length N that
// Impl::test_sort turns into N*N*N keys.
TEST_F( openmp, SortUnsigned ) {
  Impl::test_sort< Kokkos::OpenMP, unsigned >( 171 );
}
#endif
} // namespace Test

View File

@ -0,0 +1,481 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
// Include guard for the random-number test header. The macro is named after
// this header's own content so that it cannot collide with the guard of an
// unrelated test header included into the same translation unit.
#ifndef KOKKOS_TEST_RANDOM_HPP
#define KOKKOS_TEST_RANDOM_HPP
#include <gtest/gtest.h>
#include <iostream>
#include <cstdlib>
#include <cstdio>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <cmath>
#include <chrono>
namespace Test {
namespace Impl{
// This test runs the random number generators and uses some statistical tests to
// check the 'goodness' of the random numbers:
// (i) mean: the mean is expected to be 0.5*RAND_MAX
// (ii) variance: the variance is 1/3*mean*mean
// (iii) covariance: the covariance is 0
// (iv) 1-tupledistr: the mean, variance and covariance of a 1D histogram of random numbers
// (v) 3-tupledistr: the mean, variance and covariance of a 3D histogram of random numbers
// Number of histogram bins along each axis of the 3-D histogram.
#define HIST_DIM3D 24
// Number of bins of the 1-D histogram; equal to the total number of cells of
// the 3-D histogram (HIST_DIM3D cubed).
#define HIST_DIM1D (HIST_DIM3D*HIST_DIM3D*HIST_DIM3D)
// Reduction value accumulated by the functors below: running sums for the
// mean, variance and covariance, the number of samples, and the smallest and
// largest value seen.
struct RandomProperties {
  uint64_t count;
  double mean;
  double variance;
  double covariance;
  double min;
  double max;

  // Identity element of the reduction: all sums are zero, and min/max start
  // at +1e64 / -1e64 so that the first real value replaces them.
  KOKKOS_INLINE_FUNCTION
  RandomProperties()
    : count(0)
    , mean(0.0)
    , variance(0.0)
    , covariance(0.0)
    , min(1e64)
    , max(-1e64)
  {}

  // Merges another partial result into this one.
  KOKKOS_INLINE_FUNCTION
  RandomProperties& operator+=(const RandomProperties& add) {
    count      += add.count;
    mean       += add.mean;
    variance   += add.variance;
    covariance += add.covariance;
    if (add.min < min) {
      min = add.min;
    }
    if (add.max > max) {
      max = add.max;
    }
    return *this;
  }

  // Volatile overload of the merge above.
  KOKKOS_INLINE_FUNCTION
  void operator+=(const volatile RandomProperties& add) volatile {
    count      += add.count;
    mean       += add.mean;
    variance   += add.variance;
    covariance += add.covariance;
    min = (add.min < min) ? add.min : min;
    max = (add.max > max) ? add.max : max;
  }
};
// Reduction functor that draws random numbers of type Scalar from a generator
// pool. Every call draws 1024 triples; each draw is added to the running
// moment sums in a RandomProperties value and counted in the 1-D histogram,
// and each triple is counted in one cell of the 3-D histogram.
template<class GeneratorPool, class Scalar>
struct test_random_functor {
  typedef typename GeneratorPool::generator_type rnd_type;
  typedef RandomProperties value_type;
  typedef typename GeneratorPool::device_type device_type;

  // Kokkos::rand::max() is supposed to be an exclusive upper bound on the
  // values draw() can return. For the float specialization some
  // implementations might violate that bound because of rounding error, so
  // both histogram types carry one spare entry at the end of each dimension.
  typedef Kokkos::View<int[HIST_DIM1D+1],typename GeneratorPool::device_type> type_1d;
  typedef Kokkos::View<int[HIST_DIM3D+1][HIST_DIM3D+1][HIST_DIM3D+1],typename GeneratorPool::device_type> type_3d;

  GeneratorPool rand_pool;
  const double mean;      // expected mean: half of the generator's max()
  type_1d density_1d;
  type_3d density_3d;

  test_random_functor (GeneratorPool rand_pool_, type_1d d1d, type_3d d3d)
    : rand_pool (rand_pool_)
    , mean (0.5*Kokkos::rand<rnd_type,Scalar>::max ())
    , density_1d (d1d)
    , density_3d (d3d)
  {}

  KOKKOS_INLINE_FUNCTION
  void operator() (int i, RandomProperties& prop) const {
    typedef Kokkos::rand<rnd_type,Scalar> scalar_rand;

    const Scalar theMax = scalar_rand::max ();

    // Borrow a generator from the pool for the whole batch of draws.
    rnd_type rand_gen = rand_pool.get_state();

    for (int k = 0; k < 1024; ++k) {
      // Three consecutive draws form one triple.
      Scalar draw[3];
      for (int d = 0; d < 3; ++d) {
        draw[d] = scalar_rand::draw(rand_gen);
      }

      // Moment sums; the covariance pairs each draw with its predecessor
      // inside the triple.
      for (int d = 0; d < 3; ++d) {
        prop.count++;
        prop.mean += draw[d];
        prop.variance += (draw[d]-mean)*(draw[d]-mean);
        if (d > 0) {
          prop.covariance += (draw[d-1]-mean)*(draw[d]-mean);
        }
      }

      // Histogram bins. A draw equal to max() lands in the spare entry at the
      // end of a dimension. Those spare entries might not get counted when the
      // histograms are evaluated, but a generator that only ever returns max()
      // is still caught indirectly, because none of the regular bins fill up.
      uint64_t bin_1d[3];
      uint64_t bin_3d[3];
      for (int d = 0; d < 3; ++d) {
        bin_1d[d] = static_cast<uint64_t> (1.0 * HIST_DIM1D * draw[d] / theMax);
        bin_3d[d] = static_cast<uint64_t> (1.0 * HIST_DIM3D * draw[d] / theMax);
      }

      for (int d = 0; d < 3; ++d) {
        Kokkos::atomic_fetch_add (&density_1d(bin_1d[d]), 1);
      }
      Kokkos::atomic_fetch_add (&density_3d(bin_3d[0], bin_3d[1], bin_3d[2]), 1);
    }

    // Hand the generator back to the pool.
    rand_pool.free_state(rand_gen);
  }
};
// Reduction functor over the bins of the 1-D histogram. It accumulates the sum
// of the bin counts, the squared deviation from the expected count, the
// product of deviations of neighbouring bins, and the smallest/largest count.
template<class DeviceType>
struct test_histogram1d_functor {
  typedef RandomProperties value_type;
  typedef typename DeviceType::execution_space execution_space;
  typedef typename DeviceType::memory_space memory_space;

  // Same shape as the histogram filled by test_random_functor, including the
  // spare entry at the end that absorbs draws equal to max().
  typedef Kokkos::View<int[HIST_DIM1D+1], memory_space> type_1d;

  type_1d density_1d;
  double mean;   // expected count per bin

  test_histogram1d_functor (type_1d d1d, int num_draws)
    : density_1d (d1d)
    , mean (1.0*num_draws/HIST_DIM1D*3)
  {}

  KOKKOS_INLINE_FUNCTION void
  operator() (const typename memory_space::size_type i,
              RandomProperties& prop) const
  {
    typedef typename memory_space::size_type size_type;

    const double bin_count = density_1d(i);
    const double deviation = bin_count - mean;

    prop.mean     += bin_count;
    prop.variance += deviation * deviation;

    if (bin_count < prop.min) {
      prop.min = bin_count;
    }
    if (bin_count > prop.max) {
      prop.max = bin_count;
    }

    // The last regular bin has no successor to pair with.
    if (i < static_cast<size_type> (HIST_DIM1D-1)) {
      prop.covariance += deviation * (density_1d(i+1) - mean);
    }
  }
};
// Reduction functor over the cells of the 3-D histogram. The cells are visited
// through a flat index i in [0, HIST_DIM1D) that is decomposed into the three
// coordinates. It accumulates the sum of the cell counts, the squared
// deviation from the expected count, the product of deviations of consecutive
// cells, and the smallest/largest count.
template<class DeviceType>
struct test_histogram3d_functor {
  typedef RandomProperties value_type;
  typedef typename DeviceType::execution_space execution_space;
  typedef typename DeviceType::memory_space memory_space;

  // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define
  // an exclusive upper bound on the range of random numbers that
  // draw() can generate. However, for the float specialization, some
  // implementations might violate this upper bound, due to rounding
  // error. Just in case, we leave an extra space at the end of each
  // dimension, in the View type below.
  typedef Kokkos::View<int[HIST_DIM3D+1][HIST_DIM3D+1][HIST_DIM3D+1], memory_space> type_3d;
  type_3d density_3d;
  double mean;   // expected count per cell

  test_histogram3d_functor (type_3d d3d, int num_draws) :
    density_3d (d3d),
    mean (1.0*num_draws/HIST_DIM1D)
  {}

  KOKKOS_INLINE_FUNCTION void
  operator() (const typename memory_space::size_type i,
              RandomProperties& prop) const
  {
    typedef typename memory_space::size_type size_type;
    const double count = density_3d(i/(HIST_DIM3D*HIST_DIM3D),
                                    (i % (HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D,
                                    i % HIST_DIM3D);
    prop.mean += count;
    prop.variance += (count - mean) * (count - mean);

    // Track the extreme cell counts, exactly as the 1-D functor does. Without
    // this, min/max keep the sentinel values set by the RandomProperties
    // constructor, and any consumer that reports them shows 1e64 / -1e64.
    prop.min = count < prop.min ? count : prop.min;
    prop.max = count > prop.max ? count : prop.max;

    // The last cell has no successor to pair with.
    if (i < static_cast<size_type> (HIST_DIM1D-1)) {
      const double count_next = density_3d((i+1)/(HIST_DIM3D*HIST_DIM3D),
                                           ((i+1)%(HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D,
                                           (i+1)%HIST_DIM3D);
      prop.covariance += (count - mean) * (count_next - mean);
    }
  }
};
//
// Templated test that uses the above functors.
//
// Runs the three statistical checks (raw moments, 1-D histogram, 3-D
// histogram) for one Scalar type. All work happens in the constructor; each
// check stores a verdict of 1 (pass) or 0 (fail) in the pass_* members, which
// the caller inspects afterwards.
template <class RandomGenerator,class Scalar>
struct test_random_scalar {
typedef typename RandomGenerator::generator_type rnd_type;
// Verdicts of the raw-moment check.
int pass_mean,pass_var,pass_covar;
// Verdicts of the 1-D histogram check.
int pass_hist1d_mean,pass_hist1d_var,pass_hist1d_covar;
// Verdicts of the 3-D histogram check.
int pass_hist3d_mean,pass_hist3d_var,pass_hist3d_covar;
// density_1d, density_3d: histograms that the first check fills and the other
//                         two checks read; this constructor does not reset them.
// pool:                   generator pool the draws are taken from.
// num_draws:              used both to size the reduction (num_draws/1024 work
//                         items) and to normalize the accumulated sums.
test_random_scalar (typename test_random_functor<RandomGenerator,int>::type_1d& density_1d,
typename test_random_functor<RandomGenerator,int>::type_3d& density_3d,
RandomGenerator& pool,
unsigned int num_draws)
{
using std::cerr;
using std::endl;
using Kokkos::parallel_reduce;
// Check 1: mean, variance and covariance of the raw draws.
{
cerr << " -- Testing randomness properties" << endl;
RandomProperties result;
typedef test_random_functor<RandomGenerator, Scalar> functor_type;
parallel_reduce (num_draws/1024, functor_type (pool, density_1d, density_3d), result);
//printf("Result: %lf %lf %lf\n",result.mean/num_draws/3,result.variance/num_draws/3,result.covariance/num_draws/2);
double tolerance = 1.6*sqrt(1.0/num_draws);
double mean_expect = 0.5*Kokkos::rand<rnd_type,Scalar>::max();
double variance_expect = 1.0/3.0*mean_expect*mean_expect;
// The mean and variance sums are divided by num_draws and 3, the covariance
// sum by num_draws and 2; each *_eps is a relative deviation from the
// expected value.
double mean_eps = mean_expect/(result.mean/num_draws/3)-1.0;
double variance_eps = variance_expect/(result.variance/num_draws/3)-1.0;
double covariance_eps = result.covariance/num_draws/2/variance_expect;
// The variance and covariance are allowed 1.5x and 2x the base tolerance.
pass_mean = ((-tolerance < mean_eps) &&
( tolerance > mean_eps)) ? 1:0;
pass_var = ((-1.5*tolerance < variance_eps) &&
( 1.5*tolerance > variance_eps)) ? 1:0;
pass_covar = ((-2.0*tolerance < covariance_eps) &&
( 2.0*tolerance > covariance_eps)) ? 1:0;
// Note: this line prints pass_mean and pass_var but not pass_covar.
cerr << "Pass: " << pass_mean
<< " " << pass_var
<< " " << mean_eps
<< " " << variance_eps
<< " " << covariance_eps
<< " || " << tolerance << endl;
}
// Check 2: statistics of the bin counts of the 1-D histogram.
{
cerr << " -- Testing 1-D histogram" << endl;
RandomProperties result;
typedef test_histogram1d_functor<typename RandomGenerator::device_type> functor_type;
parallel_reduce (HIST_DIM1D, functor_type (density_1d, num_draws), result);
// In this check 'tolerance' is only printed; the verdicts below use the
// fixed thresholds 0.0001, 0.07 and 0.06.
double tolerance = 6*sqrt(1.0/HIST_DIM1D);
// Expected values use 3*num_draws samples spread over HIST_DIM1D bins.
double mean_expect = 1.0*num_draws*3/HIST_DIM1D;
double variance_expect = 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D);
double covariance_expect = -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D;
double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0;
double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0;
double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect;
pass_hist1d_mean = ((-0.0001 < mean_eps) &&
( 0.0001 > mean_eps)) ? 1:0;
pass_hist1d_var = ((-0.07 < variance_eps) &&
( 0.07 > variance_eps)) ? 1:0;
pass_hist1d_covar = ((-0.06 < covariance_eps) &&
( 0.06 > covariance_eps)) ? 1:0;
cerr << "Density 1D: " << mean_eps
<< " " << variance_eps
<< " " << (result.covariance/HIST_DIM1D/HIST_DIM1D)
<< " || " << tolerance
<< " " << result.min
<< " " << result.max
<< " || " << result.variance/HIST_DIM1D
<< " " << 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D)
<< " || " << result.covariance/HIST_DIM1D
<< " " << -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D
<< endl;
}
// Check 3: statistics of the cell counts of the 3-D histogram.
{
cerr << " -- Testing 3-D histogram" << endl;
RandomProperties result;
typedef test_histogram3d_functor<typename RandomGenerator::device_type> functor_type;
parallel_reduce (HIST_DIM1D, functor_type (density_3d, num_draws), result);
double tolerance = 6*sqrt(1.0/HIST_DIM1D);
// Expected values use num_draws samples spread over HIST_DIM1D cells.
double mean_expect = 1.0*num_draws/HIST_DIM1D;
double variance_expect = 1.0*num_draws/HIST_DIM1D*(1.0-1.0/HIST_DIM1D);
double covariance_expect = -1.0*num_draws/HIST_DIM1D/HIST_DIM1D;
double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0;
double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0;
double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect;
// Unlike the 1-D check, the verdicts here are relative to 'tolerance'
// (1.2x for the variance).
pass_hist3d_mean = ((-tolerance < mean_eps) &&
( tolerance > mean_eps)) ? 1:0;
pass_hist3d_var = ((-1.2*tolerance < variance_eps) &&
( 1.2*tolerance > variance_eps)) ? 1:0;
pass_hist3d_covar = ((-tolerance < covariance_eps) &&
( tolerance > covariance_eps)) ? 1:0;
cerr << "Density 3D: " << mean_eps
<< " " << variance_eps
<< " " << result.covariance/HIST_DIM1D/HIST_DIM1D
<< " || " << tolerance
<< " " << result.min
<< " " << result.max << endl;
}
}
};
template <class RandomGenerator>
void test_random(unsigned int num_draws)
{
using std::cerr;
using std::endl;
typename test_random_functor<RandomGenerator,int>::type_1d density_1d("D1d");
typename test_random_functor<RandomGenerator,int>::type_3d density_3d("D3d");
uint64_t ticks = std::chrono::high_resolution_clock::now().time_since_epoch().count();
cerr << "Test Seed:" << ticks << endl;
RandomGenerator pool(ticks);
cerr << "Test Scalar=int" << endl;
test_random_scalar<RandomGenerator,int> test_int(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_int.pass_mean,1);
ASSERT_EQ( test_int.pass_var,1);
ASSERT_EQ( test_int.pass_covar,1);
ASSERT_EQ( test_int.pass_hist1d_mean,1);
ASSERT_EQ( test_int.pass_hist1d_var,1);
ASSERT_EQ( test_int.pass_hist1d_covar,1);
ASSERT_EQ( test_int.pass_hist3d_mean,1);
ASSERT_EQ( test_int.pass_hist3d_var,1);
ASSERT_EQ( test_int.pass_hist3d_covar,1);
deep_copy(density_1d,0);
deep_copy(density_3d,0);
cerr << "Test Scalar=unsigned int" << endl;
test_random_scalar<RandomGenerator,unsigned int> test_uint(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_uint.pass_mean,1);
ASSERT_EQ( test_uint.pass_var,1);
ASSERT_EQ( test_uint.pass_covar,1);
ASSERT_EQ( test_uint.pass_hist1d_mean,1);
ASSERT_EQ( test_uint.pass_hist1d_var,1);
ASSERT_EQ( test_uint.pass_hist1d_covar,1);
ASSERT_EQ( test_uint.pass_hist3d_mean,1);
ASSERT_EQ( test_uint.pass_hist3d_var,1);
ASSERT_EQ( test_uint.pass_hist3d_covar,1);
deep_copy(density_1d,0);
deep_copy(density_3d,0);
cerr << "Test Scalar=int64_t" << endl;
test_random_scalar<RandomGenerator,int64_t> test_int64(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_int64.pass_mean,1);
ASSERT_EQ( test_int64.pass_var,1);
ASSERT_EQ( test_int64.pass_covar,1);
ASSERT_EQ( test_int64.pass_hist1d_mean,1);
ASSERT_EQ( test_int64.pass_hist1d_var,1);
ASSERT_EQ( test_int64.pass_hist1d_covar,1);
ASSERT_EQ( test_int64.pass_hist3d_mean,1);
ASSERT_EQ( test_int64.pass_hist3d_var,1);
ASSERT_EQ( test_int64.pass_hist3d_covar,1);
deep_copy(density_1d,0);
deep_copy(density_3d,0);
cerr << "Test Scalar=uint64_t" << endl;
test_random_scalar<RandomGenerator,uint64_t> test_uint64(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_uint64.pass_mean,1);
ASSERT_EQ( test_uint64.pass_var,1);
ASSERT_EQ( test_uint64.pass_covar,1);
ASSERT_EQ( test_uint64.pass_hist1d_mean,1);
ASSERT_EQ( test_uint64.pass_hist1d_var,1);
ASSERT_EQ( test_uint64.pass_hist1d_covar,1);
ASSERT_EQ( test_uint64.pass_hist3d_mean,1);
ASSERT_EQ( test_uint64.pass_hist3d_var,1);
ASSERT_EQ( test_uint64.pass_hist3d_covar,1);
deep_copy(density_1d,0);
deep_copy(density_3d,0);
cerr << "Test Scalar=float" << endl;
test_random_scalar<RandomGenerator,float> test_float(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_float.pass_mean,1);
ASSERT_EQ( test_float.pass_var,1);
ASSERT_EQ( test_float.pass_covar,1);
ASSERT_EQ( test_float.pass_hist1d_mean,1);
ASSERT_EQ( test_float.pass_hist1d_var,1);
ASSERT_EQ( test_float.pass_hist1d_covar,1);
ASSERT_EQ( test_float.pass_hist3d_mean,1);
ASSERT_EQ( test_float.pass_hist3d_var,1);
ASSERT_EQ( test_float.pass_hist3d_covar,1);
deep_copy(density_1d,0);
deep_copy(density_3d,0);
cerr << "Test Scalar=double" << endl;
test_random_scalar<RandomGenerator,double> test_double(density_1d,density_3d,pool,num_draws);
ASSERT_EQ( test_double.pass_mean,1);
ASSERT_EQ( test_double.pass_var,1);
ASSERT_EQ( test_double.pass_covar,1);
ASSERT_EQ( test_double.pass_hist1d_mean,1);
ASSERT_EQ( test_double.pass_hist1d_var,1);
ASSERT_EQ( test_double.pass_hist1d_covar,1);
ASSERT_EQ( test_double.pass_hist3d_mean,1);
ASSERT_EQ( test_double.pass_hist3d_var,1);
ASSERT_EQ( test_double.pass_hist3d_covar,1);
}
}
} // namespace Test
#endif // end of include guard

View File

@ -0,0 +1,99 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
//----------------------------------------------------------------------------
namespace Test {
#ifdef KOKKOS_HAVE_SERIAL
// GoogleTest fixture shared by the Serial algorithm tests. Kokkos::Serial is
// initialized once before the first test and finalized once after the last.
class serial : public ::testing::Test {
protected:
  // Configures the output format of std::cout and starts the Serial backend.
  static void SetUpTestCase()
  {
    std::cout << std::setprecision (5);
    std::cout << std::scientific;

    Kokkos::Serial::initialize ();
  }

  static void TearDownTestCase ()
  {
    Kokkos::Serial::finalize ();
  }
};
// The three Serial test cases. Each one is defined directly: the test name is
// fixed, so every case can only be instantiated once anyway.

// Statistical checks of the XorShift64 pool.
TEST_F( serial, Random_XorShift64 ) {
  typedef Kokkos::Random_XorShift64_Pool<Kokkos::Serial> pool_type;
  Impl::test_random<pool_type>( 10240000 );
}

// Statistical checks of the XorShift1024 pool.
TEST_F( serial, Random_XorShift1024 ) {
  typedef Kokkos::Random_XorShift1024_Pool<Kokkos::Serial> pool_type;
  Impl::test_random<pool_type>( 10130144 );
}

// Sort tests on unsigned keys; the argument is the edge length N that
// Impl::test_sort turns into N*N*N keys.
TEST_F( serial, SortUnsigned ) {
  Impl::test_sort< Kokkos::Serial, unsigned >( 171 );
}
#endif // KOKKOS_HAVE_SERIAL
} // namespace Test

View File

@ -0,0 +1,206 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef TESTSORT_HPP_
#define TESTSORT_HPP_
#include <gtest/gtest.h>
#include<Kokkos_Core.hpp>
#include<Kokkos_Random.hpp>
#include<Kokkos_Sort.hpp>
namespace Test {
namespace Impl{
// Reduction functor that counts the adjacent pairs (i, i+1) of a 1-D key view
// that are out of ascending order. It reads keys(i+1), so it must be run over
// one index fewer than the number of keys.
template<class ExecutionSpace, class Scalar>
struct is_sorted_struct {
  typedef unsigned int value_type;
  typedef ExecutionSpace execution_space;

  Kokkos::View<Scalar*,ExecutionSpace> keys;

  is_sorted_struct(Kokkos::View<Scalar*,ExecutionSpace> keys_)
    : keys(keys_)
  {}

  KOKKOS_INLINE_FUNCTION
  void operator() (int i, unsigned int& count) const {
    const bool descending_pair = keys(i) > keys(i+1);
    if (descending_pair) {
      ++count;
    }
  }
};
// Reduction functor that adds up all entries of a 1-D key view in double
// precision.
template<class ExecutionSpace, class Scalar>
struct sum {
  typedef double value_type;
  typedef ExecutionSpace execution_space;

  Kokkos::View<Scalar*,ExecutionSpace> keys;

  sum(Kokkos::View<Scalar*,ExecutionSpace> keys_)
    : keys(keys_)
  {}

  KOKKOS_INLINE_FUNCTION
  void operator() (int i, double& count) const {
    count = count + keys(i);
  }
};
// Reduction functor that counts the adjacent rows (i, i+1) of an N x 3 key
// view whose bin coordinates are out of lexicographic (x, then y, then z)
// order. It reads row i+1, so it must be run over one index fewer than the
// number of rows.
//
//   keys      N x 3 view of keys
//   max_bins  number of bins along every dimension
//   min, max  lower and upper end of the key range, shared by all dimensions
template<class ExecutionSpace, class Scalar>
struct bin3d_is_sorted_struct {
  typedef unsigned int value_type;
  typedef ExecutionSpace execution_space;

  Kokkos::View<Scalar*[3],ExecutionSpace> keys;
  int max_bins;
  Scalar min;
  Scalar max;

  bin3d_is_sorted_struct(Kokkos::View<Scalar*[3],ExecutionSpace> keys_,int max_bins_,Scalar min_,Scalar max_):
    keys(keys_),max_bins(max_bins_),min(min_),max(max_) {
  }

  // Bin coordinate of keys(i,dim). The scale factor is computed in double
  // precision and relative to the key range (max - min). The earlier form,
  // (key-min)/max * max_bins, was evaluated in integer arithmetic when Scalar
  // is an integer type and truncated to 0 for every key below max, which made
  // the whole check unable to fail.
  // NOTE(review): this is meant to reproduce the binning of the bin operator
  // used by the 3-D sort (Kokkos::SortImpl::DefaultBinOp3D) -- confirm against
  // Kokkos_Sort.hpp.
  KOKKOS_INLINE_FUNCTION
  int bin_index (int i, int dim) const {
    const double bins_per_unit = 1.0 * max_bins / (max - min);
    return int (bins_per_unit * (keys(i,dim) - min));
  }

  KOKKOS_INLINE_FUNCTION
  void operator() (int i, unsigned int& count) const {
    const int ix1 = bin_index (i, 0);
    const int iy1 = bin_index (i, 1);
    const int iz1 = bin_index (i, 2);
    const int ix2 = bin_index (i+1, 0);
    const int iy2 = bin_index (i+1, 1);
    const int iz2 = bin_index (i+1, 2);

    // Lexicographic comparison of (ix, iy, iz).
    if (ix1>ix2) count++;
    else if(ix1==ix2) {
      if (iy1>iy2) count++;
      else if ((iy1==iy2) && (iz1>iz2)) count++;
    }
  }
};
// Reduction functor that adds up all three components of every row of an
// N x 3 key view in double precision.
template<class ExecutionSpace, class Scalar>
struct sum3D {
  typedef double value_type;
  typedef ExecutionSpace execution_space;

  Kokkos::View<Scalar*[3],ExecutionSpace> keys;

  sum3D(Kokkos::View<Scalar*[3],ExecutionSpace> keys_)
    : keys(keys_)
  {}

  KOKKOS_INLINE_FUNCTION
  void operator() (int i, double& count) const {
    // Components are added one at a time, each converted to double on its own.
    for (int dim = 0; dim < 3; ++dim) {
      count += keys(i,dim);
    }
  }
};
template<class ExecutionSpace, typename KeyType>
void test_1D_sort(unsigned int n,bool force_kokkos) {
typedef Kokkos::View<KeyType*,ExecutionSpace> KeyViewType;
KeyViewType keys("Keys",n);
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
Kokkos::fill_random(keys,g,Kokkos::Random_XorShift64_Pool<ExecutionSpace>::generator_type::MAX_URAND);
double sum_before = 0.0;
double sum_after = 0.0;
unsigned int sort_fails = 0;
Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys),sum_before);
Kokkos::sort(keys,force_kokkos);
Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys),sum_after);
Kokkos::parallel_reduce(n-1,is_sorted_struct<ExecutionSpace, KeyType>(keys),sort_fails);
double ratio = sum_before/sum_after;
double epsilon = 1e-10;
unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0;
ASSERT_EQ(sort_fails,0);
ASSERT_EQ(equal_sum,1);
}
template<class ExecutionSpace, typename KeyType>
void test_3D_sort(unsigned int n) {
typedef Kokkos::View<KeyType*[3],ExecutionSpace > KeyViewType;
KeyViewType keys("Keys",n*n*n);
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
Kokkos::fill_random(keys,g,100.0);
double sum_before = 0.0;
double sum_after = 0.0;
unsigned int sort_fails = 0;
Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_before);
int bin_1d = 1;
while( bin_1d*bin_1d*bin_1d*4< (int) keys.dimension_0() ) bin_1d*=2;
int bin_max[3] = {bin_1d,bin_1d,bin_1d};
typename KeyViewType::value_type min[3] = {0,0,0};
typename KeyViewType::value_type max[3] = {100,100,100};
typedef Kokkos::SortImpl::DefaultBinOp3D< KeyViewType > BinOp;
BinOp bin_op(bin_max,min,max);
Kokkos::BinSort< KeyViewType , BinOp >
Sorter(keys,bin_op,false);
Sorter.create_permute_vector();
Sorter.template sort< KeyViewType >(keys);
Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_after);
Kokkos::parallel_reduce(keys.dimension_0()-1,bin3d_is_sorted_struct<ExecutionSpace, KeyType>(keys,bin_1d,min[0],max[0]),sort_fails);
double ratio = sum_before/sum_after;
double epsilon = 1e-10;
unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0;
printf("3D Sort Sum: %f %f Fails: %u\n",sum_before,sum_after,sort_fails);
ASSERT_EQ(sort_fails,0);
ASSERT_EQ(equal_sum,1);
}
// Entry point for the sort tests of one execution space / key type.
//
// Runs the 1D sort test twice on N*N*N keys (first with force_kokkos = true,
// then with force_kokkos = false) and finally the 3D bin-sort test with edge
// length N.
template<class ExecutionSpace, typename KeyType>
void test_sort(unsigned int N)
{
  const unsigned int num_keys = N*N*N;

  test_1D_sort<ExecutionSpace,KeyType>(num_keys, true);
  test_1D_sort<ExecutionSpace,KeyType>(num_keys, false);
  test_3D_sort<ExecutionSpace,KeyType>(N);
}
}
}
#endif /* TESTSORT_HPP_ */

View File

@ -0,0 +1,113 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
//----------------------------------------------------------------------------
namespace Test {
#ifdef KOKKOS_HAVE_PTHREAD
// gtest fixture that brings the Kokkos::Threads backend up once for the whole
// test case and shuts it down afterwards.
class threads : public ::testing::Test {
protected:
  // Configures std::cout formatting, picks a thread count and initializes
  // Kokkos::Threads with it.
  static void SetUpTestCase()
  {
    std::cout << std::setprecision(5) << std::scientific;

    // With hwloc available use (NUMA regions) x (cores per NUMA region);
    // the hardware-threads-per-core factor is not multiplied in.
    // Without hwloc fall back to 4 threads.
    const unsigned num_threads =
      Kokkos::hwloc::available()
        ? Kokkos::hwloc::get_available_numa_count()
          * Kokkos::hwloc::get_available_cores_per_numa()
        : 4;

    std::cout << "Threads: " << num_threads << std::endl;

    Kokkos::Threads::initialize( num_threads );
  }

  // Finalizes the Kokkos::Threads backend after the last test of the case.
  static void TearDownTestCase()
  {
    Kokkos::Threads::finalize();
  }
};
// Helper macros: each one expands to a single TEST_F case on the `threads`
// fixture. They are invoked once below and then #undef'd so the names do not
// leak past this point.

// Random_XorShift64: runs Impl::test_random on a Random_XorShift64_Pool over
// Kokkos::Threads, forwarding `num_draws`.
#define THREADS_RANDOM_XORSHIFT64( num_draws ) \
TEST_F( threads, Random_XorShift64 ) { \
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Threads> >(num_draws); \
}
// Random_XorShift1024: same as above for Random_XorShift1024_Pool.
#define THREADS_RANDOM_XORSHIFT1024( num_draws ) \
TEST_F( threads, Random_XorShift1024 ) { \
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Threads> >(num_draws); \
}
// SortUnsigned: runs Impl::test_sort on Kokkos::Threads, forwarding `size`.
// NOTE(review): despite the "Unsigned" in the test name, the key type
// instantiated here is double.
#define THREADS_SORT_UNSIGNED( size ) \
TEST_F( threads, SortUnsigned ) { \
Impl::test_sort< Kokkos::Threads, double >(size); \
}
// Instantiate the three test cases with their problem sizes.
THREADS_RANDOM_XORSHIFT64( 10240000 )
THREADS_RANDOM_XORSHIFT1024( 10130144 )
THREADS_SORT_UNSIGNED(171)
// Drop the helper macros again.
#undef THREADS_RANDOM_XORSHIFT64
#undef THREADS_RANDOM_XORSHIFT1024
#undef THREADS_SORT_UNSIGNED
#endif
} // namespace Test

View File

@ -0,0 +1,50 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
// Unit-test driver: lets googletest consume its command-line flags, runs every
// registered test and returns the RUN_ALL_TESTS() result as the exit status.
int main(int argc, char *argv[]) {
  ::testing::InitGoogleTest(&argc,argv);

  const int test_status = RUN_ALL_TESTS();
  return test_status;
}

View File

@ -0,0 +1,10 @@
# Declares the subpackages that make up the Kokkos package.
#
# Each row of SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS has four columns:
#   SubPackageName  - name of the subpackage
#   Directory       - its directory (relative path as listed below)
#   Class           - classification code (PS / EX here; presumably the TriBITS
#                     "Primary Stable" / "Experimental" test categories -
#                     TODO confirm against the TriBITS documentation)
#   Req/Opt         - whether the subpackage is REQUIRED or OPTIONAL
#
# Core is the only REQUIRED subpackage; the others are OPTIONAL.
TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS
#SubPackageName Directory Class Req/Opt
#
# New Kokkos subpackages:
Core core PS REQUIRED
Containers containers PS OPTIONAL
Algorithms algorithms PS OPTIONAL
Example example EX OPTIONAL
)

View File

@ -0,0 +1,79 @@
# @HEADER
# ************************************************************************
#
# Trilinos: An Object-Oriented Solver Framework
# Copyright (2001) Sandia Corporation
#
#
# Copyright (2001) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000, there is a non-exclusive license for use of this
# work by or on behalf of the U.S. Government. Export of this program
# may require a license from the United States Government.
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Corporation nor the names of the
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# NOTICE: The United States Government is granted for itself and others
# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide
# license in this data to reproduce, prepare derivative works, and
# perform publicly and display publicly. Beginning five (5) years from
# July 25, 2001, the United States Government is granted for itself and
# others acting on its behalf a paid-up, nonexclusive, irrevocable
# worldwide license in this data to reproduce, prepare derivative works,
# distribute copies to the public, perform publicly and display
# publicly, and to permit others to do so.
#
# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT
# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES
# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR
# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY
# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS
# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
#
# ************************************************************************
# @HEADER
# Check for CUDA support.
#
# Locates the CUDA toolkit through CMake's FindCUDA module (minimum version
# 3.2). On success this script
#   * defines the PACKAGE_ADD_CUDA_LIBRARY(<target> ...) helper macro, which
#     forwards to TRIBITS_ADD_LIBRARY(... CUDALIBRARY),
#   * publishes TPL_CUDA_LIBRARY_DIRS (empty), TPL_CUDA_INCLUDE_DIRS and
#     TPL_CUDA_LIBRARIES (cudart, cublas, cufft) via GLOBAL_SET, and
#   * registers the CUDA TPL through TIBITS_CREATE_IMPORTED_TPL_LIBRARY.
# On failure TPL_ENABLE_CUDA is set to OFF.
SET(_CUDA_FAILURE OFF)

# Have CMake find CUDA
IF(NOT _CUDA_FAILURE)
  FIND_PACKAGE(CUDA 3.2)
  IF (NOT CUDA_FOUND)
    SET(_CUDA_FAILURE ON)
  ENDIF()
ENDIF()

IF(NOT _CUDA_FAILURE)
  # if we haven't met failure
  macro(PACKAGE_ADD_CUDA_LIBRARY cuda_target)
    TRIBITS_ADD_LIBRARY(${cuda_target} ${ARGN} CUDALIBRARY)
  endmacro()
  GLOBAL_SET(TPL_CUDA_LIBRARY_DIRS)
  GLOBAL_SET(TPL_CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE})
  GLOBAL_SET(TPL_CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY} ${CUDA_cublas_LIBRARY} ${CUDA_cufft_LIBRARY})
  # Register the TPL whose variables were just set. This previously passed
  # CUSPARSE (copy/paste slip): the TPL_CUSPARSE_* variables are not defined
  # at this point, and the CUSPARSE script registers CUSPARSE itself after
  # including this file.
  # NOTE(review): the macro name is spelled "TIBITS_" at every call site; it is
  # defined outside this file, so the spelling is left untouched here.
  TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUDA)
ELSE()
  SET(TPL_ENABLE_CUDA OFF)
ENDIF()

View File

@ -0,0 +1,64 @@
# @HEADER
# ************************************************************************
#
# Trilinos: An Object-Oriented Solver Framework
# Copyright (2001) Sandia Corporation
#
#
# Copyright (2001) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000, there is a non-exclusive license for use of this
# work by or on behalf of the U.S. Government. Export of this program
# may require a license from the United States Government.
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Corporation nor the names of the
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# NOTICE: The United States Government is granted for itself and others
# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide
# license in this data to reproduce, prepare derivative works, and
# perform publicly and display publicly. Beginning five (5) years from
# July 25, 2001, the United States Government is granted for itself and
# others acting on its behalf a paid-up, nonexclusive, irrevocable
# worldwide license in this data to reproduce, prepare derivative works,
# distribute copies to the public, perform publicly and display
# publicly, and to permit others to do so.
#
# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT
# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES
# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR
# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY
# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS
# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
#
# ************************************************************************
# @HEADER
# CUSPARSE piggybacks on the CUDA detection: pull in the CUDA script first,
# then, if CUDA is still enabled afterwards, publish the TPL_CUSPARSE_*
# variables (library dirs empty, include dirs shared with CUDA, library taken
# from CUDA_cusparse_LIBRARY) and register the CUSPARSE TPL.
include(${TRIBITS_DEPS_DIR}/CUDA.cmake)

if(TPL_ENABLE_CUDA)
  GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
  GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
  GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY})
  TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE)
endif()

View File

@ -0,0 +1,70 @@
# @HEADER
# ************************************************************************
#
# Trilinos: An Object-Oriented Solver Framework
# Copyright (2001) Sandia Corporation
#
#
# Copyright (2001) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000, there is a non-exclusive license for use of this
# work by or on behalf of the U.S. Government. Export of this program
# may require a license from the United States Government.
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Corporation nor the names of the
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# NOTICE: The United States Government is granted for itself and others
# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide
# license in this data to reproduce, prepare derivative works, and
# perform publicly and display publicly. Beginning five (5) years from
# July 25, 2001, the United States Government is granted for itself and
# others acting on its behalf a paid-up, nonexclusive, irrevocable
# worldwide license in this data to reproduce, prepare derivative works,
# distribute copies to the public, perform publicly and display
# publicly, and to permit others to do so.
#
# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT
# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES
# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR
# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY
# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS
# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
#
# ************************************************************************
# @HEADER
#-----------------------------------------------------------------------------
# Hardware locality detection and control library.
#
# Acquisition information:
# Date checked: November 2011
# Checked by: H. Carter Edwards <hcedwar AT sandia.gov>
# Source: http://www.open-mpi.org/projects/hwloc/
# Version: 1.3
#
# Locate the HWLOC TPL: the hwloc.h header and the hwloc library are both
# required.
TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES(
  HWLOC
  REQUIRED_HEADERS    hwloc.h
  REQUIRED_LIBS_NAMES hwloc
)

View File

@ -0,0 +1,83 @@
# @HEADER
# ************************************************************************
#
# Trilinos: An Object-Oriented Solver Framework
# Copyright (2001) Sandia Corporation
#
#
# Copyright (2001) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000, there is a non-exclusive license for use of this
# work by or on behalf of the U.S. Government. Export of this program
# may require a license from the United States Government.
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Corporation nor the names of the
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# NOTICE: The United States Government is granted for itself and others
# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide
# license in this data to reproduce, prepare derivative works, and
# perform publicly and display publicly. Beginning five (5) years from
# July 25, 2001, the United States Government is granted for itself and
# others acting on its behalf a paid-up, nonexclusive, irrevocable
# worldwide license in this data to reproduce, prepare derivative works,
# distribute copies to the public, perform publicly and display
# publicly, and to permit others to do so.
#
# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT
# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES
# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR
# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY
# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS
# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
#
# ************************************************************************
# @HEADER
# Pthread TPL detection.
#
# USE_THREADS records whether the result of CMake's own Threads finder is
# used instead of the generic TriBITS header/library search.
set(USE_THREADS FALSE)

# Only probe with FindThreads when none of the TPL_Pthread_* variables is
# already set.
if(NOT TPL_Pthread_INCLUDE_DIRS
   AND NOT TPL_Pthread_LIBRARY_DIRS
   AND NOT TPL_Pthread_LIBRARIES)
  # CMake's Threads finder is a bit smarter at determining whether pthreads
  # is already built into the compiler and needs no library to link.
  find_package(Threads)

  # Take the FindThreads result only for the cases the TriBITS TPL system
  # cannot handle: pthreads found with either no link item at all or just the
  # "-pthread" flag.
  if(Threads_FOUND
     AND CMAKE_USE_PTHREADS_INIT
     AND (CMAKE_THREAD_LIBS_INIT STREQUAL ""
          OR CMAKE_THREAD_LIBS_INIT STREQUAL "-pthread"))
    set(USE_THREADS TRUE)
  endif()
endif()

if(NOT USE_THREADS)
  # Generic search: pthread.h plus a pthread library.
  TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES(
    Pthread
    REQUIRED_HEADERS    pthread.h
    REQUIRED_LIBS_NAMES pthread
  )
else()
  # Compiler-provided pthreads: no include or library directories, and the
  # link line is whatever FindThreads reported.
  set(TPL_Pthread_INCLUDE_DIRS "")
  set(TPL_Pthread_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}")
  set(TPL_Pthread_LIBRARY_DIRS "")
  TIBITS_CREATE_IMPORTED_TPL_LIBRARY(Pthread)
endif()

View File

@ -0,0 +1,70 @@
# @HEADER
# ************************************************************************
#
# Trilinos: An Object-Oriented Solver Framework
# Copyright (2001) Sandia Corporation
#
#
# Copyright (2001) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000, there is a non-exclusive license for use of this
# work by or on behalf of the U.S. Government. Export of this program
# may require a license from the United States Government.
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Corporation nor the names of the
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# NOTICE: The United States Government is granted for itself and others
# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide
# license in this data to reproduce, prepare derivative works, and
# perform publicly and display publicly. Beginning five (5) years from
# July 25, 2001, the United States Government is granted for itself and
# others acting on its behalf a paid-up, nonexclusive, irrevocable
# worldwide license in this data to reproduce, prepare derivative works,
# distribute copies to the public, perform publicly and display
# publicly, and to permit others to do so.
#
# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT
# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES
# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR
# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY
# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS
# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
#
# ************************************************************************
# @HEADER
#-----------------------------------------------------------------------------
# Qthreads lightweight multithreading library.
#
# Acquisition information:
# Date checked: July 2014
# Checked by: H. Carter Edwards <hcedwar AT sandia.gov>
# Source: https://code.google.com/p/qthreads
#
# Locate the QTHREAD TPL: the qthread.h header and the qthread library are
# both required.
TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES(
  QTHREAD
  REQUIRED_HEADERS    qthread.h
  REQUIRED_LIBS_NAMES qthread
)

View File

@ -0,0 +1,75 @@
# @HEADER
# ************************************************************************
#
# Trilinos: An Object-Oriented Solver Framework
# Copyright (2001) Sandia Corporation
#
#
# Copyright (2001) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000, there is a non-exclusive license for use of this
# work by or on behalf of the U.S. Government. Export of this program
# may require a license from the United States Government.
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Corporation nor the names of the
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# NOTICE: The United States Government is granted for itself and others
# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide
# license in this data to reproduce, prepare derivative works, and
# perform publicly and display publicly. Beginning five (5) years from
# July 25, 2001, the United States Government is granted for itself and
# others acting on its behalf a paid-up, nonexclusive, irrevocable
# worldwide license in this data to reproduce, prepare derivative works,
# distribute copies to the public, perform publicly and display
# publicly, and to permit others to do so.
#
# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT
# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES
# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR
# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY
# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS
# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
#
# ************************************************************************
# @HEADER
# CUSPARSE TPL detection.
#
# Requires the CUDA TPL to be enabled with CUDA >= 4.1; otherwise configuration
# stops with a fatal error. On success the TPL_CUSPARSE_* variables are
# published via GLOBAL_SET (library dirs empty, include dirs shared with CUDA,
# library taken from CUDA_cusparse_LIBRARY).
IF (NOT TPL_ENABLE_CUDA OR CUDA_VERSION VERSION_LESS "4.1")
  MESSAGE(FATAL_ERROR "\nCUSPARSE: did not find acceptable version of CUDA libraries (4.1 or greater)")
ELSE()
  IF(CMAKE_VERSION VERSION_LESS "2.8.8")
    # FindCUDA before CMake 2.8.8 does not find the cusparse library;
    # therefore, we must locate it ourselves.
    find_library(CUDA_cusparse_LIBRARY
      cusparse
      HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib
      )
    # find_library() leaves <VAR>-NOTFOUND in the variable on failure, which
    # IF() treats as false; this also covers an empty value.
    IF(NOT CUDA_cusparse_LIBRARY)
      MESSAGE(FATAL_ERROR "\nCUSPARSE: could not find cusparse library.")
    ENDIF()
  ENDIF()
  GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
  GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
  GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY})
ENDIF()

View File

@ -0,0 +1,71 @@
# @HEADER
# ************************************************************************
#
# Trilinos: An Object-Oriented Solver Framework
# Copyright (2001) Sandia Corporation
#
#
# Copyright (2001) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000, there is a non-exclusive license for use of this
# work by or on behalf of the U.S. Government. Export of this program
# may require a license from the United States Government.
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Corporation nor the names of the
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# NOTICE: The United States Government is granted for itself and others
# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide
# license in this data to reproduce, prepare derivative works, and
# perform publicly and display publicly. Beginning five (5) years from
# July 25, 2001, the United States Government is granted for itself and
# others acting on its behalf a paid-up, nonexclusive, irrevocable
# worldwide license in this data to reproduce, prepare derivative works,
# distribute copies to the public, perform publicly and display
# publicly, and to permit others to do so.
#
# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT
# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES
# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR
# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY
# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS
# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
#
# ************************************************************************
# @HEADER
#-----------------------------------------------------------------------------
# Hardware locality detection and control library.
#
# Acquisition information:
# Date checked: November 2011
# Checked by: H. Carter Edwards <hcedwar AT sandia.gov>
# Source: http://www.open-mpi.org/projects/hwloc/
# Version: 1.3
#
# Locate the HWLOC TPL: the hwloc.h header and the hwloc library are both
# required.
TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES(
  HWLOC
  REQUIRED_HEADERS    hwloc.h
  REQUIRED_LIBS_NAMES hwloc
)

View File

@ -0,0 +1,82 @@
# @HEADER
# ************************************************************************
#
# Trilinos: An Object-Oriented Solver Framework
# Copyright (2001) Sandia Corporation
#
#
# Copyright (2001) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000, there is a non-exclusive license for use of this
# work by or on behalf of the U.S. Government. Export of this program
# may require a license from the United States Government.
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Corporation nor the names of the
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# NOTICE: The United States Government is granted for itself and others
# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide
# license in this data to reproduce, prepare derivative works, and
# perform publicly and display publicly. Beginning five (5) years from
# July 25, 2001, the United States Government is granted for itself and
# others acting on its behalf a paid-up, nonexclusive, irrevocable
# worldwide license in this data to reproduce, prepare derivative works,
# distribute copies to the public, perform publicly and display
# publicly, and to permit others to do so.
#
# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT
# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES
# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR
# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY
# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS
# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
#
# ************************************************************************
# @HEADER
# Pthread TPL detection.
#
# USE_THREADS records whether the result of CMake's own Threads finder is
# used instead of the generic TriBITS header/library search.
set(USE_THREADS FALSE)

# Only probe with FindThreads when none of the TPL_Pthread_* variables is
# already set.
if(NOT TPL_Pthread_INCLUDE_DIRS
   AND NOT TPL_Pthread_LIBRARY_DIRS
   AND NOT TPL_Pthread_LIBRARIES)
  # CMake's Threads finder is a bit smarter at determining whether pthreads
  # is already built into the compiler and needs no library to link.
  find_package(Threads)

  # Take the FindThreads result only for the cases the TriBITS TPL system
  # cannot handle: pthreads found with either no link item at all or just the
  # "-pthread" flag.
  if(Threads_FOUND
     AND CMAKE_USE_PTHREADS_INIT
     AND (CMAKE_THREAD_LIBS_INIT STREQUAL ""
          OR CMAKE_THREAD_LIBS_INIT STREQUAL "-pthread"))
    set(USE_THREADS TRUE)
  endif()
endif()

if(NOT USE_THREADS)
  # Generic search: pthread.h plus a pthread library.
  TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES(
    Pthread
    REQUIRED_HEADERS    pthread.h
    REQUIRED_LIBS_NAMES pthread
  )
else()
  # Compiler-provided pthreads: no include or library directories, and the
  # link line is whatever FindThreads reported.
  set(TPL_Pthread_INCLUDE_DIRS "")
  set(TPL_Pthread_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}")
  set(TPL_Pthread_LIBRARY_DIRS "")
endif()

View File

@ -0,0 +1,70 @@
# @HEADER
# ************************************************************************
#
# Trilinos: An Object-Oriented Solver Framework
# Copyright (2001) Sandia Corporation
#
#
# Copyright (2001) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000, there is a non-exclusive license for use of this
# work by or on behalf of the U.S. Government. Export of this program
# may require a license from the United States Government.
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Corporation nor the names of the
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# NOTICE: The United States Government is granted for itself and others
# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide
# license in this data to reproduce, prepare derivative works, and
# perform publicly and display publicly. Beginning five (5) years from
# July 25, 2001, the United States Government is granted for itself and
# others acting on its behalf a paid-up, nonexclusive, irrevocable
# worldwide license in this data to reproduce, prepare derivative works,
# distribute copies to the public, perform publicly and display
# publicly, and to permit others to do so.
#
# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT
# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES
# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR
# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY
# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS
# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS.
#
# ************************************************************************
# @HEADER
#-----------------------------------------------------------------------------
# Qthreads lightweight user-level threading library.
#
# Acquisition information:
# Date checked: July 2014
# Checked by: H. Carter Edwards <hcedwar AT sandia.gov>
# Source: https://code.google.com/p/qthreads
#
# Search for the qthread.h header and the qthread library.
TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES(QTHREAD
  REQUIRED_HEADERS    qthread.h
  REQUIRED_LIBS_NAMES qthread
)

View File

@ -0,0 +1,485 @@
INCLUDE(CMakeParseArguments)
INCLUDE(CTest)
# ASSERT_DEFINED(<vars>)
#
# Emits a SEND_ERROR message for every variable name listed in the first
# argument (a single name or a ;-separated list) that is not defined.
function(ASSERT_DEFINED VARS)
  foreach(var_name ${VARS})
    if(DEFINED ${var_name})
      # Variable exists; nothing to report.
    else()
      message(SEND_ERROR "Error, the variable ${var_name} is not defined!")
    endif()
  endforeach()
endfunction()
# GLOBAL_SET(<varname> [values...])
#
# Stores the given values in the internal cache entry <varname>.
macro(GLOBAL_SET VARNAME)
  set(${VARNAME}
    ${ARGN}
    CACHE INTERNAL ""
  )
endmacro()
# PREPEND_GLOBAL_SET(<varname> [values...])
#
# Puts the given values in front of the current contents of the global
# (internal cache) list <varname>. <varname> must already be defined.
macro(PREPEND_GLOBAL_SET VARNAME)
  ASSERT_DEFINED(${VARNAME})
  GLOBAL_SET(${VARNAME}
    ${ARGN}
    ${${VARNAME}}
  )
endmacro()
# REMOVE_GLOBAL_DUPLICATES(<varname>)
#
# Removes duplicate entries from the global list <varname>. Nothing is done
# when the variable evaluates to false (e.g. it is empty).
function(REMOVE_GLOBAL_DUPLICATES VARNAME)
  ASSERT_DEFINED(${VARNAME})
  if(${VARNAME})
    set(unique_values ${${VARNAME}})
    list(REMOVE_DUPLICATES unique_values)
    GLOBAL_SET(${VARNAME} ${unique_values})
  endif()
endfunction()
# TRIBITS_ADD_OPTION_AND_DEFINE(<option> <define> <docstring> <default>)
#
# Declares the BOOL cache option <option> with the given docstring and default
# value. Unless <define> is the empty string, the global variable <define> is
# set to ON or OFF to mirror the resulting value of <option>.
macro(TRIBITS_ADD_OPTION_AND_DEFINE USER_OPTION_NAME MACRO_DEFINE_NAME DOCSTRING DEFAULT_VALUE)
  message(STATUS "TRIBITS_ADD_OPTION_AND_DEFINE: '${USER_OPTION_NAME}' '${MACRO_DEFINE_NAME}' '${DEFAULT_VALUE}'")
  set(${USER_OPTION_NAME} "${DEFAULT_VALUE}" CACHE BOOL "${DOCSTRING}")
  # Quote and prefix the name so the test stays well-formed when the name is
  # empty and is never re-interpreted as a variable reference by if().
  if(NOT "x${MACRO_DEFINE_NAME}" STREQUAL "x")
    if(${USER_OPTION_NAME})
      GLOBAL_SET(${MACRO_DEFINE_NAME} ON)
    else()
      GLOBAL_SET(${MACRO_DEFINE_NAME} OFF)
    endif()
  endif()
endmacro()
# TRIBITS_CONFIGURE_FILE(<file>)
#
# Runs configure_file() on ${PACKAGE_SOURCE_DIR}/cmake/<file>.in and writes the
# result to <file> in the current binary directory.
function(TRIBITS_CONFIGURE_FILE PACKAGE_NAME_CONFIG_FILE)
  set(template_file ${PACKAGE_SOURCE_DIR}/cmake/${PACKAGE_NAME_CONFIG_FILE}.in)
  set(generated_file ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME_CONFIG_FILE})
  configure_file(${template_file} ${generated_file})
endfunction()
# TRIBITS_ADD_DEBUG_OPTION()
#
# Declares the option ${PROJECT_NAME}_ENABLE_DEBUG (default OFF) together with
# the matching HAVE_${PROJECT_NAME_UC}_DEBUG define.
macro(TRIBITS_ADD_DEBUG_OPTION)
  TRIBITS_ADD_OPTION_AND_DEFINE(${PROJECT_NAME}_ENABLE_DEBUG HAVE_${PROJECT_NAME_UC}_DEBUG
    "Enable a host of runtime debug checking."
    OFF)
endmacro()
# TRIBITS_ADD_TEST_DIRECTORIES(<dir>...)
#
# Adds every listed directory to the build via add_subdirectory().
macro(TRIBITS_ADD_TEST_DIRECTORIES)
  foreach(test_subdir ${ARGN})
    add_subdirectory(${test_subdir})
  endforeach()
endmacro()
# TRIBITS_ADD_EXAMPLE_DIRECTORIES(<dir>...)
#
# Adds every listed directory to the build, but only when examples are enabled
# for the current package or for its parent package.
macro(TRIBITS_ADD_EXAMPLE_DIRECTORIES)
  if(${PACKAGE_NAME}_ENABLE_EXAMPLES OR ${PARENT_PACKAGE_NAME}_ENABLE_EXAMPLES)
    foreach(example_subdir ${ARGN})
      add_subdirectory(${example_subdir})
    endforeach()
  endif()
endmacro()
# TARGET_TRANSFER_PROPERTY(<target> <prop_in> <prop_out> <source_target>...)
#
# Sets property <prop_out> on <target> to the list of $<TARGET_PROPERTY:...>
# generator expressions that read <prop_in> from each source target.
# Being a macro, it leaves PROP_VALUES set in the caller's scope.
macro(TARGET_TRANSFER_PROPERTY TARGET_NAME PROP_IN PROP_OUT)
  set(PROP_VALUES)
  foreach(source_target ${ARGN})
    list(APPEND PROP_VALUES "$<TARGET_PROPERTY:${source_target},${PROP_IN}>")
  endforeach()
  set_target_properties(${TARGET_NAME}
    PROPERTIES ${PROP_OUT} "${PROP_VALUES}"
  )
endmacro()
# ADD_INTERFACE_LIBRARY(<name>)
#
# Stand-in for an interface library: creates a STATIC library <name> built
# from an empty generated dummy.cpp and tags it with the property INTERFACE.
macro(ADD_INTERFACE_LIBRARY LIB_NAME)
  file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp "")
  add_library(${LIB_NAME}
    STATIC
    ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp
  )
  set_target_properties(${LIB_NAME}
    PROPERTIES INTERFACE TRUE
  )
endmacro()
# TARGET_LINK_AND_INCLUDE_LIBRARIES(<target> <dep>...)
#
# Older versions of CMake do not make include directories transitive, so in
# addition to linking <target> publicly against every <dep>, the include
# directories of each <dep> (both interface and own) are added to <target>.
macro(TARGET_LINK_AND_INCLUDE_LIBRARIES TARGET_NAME)
  target_link_libraries(${TARGET_NAME} LINK_PUBLIC ${ARGN})
  foreach(dependency ${ARGN})
    target_include_directories(${TARGET_NAME}
      PUBLIC $<TARGET_PROPERTY:${dependency},INTERFACE_INCLUDE_DIRECTORIES>
    )
    target_include_directories(${TARGET_NAME}
      PUBLIC $<TARGET_PROPERTY:${dependency},INCLUDE_DIRECTORIES>
    )
  endforeach()
endmacro()
# TRIBITS_ADD_LIBRARY(<name>
#   [STATIC] [SHARED] [TESTONLY] [NO_INSTALL_LIB_OR_HEADERS] [CUDALIBRARY]
#   [HEADERS <h>...] [HEADERS_INSTALL_SUBDIR <dir>...] [NOINSTALLHEADERS <h>...]
#   [SOURCES <src>...] [DEPLIBS <lib>...] [IMPORTEDLIBS <lib>...]
#   [DEFINES <def>...])
#
# Creates library <name> (through CUDA_ADD_LIBRARY when CUDALIBRARY is given),
# links it against ${PACKAGE_NAME}_DEPS plus IMPORTEDLIBS and DEPLIBS, and
# installs the library and its headers unless TESTONLY or
# NO_INSTALL_LIB_OR_HEADERS is given. Non-TESTONLY libraries are prepended to
# the global list ${PACKAGE_NAME}_LIBS.
function(TRIBITS_ADD_LIBRARY LIBRARY_NAME)
  set(options STATIC SHARED TESTONLY NO_INSTALL_LIB_OR_HEADERS CUDALIBRARY)
  set(oneValueArgs)
  set(multiValueArgs HEADERS HEADERS_INSTALL_SUBDIR NOINSTALLHEADERS SOURCES DEPLIBS IMPORTEDLIBS DEFINES ADDED_LIB_TARGET_NAME_OUT)
  cmake_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  if(PARSE_HEADERS)
    list(REMOVE_DUPLICATES PARSE_HEADERS)
  endif()
  if(PARSE_SOURCES)
    list(REMOVE_DUPLICATES PARSE_SOURCES)
  endif()

  # Local variable to hold all of the libraries that will be directly linked
  # to this library.
  set(LINK_LIBS ${${PACKAGE_NAME}_DEPS})

  # Add dependent libraries passed directly in
  if(PARSE_IMPORTEDLIBS)
    list(APPEND LINK_LIBS ${PARSE_IMPORTEDLIBS})
  endif()
  if(PARSE_DEPLIBS)
    list(APPEND LINK_LIBS ${PARSE_DEPLIBS})
  endif()

  # Add the library and all the dependencies
  if(PARSE_DEFINES)
    add_definitions(${PARSE_DEFINES})
  endif()

  if(PARSE_STATIC)
    set(STATIC_KEYWORD "STATIC")
  else()
    set(STATIC_KEYWORD)
  endif()

  if(PARSE_SHARED)
    set(SHARED_KEYWORD "SHARED")
  else()
    set(SHARED_KEYWORD)
  endif()

  if(PARSE_TESTONLY)
    set(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL")
  else()
    set(EXCLUDE_FROM_ALL_KEYWORD)
  endif()

  if(NOT PARSE_CUDALIBRARY)
    add_library(
      ${LIBRARY_NAME}
      ${STATIC_KEYWORD}
      ${SHARED_KEYWORD}
      ${EXCLUDE_FROM_ALL_KEYWORD}
      ${PARSE_HEADERS}
      ${PARSE_NOINSTALLHEADERS}
      ${PARSE_SOURCES}
    )
  else()
    CUDA_ADD_LIBRARY(
      ${LIBRARY_NAME}
      ${PARSE_HEADERS}
      ${PARSE_NOINSTALLHEADERS}
      ${PARSE_SOURCES}
    )
  endif()

  TARGET_LINK_AND_INCLUDE_LIBRARIES(${LIBRARY_NAME} ${LINK_LIBS})

  # Install only libraries that are neither test-only nor explicitly excluded
  # from installation.
  if(NOT PARSE_TESTONLY AND NOT PARSE_NO_INSTALL_LIB_OR_HEADERS)
    install(
      TARGETS ${LIBRARY_NAME}
      EXPORT ${PROJECT_NAME}
      RUNTIME DESTINATION bin
      LIBRARY DESTINATION lib
      ARCHIVE DESTINATION lib
      COMPONENT ${PACKAGE_NAME}
    )

    # EXPORT is an option of install(TARGETS) only; in the FILES and DIRECTORY
    # signatures it would be taken as another path to install.
    if(PARSE_HEADERS)
      install(
        FILES ${PARSE_HEADERS}
        DESTINATION include
        COMPONENT ${PACKAGE_NAME}
      )
    endif()

    if(PARSE_HEADERS_INSTALL_SUBDIR)
      install(
        DIRECTORY ${PARSE_HEADERS_INSTALL_SUBDIR}
        DESTINATION include
        COMPONENT ${PACKAGE_NAME}
      )
    endif()
  endif()

  if(NOT PARSE_TESTONLY)
    PREPEND_GLOBAL_SET(${PACKAGE_NAME}_LIBS ${LIBRARY_NAME})
    REMOVE_GLOBAL_DUPLICATES(${PACKAGE_NAME}_LIBS)
  endif()
endfunction()
# TRIBITS_ADD_EXECUTABLE(<name>
#   [NOEXEPREFIX] [INSTALLABLE] [TESTONLY]
#   [SOURCES <src>...] [DIRECTORY <dir>]
#   [TESTONLYLIBS <lib>...] [IMPORTEDLIBS <lib>...]
#   [TARGET_DEFINES <def>...]
#   [ADDED_EXE_TARGET_NAME_OUT <var>])
#
# Creates an executable target named <name>, prefixed with "${PACKAGE_NAME}_"
# unless NOEXEPREFIX is given, and links it against PACKAGE_${PACKAGE_NAME}
# plus TESTONLYLIBS and IMPORTEDLIBS. Relative SOURCES are resolved against
# DIRECTORY when it is given. TESTONLY excludes the target from "all",
# INSTALLABLE installs it into bin, and the final target name is returned in
# <var>. The remaining keywords are accepted but not used by this function.
function(TRIBITS_ADD_EXECUTABLE EXE_NAME)
  set(options NOEXEPREFIX NOEXESUFFIX ADD_DIR_TO_NAME INSTALLABLE TESTONLY)
  set(oneValueArgs ADDED_EXE_TARGET_NAME_OUT)
  set(multiValueArgs SOURCES CATEGORIES HOST XHOST HOSTTYPE XHOSTTYPE DIRECTORY TESTONLYLIBS IMPORTEDLIBS DEPLIBS COMM LINKER_LANGUAGE TARGET_DEFINES DEFINES)
  cmake_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  set(LINK_LIBS PACKAGE_${PACKAGE_NAME})
  if(PARSE_TESTONLYLIBS)
    list(APPEND LINK_LIBS ${PARSE_TESTONLYLIBS})
  endif()
  if(PARSE_IMPORTEDLIBS)
    list(APPEND LINK_LIBS ${PARSE_IMPORTEDLIBS})
  endif()

  # Resolve relative source paths against DIRECTORY when one was given.
  set(EXE_SOURCES)
  foreach(SOURCE_FILE ${PARSE_SOURCES})
    if(PARSE_DIRECTORY AND NOT IS_ABSOLUTE "${SOURCE_FILE}")
      list(APPEND EXE_SOURCES ${PARSE_DIRECTORY}/${SOURCE_FILE})
    else()
      list(APPEND EXE_SOURCES ${SOURCE_FILE})
    endif()
  endforeach()

  set(EXE_BINARY_NAME ${EXE_NAME})
  if(DEFINED PACKAGE_NAME AND NOT PARSE_NOEXEPREFIX)
    set(EXE_BINARY_NAME ${PACKAGE_NAME}_${EXE_BINARY_NAME})
  endif()

  if(PARSE_TESTONLY)
    set(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL")
  else()
    set(EXCLUDE_FROM_ALL_KEYWORD)
  endif()

  add_executable(${EXE_BINARY_NAME} ${EXCLUDE_FROM_ALL_KEYWORD} ${EXE_SOURCES})

  # Compile definitions can only be attached once the target exists, and the
  # target carries the (possibly prefixed) binary name, not the raw EXE_NAME.
  if(PARSE_TARGET_DEFINES)
    target_compile_definitions(${EXE_BINARY_NAME} PUBLIC ${PARSE_TARGET_DEFINES})
  endif()

  TARGET_LINK_AND_INCLUDE_LIBRARIES(${EXE_BINARY_NAME} ${LINK_LIBS})

  if(PARSE_ADDED_EXE_TARGET_NAME_OUT)
    set(${PARSE_ADDED_EXE_TARGET_NAME_OUT} ${EXE_BINARY_NAME} PARENT_SCOPE)
  endif()

  if(PARSE_INSTALLABLE)
    install(
      TARGETS ${EXE_BINARY_NAME}
      EXPORT ${PROJECT_NAME}
      DESTINATION bin
    )
  endif()
endfunction()
# "check" target: runs CTest in extra-verbose mode for the configuration named
# by CMAKE_CFG_INTDIR. Test executables are attached to it as dependencies by
# TRIBITS_ADD_EXECUTABLE_AND_TEST.
add_custom_target(check
  COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR}
)
# TRIBITS_ADD_EXECUTABLE_AND_TEST(<name>
#   [WILL_FAIL]
#   [PASS_REGULAR_EXPRESSION <regex>] [FAIL_REGULAR_EXPRESSION <regex>]
#   [ADDED_TESTS_NAMES_OUT <var>] [ADDED_EXE_TARGET_NAME_OUT <var>]
#   [<arguments forwarded to TRIBITS_ADD_EXECUTABLE>...])
#
# Builds a TESTONLY executable through TRIBITS_ADD_EXECUTABLE, registers it as
# a CTest test under the resulting target name, and makes the "check" target
# depend on it. The other keywords parsed here are accepted but not acted on.
function(TRIBITS_ADD_EXECUTABLE_AND_TEST EXE_NAME)
  set(options STANDARD_PASS_OUTPUT WILL_FAIL)
  set(oneValueArgs
    PASS_REGULAR_EXPRESSION
    FAIL_REGULAR_EXPRESSION
    ENVIRONMENT
    TIMEOUT
    CATEGORIES
    ADDED_TESTS_NAMES_OUT
    ADDED_EXE_TARGET_NAME_OUT
  )
  set(multiValueArgs)
  cmake_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  # Everything not consumed above is handed to the executable helper, which
  # returns the final target name in TEST_NAME.
  TRIBITS_ADD_EXECUTABLE(${EXE_NAME}
    TESTONLY
    ADDED_EXE_TARGET_NAME_OUT TEST_NAME
    ${PARSE_UNPARSED_ARGUMENTS}
  )

  if(NOT WIN32)
    add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME})
  else()
    add_test(NAME ${TEST_NAME} WORKING_DIRECTORY ${LIBRARY_OUTPUT_PATH} COMMAND ${TEST_NAME}${CMAKE_EXECUTABLE_SUFFIX})
  endif()
  add_dependencies(check ${TEST_NAME})

  # Optional pass/fail criteria.
  if(PARSE_FAIL_REGULAR_EXPRESSION)
    set_tests_properties(${TEST_NAME}
      PROPERTIES FAIL_REGULAR_EXPRESSION ${PARSE_FAIL_REGULAR_EXPRESSION})
  endif()
  if(PARSE_PASS_REGULAR_EXPRESSION)
    set_tests_properties(${TEST_NAME}
      PROPERTIES PASS_REGULAR_EXPRESSION ${PARSE_PASS_REGULAR_EXPRESSION})
  endif()
  if(PARSE_WILL_FAIL)
    set_tests_properties(${TEST_NAME}
      PROPERTIES WILL_FAIL ${PARSE_WILL_FAIL})
  endif()

  # Report the created test / target name back to the caller when requested.
  if(PARSE_ADDED_TESTS_NAMES_OUT)
    set(${PARSE_ADDED_TESTS_NAMES_OUT} ${TEST_NAME} PARENT_SCOPE)
  endif()
  if(PARSE_ADDED_EXE_TARGET_NAME_OUT)
    set(${PARSE_ADDED_EXE_TARGET_NAME_OUT} ${TEST_NAME} PARENT_SCOPE)
  endif()
endfunction()
# TIBITS_CREATE_IMPORTED_TPL_LIBRARY(<tpl>)
#
# Creates the target TPL_LIB_<tpl> and attaches the libraries and include
# directories found in TPL_<tpl>_LIBRARIES and TPL_<tpl>_INCLUDE_DIRS to it.
macro(TIBITS_CREATE_IMPORTED_TPL_LIBRARY TPL_NAME)
  ADD_INTERFACE_LIBRARY(TPL_LIB_${TPL_NAME})
  target_link_libraries(TPL_LIB_${TPL_NAME}
    LINK_PUBLIC ${TPL_${TPL_NAME}_LIBRARIES}
  )
  target_include_directories(TPL_LIB_${TPL_NAME}
    INTERFACE ${TPL_${TPL_NAME}_INCLUDE_DIRS}
  )
endmacro()
# TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES(<tpl>
#   [REQUIRED_HEADERS <header>...] [REQUIRED_LIBS_NAMES <lib>...])
#
# Searches for the requested library (into TPL_<tpl>_LIBRARIES) and header
# directory (into TPL_<tpl>_INCLUDE_DIRS). When every requested search
# succeeds, the target TPL_LIB_<tpl> is created; otherwise nothing is created.
# The option keywords parsed below are accepted but not acted on.
function(TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES TPL_NAME)
  set(options MUST_FIND_ALL_LIBS MUST_FIND_ALL_HEADERS NO_PRINT_ENABLE_SUCCESS_FAIL)
  set(oneValueArgs)
  set(multiValueArgs REQUIRED_HEADERS REQUIRED_LIBS_NAMES)
  cmake_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  # Assume success until one of the requested searches comes back empty.
  set(tpl_found TRUE)

  if(PARSE_REQUIRED_LIBS_NAMES)
    find_library(TPL_${TPL_NAME}_LIBRARIES NAMES ${PARSE_REQUIRED_LIBS_NAMES})
    if(NOT TPL_${TPL_NAME}_LIBRARIES)
      set(tpl_found FALSE)
    endif()
  endif()

  if(PARSE_REQUIRED_HEADERS)
    find_path(TPL_${TPL_NAME}_INCLUDE_DIRS NAMES ${PARSE_REQUIRED_HEADERS})
    if(NOT TPL_${TPL_NAME}_INCLUDE_DIRS)
      set(tpl_found FALSE)
    endif()
  endif()

  if(tpl_found)
    TIBITS_CREATE_IMPORTED_TPL_LIBRARY(${TPL_NAME})
  endif()
endfunction()
# TRIBITS_PROCESS_TPL_DEP_FILE(<file>)
#
# Includes the TPL description <file>; the TPL name is the file name without
# directory and extension. Afterwards TPL_ENABLE_<name> is set to TRUE when
# the target TPL_LIB_<name> exists and to FALSE otherwise.
macro(TRIBITS_PROCESS_TPL_DEP_FILE TPL_FILE)
  get_filename_component(TPL_NAME ${TPL_FILE} NAME_WE)
  include("${TPL_FILE}")
  if(NOT TARGET TPL_LIB_${TPL_NAME})
    message(STATUS "Tpl library not found: ${TPL_NAME}")
    set(TPL_ENABLE_${TPL_NAME} FALSE)
  else()
    message(STATUS "Found tpl library: ${TPL_NAME}")
    set(TPL_ENABLE_${TPL_NAME} TRUE)
  endif()
endmacro()
# PREPEND_TARGET_SET(<varname> <target> <type>)
#
# Prepends <target> to the global list <varname> when the target exists.
# When it does not exist and <type> is REQUIRED, configuration stops with a
# fatal error; for any other <type> the missing target is silently skipped.
# Being a macro, it leaves REQUIRED set in the caller's scope.
macro(PREPEND_TARGET_SET VARNAME TARGET_NAME TYPE)
  # TYPE is a macro argument, not a variable: it has to be expanded with
  # ${TYPE}. The "x" prefix keeps if() from treating either operand as the
  # name of a variable.
  if("x${TYPE}" STREQUAL "xREQUIRED")
    set(REQUIRED TRUE)
  else()
    set(REQUIRED FALSE)
  endif()
  if(TARGET ${TARGET_NAME})
    PREPEND_GLOBAL_SET(${VARNAME} ${TARGET_NAME})
  elseif(REQUIRED)
    message(FATAL_ERROR "Missing dependency ${TARGET_NAME}")
  endif()
endmacro()
# TRIBITS_APPEND_PACKAGE_DEPS(<dep_list> <type> <package>...)
#
# Prepends PACKAGE_<package> to the global list <dep_list> for every listed
# package. <type> is accepted but not used here.
macro(TRIBITS_APPEND_PACKAGE_DEPS DEP_LIST TYPE)
  foreach(package_dep ${ARGN})
    PREPEND_GLOBAL_SET(${DEP_LIST} PACKAGE_${package_dep})
  endforeach()
endmacro()
# TRIBITS_APPEND_TPLS_DEPS(<dep_list> <type> <tpl>...)
#
# Hands TPL_LIB_<tpl> to PREPEND_TARGET_SET for every listed TPL, forwarding
# <dep_list> and <type>.
macro(TRIBITS_APPEND_TPLS_DEPS DEP_LIST TYPE)
  foreach(tpl_dep ${ARGN})
    PREPEND_TARGET_SET(${DEP_LIST} TPL_LIB_${tpl_dep} ${TYPE})
  endforeach()
endmacro()
# TRIBITS_ENABLE_TPLS(<tpl>...)
#
# For every listed name, sets the global ${PACKAGE_NAME}_ENABLE_<tpl> to TRUE
# when a target called <tpl> exists and to FALSE otherwise.
# NOTE(review): TPL targets elsewhere in this file are named TPL_LIB_<tpl>;
# confirm whether this test is meant to look for TPL_LIB_<tpl> instead.
macro(TRIBITS_ENABLE_TPLS)
  foreach(tpl_name ${ARGN})
    if(NOT TARGET ${tpl_name})
      GLOBAL_SET(${PACKAGE_NAME}_ENABLE_${tpl_name} FALSE)
    else()
      GLOBAL_SET(${PACKAGE_NAME}_ENABLE_${tpl_name} TRUE)
    endif()
  endforeach()
endmacro()
# TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
#   [LIB_REQUIRED_PACKAGES <pkg>...]  [LIB_OPTIONAL_PACKAGES <pkg>...]
#   [TEST_REQUIRED_PACKAGES <pkg>...] [TEST_OPTIONAL_PACKAGES <pkg>...]
#   [LIB_REQUIRED_TPLS <tpl>...]      [LIB_OPTIONAL_TPLS <tpl>...]
#   [TEST_REQUIRED_TPLS <tpl>...]     [TEST_OPTIONAL_TPLS <tpl>...]
#   [REGRESSION_EMAIL_LIST ...] [SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS ...])
#
# Resets and fills the global lists ${PACKAGE_NAME}_DEPS and
# ${PACKAGE_NAME}_TEST_DEPS from the package and TPL arguments, then records
# per-TPL enable flags through TRIBITS_ENABLE_TPLS. The last two keywords are
# parsed but not used here.
macro(TRIBITS_PACKAGE_DEFINE_DEPENDENCIES)
  set(options)
  set(oneValueArgs)
  set(multiValueArgs
    LIB_REQUIRED_PACKAGES
    LIB_OPTIONAL_PACKAGES
    TEST_REQUIRED_PACKAGES
    TEST_OPTIONAL_PACKAGES
    LIB_REQUIRED_TPLS
    LIB_OPTIONAL_TPLS
    TEST_REQUIRED_TPLS
    TEST_OPTIONAL_TPLS
    REGRESSION_EMAIL_LIST
    SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS
  )
  cmake_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  # Library dependencies.
  GLOBAL_SET(${PACKAGE_NAME}_DEPS "")
  TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_DEPS REQUIRED ${PARSE_LIB_REQUIRED_PACKAGES})
  TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_DEPS OPTIONAL ${PARSE_LIB_OPTIONAL_PACKAGES})
  TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_DEPS REQUIRED ${PARSE_LIB_REQUIRED_TPLS})
  TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_DEPS OPTIONAL ${PARSE_LIB_OPTIONAL_TPLS})

  # Test dependencies.
  GLOBAL_SET(${PACKAGE_NAME}_TEST_DEPS "")
  TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_TEST_DEPS REQUIRED ${PARSE_TEST_REQUIRED_PACKAGES})
  TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_TEST_DEPS OPTIONAL ${PARSE_TEST_OPTIONAL_PACKAGES})
  TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_TEST_DEPS REQUIRED ${PARSE_TEST_REQUIRED_TPLS})
  TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_TEST_DEPS OPTIONAL ${PARSE_TEST_OPTIONAL_TPLS})

  # Per-TPL enable flags for every TPL mentioned above.
  TRIBITS_ENABLE_TPLS(
    ${PARSE_LIB_REQUIRED_TPLS}
    ${PARSE_LIB_OPTIONAL_TPLS}
    ${PARSE_TEST_REQUIRED_TPLS}
    ${PARSE_TEST_OPTIONAL_TPLS}
  )
endmacro()
# TRIBITS_SUBPACKAGE(<name>)
#
# Starts a subpackage in the current directory: records the package source
# dir, remembers the parent package name, appends <name> to PACKAGE_NAME,
# creates the PACKAGE_<package> library target with an empty library list and
# finally includes the subpackage's cmake/Dependencies.cmake.
macro(TRIBITS_SUBPACKAGE NAME)
  set(PACKAGE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
  set(PARENT_PACKAGE_NAME ${PACKAGE_NAME})
  set(PACKAGE_NAME ${PACKAGE_NAME}${NAME})
  string(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC)

  ADD_INTERFACE_LIBRARY(PACKAGE_${PACKAGE_NAME})
  GLOBAL_SET(${PACKAGE_NAME}_LIBS "")

  include(${PACKAGE_SOURCE_DIR}/cmake/Dependencies.cmake)
endmacro()
# TRIBITS_SUBPACKAGE_POSTPROCESS()
#
# Links the PACKAGE_<package> target against every library collected in
# ${PACKAGE_NAME}_LIBS and forwards their include directories.
macro(TRIBITS_SUBPACKAGE_POSTPROCESS)
  TARGET_LINK_AND_INCLUDE_LIBRARIES(PACKAGE_${PACKAGE_NAME}
    ${${PACKAGE_NAME}_LIBS}
  )
endmacro()
# TRIBITS_PACKAGE_DECL(<name>)
#
# Declares the project <name>, derives PROJECT_NAME_UC, PACKAGE_NAME and
# PACKAGE_NAME_UC from it, and processes every *.cmake TPL description found
# in the project's cmake/deps directory.
macro(TRIBITS_PACKAGE_DECL NAME)
  project(${NAME})
  string(TOUPPER ${PROJECT_NAME} PROJECT_NAME_UC)
  set(PACKAGE_NAME ${PROJECT_NAME})
  string(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC)

  # PROJECT_SOURCE_DIR (set by project() above) equals CMAKE_SOURCE_DIR for a
  # top-level build, but still points at this package when it is pulled into
  # a larger build with add_subdirectory().
  set(TRIBITS_DEPS_DIR "${PROJECT_SOURCE_DIR}/cmake/deps")
  file(GLOB TPLS_FILES "${TRIBITS_DEPS_DIR}/*.cmake")
  foreach(TPL_FILE ${TPLS_FILES})
    TRIBITS_PROCESS_TPL_DEP_FILE(${TPL_FILE})
  endforeach()
endmacro()
# TRIBITS_PROCESS_SUBPACKAGES()
#
# Adds every immediate subdirectory of the current source directory that
# contains a cmake/Dependencies.cmake file as a subdirectory of the build.
macro(TRIBITS_PROCESS_SUBPACKAGES)
  # Glob and relativize against the same directory so the resulting paths are
  # valid arguments for add_subdirectory(), which resolves relative paths
  # against the current source directory.
  file(GLOB SUBPACKAGES
    RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}/*/cmake/Dependencies.cmake
  )
  foreach(SUBPACKAGE ${SUBPACKAGES})
    # PATH is the spelling of DIRECTORY that CMake 2.8.11 (the declared
    # minimum version) understands; DIRECTORY only exists from 2.8.12 on.
    get_filename_component(SUBPACKAGE_CMAKE ${SUBPACKAGE} PATH)
    get_filename_component(SUBPACKAGE_DIR ${SUBPACKAGE_CMAKE} PATH)
    add_subdirectory(${SUBPACKAGE_DIR})
  endforeach()
endmacro()
# The following TriBITS entry points are intentionally empty here: they accept
# any arguments and do nothing.

# No-op.
macro(TRIBITS_PACKAGE_DEF)
endmacro()

# No-op.
macro(TRIBITS_EXCLUDE_AUTOTOOLS_FILES)
endmacro()

# No-op.
macro(TRIBITS_EXCLUDE_FILES)
endmacro()

# No-op.
macro(TRIBITS_PACKAGE_POSTPROCESS)
endmacro()

View File

@ -0,0 +1,190 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake. They are placed after the options assembled
# below, immediately before the source directory.
#
#
# Force CMake to re-evaluate build options.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#-----------------------------------------------------------------------------
# Incrementally construct cmake configure options:
CMAKE_CONFIGURE=""
#-----------------------------------------------------------------------------
# Location of Trilinos source tree:
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
# Location for installation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/host/$(date +%F)"
#-----------------------------------------------------------------------------
# General build options.
# Use a variable so options can be propagated to CUDA compiler.
CMAKE_VERBOSE_MAKEFILE=OFF
CMAKE_BUILD_TYPE=RELEASE
# CMAKE_BUILD_TYPE=DEBUG
#-----------------------------------------------------------------------------
# Build for CUDA architecture:
CUDA_ARCH=""
# CUDA_ARCH="20"
# CUDA_ARCH="30"
# CUDA_ARCH="35"
# Build with Intel compiler
INTEL=ON
# Build for MIC architecture:
# INTEL_XEON_PHI=ON
# Build with HWLOC at location:
HWLOC_BASE_DIR="/home/projects/libraries/host/hwloc/1.6.2"
# Location for MPI to use in examples:
MPI_BASE_DIR=""
#-----------------------------------------------------------------------------
# MPI configuration only used for examples:
#
# Must have the MPI_BASE_DIR so that the
# include path can be passed to the Cuda compiler
if [ -n "${MPI_BASE_DIR}" ] ;
then
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Pthread configuration:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP configuration:
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Configure packages for kokkos-only:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Hardware locality cmake configuration:
if [ -n "${HWLOC_BASE_DIR}" ] ;
then
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration:
if [ -n "${CUDA_ARCH}" ] ;
then
  # Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
  # this is different than the standard CMAKE_CXX_FLAGS syntax.
  CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
  CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
  if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
  then
    CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
  else
    CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
  fi
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
fi
#-----------------------------------------------------------------------------
# Select the Intel compilers for both native Intel and Xeon Phi builds:
if [ "${INTEL}" = "ON" ] || [ "${INTEL_XEON_PHI}" = "ON" ] ;
then
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
fi
#-----------------------------------------------------------------------------
# Cross-compile for Intel Xeon Phi:
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
then
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
  # Cannot cross-compile fortran compatibility checks on the MIC:
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
  # Tell cmake the answers to compile-and-execute tests
  # to prevent cmake from executing a cross-compiled program.
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
fi
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
#-----------------------------------------------------------------------------
# CMAKE_CONFIGURE is deliberately left unquoted so that it splits into
# separate options; the caller's own arguments are forwarded unchanged.
echo "cmake ${CMAKE_CONFIGURE} $* ${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} "$@" "${CMAKE_PROJECT_DIR}"
#-----------------------------------------------------------------------------

View File

@ -0,0 +1,186 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake. They are placed after the options assembled
# below, immediately before the source directory.
#
#
# Force CMake to re-evaluate build options.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#-----------------------------------------------------------------------------
# Incrementally construct cmake configure options:
CMAKE_CONFIGURE=""
#-----------------------------------------------------------------------------
# Location of Trilinos source tree:
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
# Location for installation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/mic/$(date +%F)"
#-----------------------------------------------------------------------------
# General build options.
# Use a variable so options can be propagated to CUDA compiler.
CMAKE_VERBOSE_MAKEFILE=OFF
CMAKE_BUILD_TYPE=RELEASE
# CMAKE_BUILD_TYPE=DEBUG
#-----------------------------------------------------------------------------
# Build for CUDA architecture:
CUDA_ARCH=""
# CUDA_ARCH="20"
# CUDA_ARCH="30"
# CUDA_ARCH="35"
# Build for MIC architecture:
INTEL_XEON_PHI=ON
# Build with HWLOC at location:
HWLOC_BASE_DIR="/home/projects/libraries/mic/hwloc/1.6.2"
# Location for MPI to use in examples:
MPI_BASE_DIR=""
#-----------------------------------------------------------------------------
# MPI configuration only used for examples:
#
# Must have the MPI_BASE_DIR so that the
# include path can be passed to the Cuda compiler
if [ -n "${MPI_BASE_DIR}" ] ;
then
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Pthread configuration:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP configuration:
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Configure packages for kokkos-only:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Hardware locality cmake configuration:
if [ -n "${HWLOC_BASE_DIR}" ] ;
then
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration:
if [ -n "${CUDA_ARCH}" ] ;
then
  # Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
  # this is different than the standard CMAKE_CXX_FLAGS syntax.
  CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
  CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
  if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
  then
    CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
  else
    CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
  fi
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
fi
#-----------------------------------------------------------------------------
# Select the Intel compilers for both native Intel and Xeon Phi builds:
if [ "${INTEL}" = "ON" ] || [ "${INTEL_XEON_PHI}" = "ON" ] ;
then
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
fi
#-----------------------------------------------------------------------------
# Cross-compile for Intel Xeon Phi:
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
then
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
  # Cannot cross-compile fortran compatibility checks on the MIC:
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
  # Tell cmake the answers to compile-and-execute tests
  # to prevent cmake from executing a cross-compiled program.
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
fi
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
#-----------------------------------------------------------------------------
# CMAKE_CONFIGURE is deliberately left unquoted so that it splits into
# separate options; the caller's own arguments are forwarded unchanged.
echo "cmake ${CMAKE_CONFIGURE} $* ${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} "$@" "${CMAKE_PROJECT_DIR}"
#-----------------------------------------------------------------------------

View File

@ -0,0 +1,293 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# General build options.
# The build type is kept in a shell variable because later sections test it
# (bounds checking, nvcc flags).  Use DEBUG instead of RELEASE for a debug build.
CMAKE_BUILD_TYPE="RELEASE"
# Trilinos source tree, and an installation directory named after today's
# date as printed by 'date +%F':
TRILINOS_SOURCE_DIR="${HOME}/Trilinos"
TRILINOS_INSTALL_DIR="${HOME}/TrilinosInstall/$(date +%F)"
#-----------------------------------------------------------------------------
# Platform description.  Every setting starts out empty and is filled in by
# the single active branch of the if/elif chain below.
USE_CUDA_ARCH=""
USE_THREAD=""
USE_OPENMP=""
USE_INTEL=""
USE_XEON_PHI=""
HWLOC_BASE_DIR=""
MPI_BASE_DIR=""
BLAS_LIB_DIR=""
LAPACK_LIB_DIR=""
# The branch guarded by 'true' is the selected platform.  To pick another
# platform, move 'true' to its branch and put 'false' here.
if true ; then
  # Platform 'kokkos-dev' with Cuda, OpenMP, hwloc, mpi, gnu
  USE_CUDA_ARCH="35"
  USE_OPENMP="ON"
  HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7"
  MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7"
  BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib"
  LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib"
elif false ; then
  # Platform 'kokkos-dev' with Cuda, Threads, hwloc, mpi, gnu
  USE_CUDA_ARCH="35"
  USE_THREAD="ON"
  HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7"
  MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7"
  BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib"
  LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib"
elif false ; then
  # Platform 'kokkos-dev' with Xeon Phi and hwloc
  USE_OPENMP="ON"
  USE_INTEL="ON"
  USE_XEON_PHI="ON"
  HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/mic/intel/13.SP1.1.106"
elif false ; then
  # Platform 'kokkos-nvidia' with Cuda, OpenMP, hwloc, mpi, gnu
  USE_CUDA_ARCH="20"
  USE_OPENMP="ON"
  HWLOC_BASE_DIR="/home/sems/common/hwloc/current"
  MPI_BASE_DIR="/home/sems/common/openmpi/current"
elif false ; then
  # Platform 'kokkos-nvidia' with Cuda, Threads, hwloc, mpi, gnu
  USE_CUDA_ARCH="20"
  USE_THREAD="ON"
  HWLOC_BASE_DIR="/home/sems/common/hwloc/current"
  MPI_BASE_DIR="/home/sems/common/openmpi/current"
fi
#-----------------------------------------------------------------------------
# Incrementally construct the cmake configure command line.
# C++ compiler flags are collected separately in CMAKE_CXX_FLAGS and are
# attached to the command line near the end of the script.
CMAKE_CONFIGURE=""
CMAKE_CXX_FLAGS=""
#-----------------------------------------------------------------------------
# Kokkos subpackages together with their tests and examples:
for cfg_opt in \
  "Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" \
  "Trilinos_ENABLE_EXAMPLES:BOOL=ON" \
  "Trilinos_ENABLE_TESTS:BOOL=ON" \
  "Trilinos_ENABLE_KokkosCore:BOOL=ON" \
  "Trilinos_ENABLE_KokkosContainers:BOOL=ON" \
  "Trilinos_ENABLE_TpetraKernels:BOOL=ON" \
  "Trilinos_ENABLE_KokkosExample:BOOL=ON"
do
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
done
#-----------------------------------------------------------------------------
# The three groups below are optional: change 'true' to 'false' to leave a
# group out of the build.
if true ; then
  # Tpetra/Kokkos:
  for cfg_opt in \
    "BLAS_LIBRARY_DIRS:FILEPATH=${BLAS_LIB_DIR}" \
    "LAPACK_LIBRARY_DIRS:FILEPATH=${LAPACK_LIB_DIR}" \
    "Trilinos_ENABLE_Tpetra:BOOL=ON" \
    "Trilinos_ENABLE_Kokkos:BOOL=ON" \
    "Trilinos_ENABLE_TpetraClassic:BOOL=ON" \
    "Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON" \
    "Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON" \
    "Tpetra_ENABLE_Kokkos_Refactor:BOOL=ON" \
    "KokkosClassic_DefaultNode:STRING=Kokkos::Compat::KokkosOpenMPWrapperNode"
  do
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
  done
  CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}-DKOKKOS_FAST_COMPILE"
  # Kokkos' CUDA backend is switched on only when a CUDA architecture is set.
  if test -n "${USE_CUDA_ARCH}" ; then
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Cuda:BOOL=ON"
  fi
fi
if true ; then
  # Stokhos:
  for cfg_opt in \
    "Trilinos_ENABLE_Sacado:BOOL=ON" \
    "Trilinos_ENABLE_Stokhos:BOOL=ON" \
    "Stokhos_ENABLE_Belos:BOOL=ON"
  do
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
  done
fi
if true ; then
  # TrilinosCouplings:
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TrilinosCouplings:BOOL=ON"
fi
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Pass the build type selected at the top of the script on to CMake.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
# For verbose makefiles, uncomment:
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON"
# Debug builds additionally enable Kokkos bounds checking.
# The comparison must use '=': this script runs under /bin/sh, and the POSIX
# 'test' utility does not define '=='.  A strict sh rejects '==', the test
# fails, and bounds checking would silently stay off.
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; then
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
fi
#-----------------------------------------------------------------------------
# Location for installation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# MPI configuration (MPI is only used for the examples).
#
# MPI_BASE_DIR must be known so that the include path can be passed to the
# Cuda compiler; an empty MPI_BASE_DIR turns MPI off.
if test -n "${MPI_BASE_DIR}" ; then
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Kokkos pthread configuration: the backend is enabled only for USE_THREAD=ON.
case "${USE_THREAD}" in
  ON)
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON"
    ;;
  *)
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
    ;;
esac
#-----------------------------------------------------------------------------
# Kokkos OpenMP configuration: the backend is enabled only for USE_OPENMP=ON.
case "${USE_OPENMP}" in
  ON)
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
    ;;
  *)
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=OFF"
    ;;
esac
#-----------------------------------------------------------------------------
# Hardware locality configuration; nothing is added when HWLOC_BASE_DIR is empty.
if test -n "${HWLOC_BASE_DIR}" ; then
  for cfg_opt in \
    "TPL_ENABLE_HWLOC:BOOL=ON" \
    "HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" \
    "HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
  do
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
  done
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration (skipped entirely when USE_CUDA_ARCH is empty):
if test -n "${USE_CUDA_ARCH}" ; then
  # Flags nvcc hands to the host compiler via -Xcompiler (comma separated).
  case "${USE_OPENMP}" in
    ON) nvcc_host_flags="-Wall,-ansi,-fopenmp" ;;
    *)  nvcc_host_flags="-Wall,-ansi" ;;
  esac
  # Debug symbols for a DEBUG build, optimization otherwise.
  case "${CMAKE_BUILD_TYPE}" in
    DEBUG) nvcc_opt_flag="-g" ;;
    *)     nvcc_opt_flag="-O3" ;;
  esac
  # Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
  # this is different than the standard CMAKE_CXX_FLAGS syntax.
  CUDA_NVCC_FLAGS="-DKOKKOS_HAVE_CUDA_ARCH=${USE_CUDA_ARCH}0;-gencode;arch=compute_${USE_CUDA_ARCH},code=sm_${USE_CUDA_ARCH};-Xcompiler;${nvcc_host_flags};${nvcc_opt_flag}"
  for cfg_opt in \
    "TPL_ENABLE_CUDA:BOOL=ON" \
    "TPL_ENABLE_CUSPARSE:BOOL=ON" \
    "CUDA_VERBOSE_BUILD:BOOL=OFF" \
    "CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
  do
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
  done
fi
#-----------------------------------------------------------------------------
# The Intel compilers are used for a host Intel build and for a Xeon Phi build.
if [ "${USE_INTEL}" = "ON" ] || [ "${USE_XEON_PHI}" = "ON" ] ; then
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc -D CMAKE_CXX_COMPILER=icpc"
fi
# Cross-compile for Intel Xeon Phi:
#  * Trilinos_ENABLE_Fortran is OFF because the fortran compatibility checks
#    cannot be cross-compiled on the MIC.
#  * The *_EXITCODE=0 entries tell cmake the answers to compile-and-execute
#    tests, to prevent cmake from executing a cross-compiled program.
# NOTE(review): the single quotes inside the values below are stored literally
# and are not removed when ${CMAKE_CONFIGURE} is later expanded unquoted, so
# cmake appears to receive them as part of the value -- confirm this is intended.
if test "${USE_XEON_PHI}" = "ON" ; then
  CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -mmic"
  for cfg_opt in \
    "CMAKE_SYSTEM_NAME=Linux" \
    "CMAKE_C_FLAGS:STRING=-mmic" \
    "CMAKE_Fortran_COMPILER:FILEPATH=ifort" \
    "BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" \
    "BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" \
    "Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" \
    "Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" \
    "BUILD_SHARED_LIBS:BOOL=OFF" \
    "DART_TESTING_TIMEOUT:STRING=600" \
    "LAPACK_LIBRARY_NAMES=''" \
    "TPL_LAPACK_LIBRARIES=''" \
    "TPL_ENABLE_BinUtils=OFF" \
    "TPL_Pthread_LIBRARIES=pthread" \
    "Trilinos_ENABLE_Fortran:BOOL=OFF" \
    "HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" \
    "HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" \
    "LAPACK_SLAPY2_WORKS_EXITCODE=0"
  do
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
  done
fi
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
# WARNING: the globs are resolved in the current directory, so run this script
# only from a dedicated build directory.  Run from ${HOME}, it would delete the
# default ${HOME}/Trilinos source tree and ${HOME}/TrilinosInstall.
# CMake's generated makefile is spelled 'Makefile', hence the Makefile* glob.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#
# ${CMAKE_CONFIGURE} is expanded unquoted on purpose so that it splits into
# separate cmake arguments.  CMAKE_CXX_FLAGS may hold several space-separated
# flags (e.g. "-DKOKKOS_FAST_COMPILE -mmic"); embedded in that string it would
# be split apart, and quote characters stored inside a variable do not protect
# it.  It is therefore passed as its own, properly quoted argument.
# The echoed line shows an equivalent command for copy-and-paste.
if [ -n "${CMAKE_CXX_FLAGS}" ] ; then
  echo "cmake ${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING='${CMAKE_CXX_FLAGS}' ${TRILINOS_SOURCE_DIR}"
  cmake ${CMAKE_CONFIGURE} -D "CMAKE_CXX_FLAGS:STRING=${CMAKE_CXX_FLAGS}" "${TRILINOS_SOURCE_DIR}"
else
  echo "cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}"
  cmake ${CMAKE_CONFIGURE} "${TRILINOS_SOURCE_DIR}"
fi
#-----------------------------------------------------------------------------

View File

@ -0,0 +1,88 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake.
#
# to build:
# build on bgq-b[1-12]
# module load sierra-devel
# run this configure file
# make
# to run:
# ssh bgq-login
# cd /scratch/username/...
# export OMP_PROC_BIND and XLSMPOPTS environment variables
# run with srun
# Note: hwloc does not work to get or set cpubindings on bgq.
# Use the openmp backend and the openmp environment variables.
#
# Only the mpi wrappers seem to be setup for cross-compile,
# so it is important that this configure enables MPI and uses mpigcc wrappers.
#
# Force CMake to re-evaluate build options by deleting its earlier output.
# WARNING: the globs are resolved in the current directory, so run this script
# only from a dedicated build directory; entries named CMake*, Trilinos*, etc.
# are removed.  CMake's generated makefile is spelled 'Makefile'.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#-----------------------------------------------------------------------------
# Settings used while assembling the cmake command line:
#   CMAKE_PROJECT_DIR       location of the Trilinos source tree
#   CMAKE_BUILD_TYPE        RELEASE, or DEBUG for a debug build
#   CMAKE_VERBOSE_MAKEFILE  ON for verbose makefiles
CMAKE_PROJECT_DIR="../Trilinos"
CMAKE_BUILD_TYPE="RELEASE"
CMAKE_VERBOSE_MAKEFILE="OFF"
#-----------------------------------------------------------------------------
# Construct the cmake configure options, in this order:
#   - installation prefix, named after today's date as printed by 'date +%F'
#   - the mpigcc/mpig++ 4.7.2 compiler wrappers, with MPI and OpenMP enabled
#   - a kokkos-only package selection, with the pthread backend disabled
#   - build type and makefile verbosity
CMAKE_CONFIGURE=""
for cfg_opt in \
  "CMAKE_INSTALL_PREFIX=../TrilinosInstall/$(date +%F)" \
  "CMAKE_C_COMPILER=mpigcc-4.7.2" \
  "CMAKE_CXX_COMPILER=mpig++-4.7.2" \
  "TPL_ENABLE_MPI:BOOL=ON" \
  "Trilinos_ENABLE_OpenMP:BOOL=ON" \
  "Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" \
  "Trilinos_ENABLE_EXAMPLES:BOOL=ON" \
  "Trilinos_ENABLE_TESTS:BOOL=ON" \
  "Trilinos_ENABLE_KokkosCore:BOOL=ON" \
  "Trilinos_ENABLE_KokkosContainers:BOOL=ON" \
  "Trilinos_ENABLE_TpetraKernels:BOOL=ON" \
  "Trilinos_ENABLE_KokkosExample:BOOL=ON" \
  "Kokkos_ENABLE_Pthread:BOOL=OFF" \
  "CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" \
  "CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
do
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
done
#-----------------------------------------------------------------------------
# Show, then run, the configure command.
# The script header states that additional command-line arguments are passed
# directly to CMake, so "$@" is forwarded here.  It comes after the generated
# options, so a -D given on the command line overrides the value built above.
# ${CMAKE_CONFIGURE} is expanded unquoted on purpose: it must split into words.
echo "cmake ${CMAKE_CONFIGURE}" "$@" "${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} "$@" "${CMAKE_PROJECT_DIR}"
#-----------------------------------------------------------------------------

View File

@ -0,0 +1,216 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake.
#
#
# Force CMake to re-evaluate build options by deleting its earlier output.
# WARNING: the globs are resolved in the current directory, so run this script
# only from a dedicated build directory; entries named CMake*, Trilinos*, etc.
# are removed.  CMake's generated makefile is spelled 'Makefile'.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#-----------------------------------------------------------------------------
# The cmake configure options are built up incrementally in CMAKE_CONFIGURE.
# It starts with the installation prefix, a directory named after today's date
# as printed by 'date +%F':
CMAKE_CONFIGURE=" -D CMAKE_INSTALL_PREFIX=${HOME}/TrilinosInstall/$(date +%F)"
# Location of the Trilinos source tree:
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
#-----------------------------------------------------------------------------
# General build options, kept in variables because later sections test them.
# For a debug build use CMAKE_BUILD_TYPE="DEBUG"; bounds checking can be added
# by uncommenting:
#CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_BUILD_TYPE="RELEASE"
CMAKE_VERBOSE_MAKEFILE="OFF"
#-----------------------------------------------------------------------------
# CUDA architecture to build for: "" (no CUDA), "20", "30" or "35".
CUDA_ARCH="35"
# Build with OpenMP and with pthreads:
OPENMP="ON"
PTHREADS="ON"
# Build host code with the Intel compiler:
INTEL="OFF"
# Build for the MIC architecture:
INTEL_XEON_PHI="OFF"
# HWLOC installation ("" builds without hwloc).  Alternative location:
#   /home/projects/hwloc/1.7.1/host/gnu/4.4.7
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
# MPI used by the examples ("" builds without MPI).  Alternative locations:
#   /home/projects/mvapich/2.0.0b/gnu/4.4.7
#   /home/projects/openmpi/1.7.3/llvm/2013-12-02/
MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.7.3"
#-----------------------------------------------------------------------------
# MPI configuration (MPI is only used for the examples).
#
# MPI_BASE_DIR must be known so that the include path can be passed to the
# Cuda compiler; an empty MPI_BASE_DIR turns MPI off.
if test -n "${MPI_BASE_DIR}" ; then
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Pthread configuration: ON only when PTHREADS is exactly "ON".
case "${PTHREADS}" in
  ON) tpl_pthread="ON" ;;
  *)  tpl_pthread="OFF" ;;
esac
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=${tpl_pthread}"
#-----------------------------------------------------------------------------
# OpenMP configuration: ON only when OPENMP is exactly "ON".
case "${OPENMP}" in
  ON) trilinos_openmp="ON" ;;
  *)  trilinos_openmp="OFF" ;;
esac
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=${trilinos_openmp}"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Configure packages for kokkos-only:
for cfg_opt in \
  "Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" \
  "Trilinos_ENABLE_EXAMPLES:BOOL=ON" \
  "Trilinos_ENABLE_TESTS:BOOL=ON" \
  "Trilinos_ENABLE_KokkosCore:BOOL=ON" \
  "Trilinos_ENABLE_KokkosContainers:BOOL=ON" \
  "Trilinos_ENABLE_TpetraKernels:BOOL=ON" \
  "Trilinos_ENABLE_KokkosExample:BOOL=ON"
do
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
done
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Hardware locality cmake configuration; nothing is added when HWLOC_BASE_DIR
# is empty.
if test -n "${HWLOC_BASE_DIR}" ; then
  for cfg_opt in \
    "TPL_ENABLE_HWLOC:BOOL=ON" \
    "HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" \
    "HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
  do
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
  done
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration (skipped entirely when CUDA_ARCH is empty):
if test -n "${CUDA_ARCH}" ; then
  # Flags nvcc hands to the host compiler via -Xcompiler (comma separated).
  case "${OPENMP}" in
    ON) nvcc_host_flags="-Wall,-ansi,-fopenmp" ;;
    *)  nvcc_host_flags="-Wall,-ansi" ;;
  esac
  # Debug symbols for a DEBUG build, optimization otherwise.
  case "${CMAKE_BUILD_TYPE}" in
    DEBUG) nvcc_opt_flag="-g" ;;
    *)     nvcc_opt_flag="-O3" ;;
  esac
  # Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
  # this is different than the standard CMAKE_CXX_FLAGS syntax.
  CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH};-Xcompiler;${nvcc_host_flags};${nvcc_opt_flag}"
  for cfg_opt in \
    "TPL_ENABLE_CUDA:BOOL=ON" \
    "TPL_ENABLE_CUSPARSE:BOOL=ON" \
    "CUDA_VERBOSE_BUILD:BOOL=OFF" \
    "CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
  do
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
  done
fi
#-----------------------------------------------------------------------------
# The Intel compilers are used for a host Intel build and for a Xeon Phi build.
if [ "${INTEL}" = "ON" ] || [ "${INTEL_XEON_PHI}" = "ON" ] ; then
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc -D CMAKE_CXX_COMPILER=icpc"
fi
#-----------------------------------------------------------------------------
# Cross-compile for Intel Xeon Phi:
#  * Trilinos_ENABLE_Fortran is OFF because the fortran compatibility checks
#    cannot be cross-compiled on the MIC.
#  * The *_EXITCODE=0 entries tell cmake the answers to compile-and-execute
#    tests, to prevent cmake from executing a cross-compiled program.
# NOTE(review): the single quotes inside the values below are stored literally
# and are not removed when ${CMAKE_CONFIGURE} is later expanded unquoted, so
# cmake appears to receive them as part of the value -- confirm this is intended.
if test "${INTEL_XEON_PHI}" = "ON" ; then
  for cfg_opt in \
    "CMAKE_SYSTEM_NAME=Linux" \
    "CMAKE_CXX_FLAGS:STRING=-mmic" \
    "CMAKE_C_FLAGS:STRING=-mmic" \
    "CMAKE_Fortran_COMPILER:FILEPATH=ifort" \
    "BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" \
    "BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" \
    "Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" \
    "Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" \
    "BUILD_SHARED_LIBS:BOOL=OFF" \
    "DART_TESTING_TIMEOUT:STRING=600" \
    "LAPACK_LIBRARY_NAMES=''" \
    "TPL_LAPACK_LIBRARIES=''" \
    "TPL_ENABLE_BinUtils=OFF" \
    "TPL_Pthread_LIBRARIES=pthread" \
    "Trilinos_ENABLE_Fortran:BOOL=OFF" \
    "HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" \
    "HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" \
    "LAPACK_SLAPY2_WORKS_EXITCODE=0"
  do
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
  done
fi
#-----------------------------------------------------------------------------
# Finish the option list with the build type and makefile verbosity.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
#-----------------------------------------------------------------------------
# Show, then run, the configure command.
# The script header states that additional command-line arguments are passed
# directly to CMake, so "$@" is forwarded here.  It comes after the generated
# options, so a -D given on the command line overrides the value built above.
# ${CMAKE_CONFIGURE} is expanded unquoted on purpose: it must split into words.
echo "cmake ${CMAKE_CONFIGURE}" "$@" "${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} "$@" "${CMAKE_PROJECT_DIR}"
#-----------------------------------------------------------------------------

View File

@ -0,0 +1,204 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake.
#
#
# Force CMake to re-evaluate build options by deleting its earlier output.
# WARNING: the globs are resolved in the current directory, so run this script
# only from a dedicated build directory; entries named CMake*, Trilinos*, etc.
# are removed.  CMake's generated makefile is spelled 'Makefile'.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#-----------------------------------------------------------------------------
# The cmake configure options are built up incrementally in CMAKE_CONFIGURE.
# It starts with the installation prefix, a directory named after today's date
# as printed by 'date +%F':
CMAKE_CONFIGURE=" -D CMAKE_INSTALL_PREFIX=/home/sems/common/kokkos/$(date +%F)"
# Location of the Trilinos source tree:
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
#-----------------------------------------------------------------------------
# General build options, kept in variables because later sections test them.
# For a debug build use CMAKE_BUILD_TYPE="DEBUG".
CMAKE_BUILD_TYPE="RELEASE"
CMAKE_VERBOSE_MAKEFILE="OFF"
#-----------------------------------------------------------------------------
# CUDA architecture to build for: "" (no CUDA), "20", "30" or "35".
CUDA_ARCH="20"
# Build with OpenMP:
OPENMP="ON"
# To build host code with the Intel compiler, uncomment:
# INTEL=ON
# To build for the MIC architecture, uncomment:
# INTEL_XEON_PHI=ON
# HWLOC installation to build with:
HWLOC_BASE_DIR="/home/sems/common/hwloc/current"
# MPI installation used by the examples:
MPI_BASE_DIR="/home/sems/common/openmpi/current"
#-----------------------------------------------------------------------------
# MPI configuration (MPI is only used for the examples).
#
# MPI_BASE_DIR must be known so that the include path can be passed to the
# Cuda compiler; an empty MPI_BASE_DIR turns MPI off.
if test -n "${MPI_BASE_DIR}" ; then
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Pthread configuration: always enabled here (use BOOL=OFF to disable).
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
#-----------------------------------------------------------------------------
# OpenMP configuration: ON only when OPENMP is exactly "ON".
case "${OPENMP}" in
  ON) trilinos_openmp="ON" ;;
  *)  trilinos_openmp="OFF" ;;
esac
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=${trilinos_openmp}"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Configure packages for kokkos-only:
for cfg_opt in \
  "Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" \
  "Trilinos_ENABLE_EXAMPLES:BOOL=ON" \
  "Trilinos_ENABLE_TESTS:BOOL=ON" \
  "Trilinos_ENABLE_KokkosCore:BOOL=ON" \
  "Trilinos_ENABLE_KokkosContainers:BOOL=ON" \
  "Trilinos_ENABLE_KokkosExample:BOOL=ON"
do
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
done
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Hardware locality cmake configuration; nothing is added when HWLOC_BASE_DIR
# is empty.
if test -n "${HWLOC_BASE_DIR}" ; then
  for cfg_opt in \
    "TPL_ENABLE_HWLOC:BOOL=ON" \
    "HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" \
    "HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
  do
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
  done
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration (skipped entirely when CUDA_ARCH is empty):
if test -n "${CUDA_ARCH}" ; then
  # Flags nvcc hands to the host compiler via -Xcompiler (comma separated).
  case "${OPENMP}" in
    ON) nvcc_host_flags="-Wall,-ansi,-fopenmp" ;;
    *)  nvcc_host_flags="-Wall,-ansi" ;;
  esac
  # Debug symbols for a DEBUG build, optimization otherwise.
  case "${CMAKE_BUILD_TYPE}" in
    DEBUG) nvcc_opt_flag="-g" ;;
    *)     nvcc_opt_flag="-O3" ;;
  esac
  # Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
  # this is different than the standard CMAKE_CXX_FLAGS syntax.
  CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH};-Xcompiler;${nvcc_host_flags};${nvcc_opt_flag}"
  for cfg_opt in \
    "TPL_ENABLE_CUDA:BOOL=ON" \
    "TPL_ENABLE_CUSPARSE:BOOL=ON" \
    "CUDA_VERBOSE_BUILD:BOOL=OFF" \
    "CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
  do
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
  done
fi
#-----------------------------------------------------------------------------
# The Intel compilers are used for a host Intel build and for a Xeon Phi build.
if [ "${INTEL}" = "ON" ] || [ "${INTEL_XEON_PHI}" = "ON" ] ; then
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc -D CMAKE_CXX_COMPILER=icpc"
fi
#-----------------------------------------------------------------------------
# Cross-compile for Intel Xeon Phi:
#  * Trilinos_ENABLE_Fortran is OFF because the fortran compatibility checks
#    cannot be cross-compiled on the MIC.
#  * The *_EXITCODE=0 entries tell cmake the answers to compile-and-execute
#    tests, to prevent cmake from executing a cross-compiled program.
# NOTE(review): the single quotes inside the values below are stored literally
# and are not removed when ${CMAKE_CONFIGURE} is later expanded unquoted, so
# cmake appears to receive them as part of the value -- confirm this is intended.
if test "${INTEL_XEON_PHI}" = "ON" ; then
  for cfg_opt in \
    "CMAKE_SYSTEM_NAME=Linux" \
    "CMAKE_CXX_FLAGS:STRING=-mmic" \
    "CMAKE_C_FLAGS:STRING=-mmic" \
    "CMAKE_Fortran_COMPILER:FILEPATH=ifort" \
    "BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" \
    "BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" \
    "Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" \
    "Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" \
    "BUILD_SHARED_LIBS:BOOL=OFF" \
    "DART_TESTING_TIMEOUT:STRING=600" \
    "LAPACK_LIBRARY_NAMES=''" \
    "TPL_LAPACK_LIBRARIES=''" \
    "TPL_ENABLE_BinUtils=OFF" \
    "TPL_Pthread_LIBRARIES=pthread" \
    "Trilinos_ENABLE_Fortran:BOOL=OFF" \
    "HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" \
    "HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" \
    "LAPACK_SLAPY2_WORKS_EXITCODE=0"
  do
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
  done
fi
#-----------------------------------------------------------------------------
# Finish the option list with the build type and makefile verbosity.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
#-----------------------------------------------------------------------------
# Show, then run, the configure command.
# The script header states that additional command-line arguments are passed
# directly to CMake, so "$@" is forwarded here.  It comes after the generated
# options, so a -D given on the command line overrides the value built above.
# ${CMAKE_CONFIGURE} is expanded unquoted on purpose: it must split into words.
echo "cmake ${CMAKE_CONFIGURE}" "$@" "${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} "$@" "${CMAKE_PROJECT_DIR}"
#-----------------------------------------------------------------------------

View File

@ -0,0 +1,190 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake.
#
#
# Force CMake to re-evaluate build options by deleting its earlier output.
# WARNING: the globs are resolved in the current directory, so run this script
# only from a dedicated build directory; entries named CMake*, Trilinos*, etc.
# are removed.  CMake's generated makefile is spelled 'Makefile'.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#-----------------------------------------------------------------------------
# The cmake configure options are built up incrementally in CMAKE_CONFIGURE.
# It starts with the installation prefix, a directory named after today's date
# as printed by 'date +%F':
CMAKE_CONFIGURE=" -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/$(date +%F)"
# Location of the Trilinos source tree:
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
#-----------------------------------------------------------------------------
# General build options, kept in variables because later sections test them.
# For a debug build use CMAKE_BUILD_TYPE="DEBUG".
CMAKE_BUILD_TYPE="RELEASE"
CMAKE_VERBOSE_MAKEFILE="OFF"
#-----------------------------------------------------------------------------
# CUDA architecture to build for: "" (no CUDA), "20", "30" or "35".
CUDA_ARCH="35"
# Build host code with the Intel compiler:
INTEL="ON"
# To build for the MIC architecture, uncomment:
# INTEL_XEON_PHI=ON
# HWLOC installation to build with:
HWLOC_BASE_DIR="/home/projects/hwloc/1.6.2"
# MPI installation used by the examples (empty: build without MPI):
MPI_BASE_DIR=""
#-----------------------------------------------------------------------------
# MPI configuration (MPI is only used for the examples).
#
# MPI_BASE_DIR must be known so that the include path can be passed to the
# Cuda compiler; an empty MPI_BASE_DIR turns MPI off.
if test -n "${MPI_BASE_DIR}" ; then
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Pthread is always enabled and OpenMP always disabled in this script; flip
# the BOOL values below to change that.  The kokkos-only package selection
# follows.
for cfg_opt in \
  "TPL_ENABLE_Pthread:BOOL=ON" \
  "Trilinos_ENABLE_OpenMP:BOOL=OFF" \
  "Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" \
  "Trilinos_ENABLE_EXAMPLES:BOOL=ON" \
  "Trilinos_ENABLE_TESTS:BOOL=ON" \
  "Trilinos_ENABLE_KokkosCore:BOOL=ON" \
  "Trilinos_ENABLE_KokkosContainers:BOOL=ON" \
  "Trilinos_ENABLE_KokkosExample:BOOL=ON"
do
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
done
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Hardware locality cmake configuration; nothing is added when HWLOC_BASE_DIR
# is empty.
if test -n "${HWLOC_BASE_DIR}" ; then
  for cfg_opt in \
    "TPL_ENABLE_HWLOC:BOOL=ON" \
    "HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" \
    "HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
  do
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
  done
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration (skipped entirely when CUDA_ARCH is empty):
if test -n "${CUDA_ARCH}" ; then
  # Debug symbols for a DEBUG build, optimization otherwise.
  case "${CMAKE_BUILD_TYPE}" in
    DEBUG) nvcc_opt_flag="-g" ;;
    *)     nvcc_opt_flag="-O3" ;;
  esac
  # Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
  # this is different than the standard CMAKE_CXX_FLAGS syntax.
  CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH};-Xcompiler;-Wall,-ansi;${nvcc_opt_flag}"
  for cfg_opt in \
    "TPL_ENABLE_CUDA:BOOL=ON" \
    "TPL_ENABLE_CUSPARSE:BOOL=ON" \
    "CUDA_VERBOSE_BUILD:BOOL=OFF" \
    "CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
  do
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
  done
fi
#-----------------------------------------------------------------------------
# The Intel compilers are used for a host Intel build and for a Xeon Phi build.
if [ "${INTEL}" = "ON" ] || [ "${INTEL_XEON_PHI}" = "ON" ] ; then
  CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc -D CMAKE_CXX_COMPILER=icpc"
fi
#-----------------------------------------------------------------------------
# Cross-compile for Intel Xeon Phi:
#  * Trilinos_ENABLE_Fortran is OFF because the fortran compatibility checks
#    cannot be cross-compiled on the MIC.
#  * The *_EXITCODE=0 entries tell cmake the answers to compile-and-execute
#    tests, to prevent cmake from executing a cross-compiled program.
# NOTE(review): the single quotes inside the values below are stored literally
# and are not removed when ${CMAKE_CONFIGURE} is later expanded unquoted, so
# cmake appears to receive them as part of the value -- confirm this is intended.
if test "${INTEL_XEON_PHI}" = "ON" ; then
  for cfg_opt in \
    "CMAKE_SYSTEM_NAME=Linux" \
    "CMAKE_CXX_FLAGS:STRING=-mmic" \
    "CMAKE_C_FLAGS:STRING=-mmic" \
    "CMAKE_Fortran_COMPILER:FILEPATH=ifort" \
    "BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" \
    "BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" \
    "Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" \
    "Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" \
    "BUILD_SHARED_LIBS:BOOL=OFF" \
    "DART_TESTING_TIMEOUT:STRING=600" \
    "LAPACK_LIBRARY_NAMES=''" \
    "TPL_LAPACK_LIBRARIES=''" \
    "TPL_ENABLE_BinUtils=OFF" \
    "TPL_Pthread_LIBRARIES=pthread" \
    "Trilinos_ENABLE_Fortran:BOOL=OFF" \
    "HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" \
    "HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" \
    "LAPACK_SLAPY2_WORKS_EXITCODE=0"
  do
    CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D ${cfg_opt}"
  done
fi
#-----------------------------------------------------------------------------
# Finish the option list with the build type and makefile verbosity.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
#-----------------------------------------------------------------------------
# Show, then run, the configure command.
# The script header states that additional command-line arguments are passed
# directly to CMake, so "$@" is forwarded here.  It comes after the generated
# options, so a -D given on the command line overrides the value built above.
# ${CMAKE_CONFIGURE} is expanded unquoted on purpose: it must split into words.
echo "cmake ${CMAKE_CONFIGURE}" "$@" "${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} "$@" "${CMAKE_PROJECT_DIR}"
#-----------------------------------------------------------------------------

View File

@ -0,0 +1,140 @@
#!/bin/bash
#
# Configure Trilinos (Kokkos + Teuchos + Tpetra) for a build that uses
# CUDA, OpenMP, and MPI.
#
# Usage: run from an empty build directory. Any command-line arguments are
# forwarded unchanged to cmake, after the options set here, so they can
# override them (e.g. ./this-script -D CMAKE_BUILD_TYPE:STRING=RELEASE).
#
# Before invoking this script, set the OMPI_CXX environment variable
# to point to nvcc_wrapper, wherever it happens to live. (If you use
# an MPI implementation other than OpenMPI, set the corresponding
# environment variable instead.)
#

# Drop any previous CMake state so the configuration starts from scratch.
rm -f CMakeCache.txt
rm -rf CMakeFiles

# Site-specific tool locations; edit these for your machine.
MPI_PATH="/opt/mpi/openmpi/1.8.2/nvcc-gcc/4.8.3-6.5"
CUDA_PATH="/opt/nvidia/cuda/6.5.14"

#
# As long as there are any .cu files in Trilinos, we'll need to set
# CUDA_NVCC_FLAGS. If Trilinos gets rid of all of its .cu files and
# lets nvcc_wrapper handle them as .cpp files, then we won't need to
# set CUDA_NVCC_FLAGS. As it is, given that we need to set
# CUDA_NVCC_FLAGS, we must make sure that they are the same flags as
# nvcc_wrapper passes to nvcc.
#
# The value is a CMake list, hence the ';' separators.
CUDA_NVCC_FLAGS="-gencode;arch=compute_35,code=sm_35;-I${MPI_PATH}/include"
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp"
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3;-DKOKKOS_USE_CUDA_UVM"

# Option groups below, separated by blank continuation lines:
#   general build settings; MPI compilers and launcher; language/threading
#   features; CUDA toolkit; third-party libraries; Kokkos; Teuchos; Tpetra;
#   packages that are switched off.
# Each cache variable is passed exactly once, CMake variable names are
# case-sensitive, and "$@" forwards the caller's arguments without re-splitting.
cmake \
-D CMAKE_INSTALL_PREFIX:PATH="$PWD/../install/" \
-D CMAKE_BUILD_TYPE:STRING=DEBUG \
-D CMAKE_CXX_FLAGS:STRING="-g -Wall" \
-D CMAKE_C_FLAGS:STRING="-g -Wall" \
-D CMAKE_Fortran_FLAGS:STRING="" \
-D CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS="" \
-D Trilinos_ENABLE_Triutils=OFF \
-D Trilinos_ENABLE_INSTALL_CMAKE_CONFIG_FILES:BOOL=OFF \
-D Trilinos_ENABLE_DEBUG:BOOL=OFF \
-D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF \
-D Trilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=OFF \
-D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING="" \
-D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF \
-D Trilinos_ENABLE_ALL_OPTIONAL_PACKAGES:BOOL=OFF \
-D BUILD_SHARED_LIBS:BOOL=OFF \
-D DART_TESTING_TIMEOUT:STRING=600 \
-D CMAKE_VERBOSE_MAKEFILE:BOOL=OFF \
\
\
-D CMAKE_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \
-D CMAKE_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \
-D MPI_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \
-D MPI_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \
-D CMAKE_Fortran_COMPILER:FILEPATH="${MPI_PATH}/bin/mpif77" \
-D MPI_EXEC:FILEPATH="${MPI_PATH}/bin/mpirun" \
-D MPI_EXEC_POST_NUMPROCS_FLAGS:STRING="-bind-to;socket;--map-by;socket;env;CUDA_MANAGED_FORCE_DEVICE_ALLOC=1;CUDA_LAUNCH_BLOCKING=1;OMP_NUM_THREADS=2" \
\
\
-D Trilinos_ENABLE_CXX11:BOOL=OFF \
-D TPL_ENABLE_MPI:BOOL=ON \
-D Trilinos_ENABLE_OpenMP:BOOL=ON \
-D Trilinos_ENABLE_ThreadPool:BOOL=ON \
\
\
-D TPL_ENABLE_CUDA:BOOL=ON \
-D CUDA_TOOLKIT_ROOT_DIR:FILEPATH="${CUDA_PATH}" \
-D CUDA_PROPAGATE_HOST_FLAGS:BOOL=OFF \
-D TPL_ENABLE_Thrust:BOOL=OFF \
-D Thrust_INCLUDE_DIRS:FILEPATH="${CUDA_PATH}/include" \
-D TPL_ENABLE_CUSPARSE:BOOL=OFF \
-D TPL_ENABLE_Cusp:BOOL=OFF \
-D Cusp_INCLUDE_DIRS="/home/crtrott/Software/cusp" \
-D CUDA_VERBOSE_BUILD:BOOL=OFF \
-D CUDA_NVCC_FLAGS:STRING="${CUDA_NVCC_FLAGS}" \
\
\
-D TPL_ENABLE_HWLOC=OFF \
-D HWLOC_INCLUDE_DIRS="/usr/local/software/hwloc/current/include" \
-D HWLOC_LIBRARY_DIRS="/usr/local/software/hwloc/current/lib" \
-D TPL_ENABLE_BinUtils=OFF \
-D TPL_ENABLE_BLAS:STRING=ON \
-D TPL_ENABLE_LAPACK:STRING=ON \
-D TPL_ENABLE_MKL:STRING=OFF \
-D TPL_ENABLE_GTEST:STRING=ON \
-D TPL_ENABLE_SuperLU=ON \
-D TPL_SuperLU_LIBRARIES="/home/crtrott/Software/SuperLU_4.3/lib/libsuperlu_4.3.a" \
-D TPL_SuperLU_INCLUDE_DIRS="/home/crtrott/Software/SuperLU_4.3/SRC" \
\
\
-D Trilinos_ENABLE_Kokkos:BOOL=ON \
-D Trilinos_ENABLE_KokkosCore:BOOL=ON \
-D Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON \
-D Trilinos_ENABLE_KokkosContainers:BOOL=ON \
-D Trilinos_ENABLE_TpetraKernels:BOOL=ON \
-D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON \
-D Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON \
-D Trilinos_ENABLE_KokkosExample:BOOL=ON \
-D Kokkos_ENABLE_EXAMPLES:BOOL=ON \
-D Kokkos_ENABLE_TESTS:BOOL=OFF \
-D KokkosClassic_DefaultNode:STRING="Kokkos::Compat::KokkosCudaWrapperNode" \
-D TpetraClassic_ENABLE_OpenMPNode=OFF \
-D TpetraClassic_ENABLE_TPINode=OFF \
-D TpetraClassic_ENABLE_MKL=OFF \
-D Kokkos_ENABLE_Cuda_UVM=ON \
\
\
-D Trilinos_ENABLE_Teuchos:BOOL=ON \
-D Teuchos_ENABLE_COMPLEX:BOOL=OFF \
\
\
-D Trilinos_ENABLE_Tpetra:BOOL=ON \
-D Tpetra_ENABLE_KokkosCore=ON \
-D Tpetra_ENABLE_Kokkos_DistObject=OFF \
-D Tpetra_ENABLE_Kokkos_Refactor=ON \
-D Tpetra_ENABLE_TESTS=ON \
-D Tpetra_ENABLE_EXAMPLES=ON \
-D Tpetra_ENABLE_MPI_CUDA_RDMA:BOOL=ON \
\
\
-D Trilinos_ENABLE_Belos=OFF \
-D Trilinos_ENABLE_Amesos=OFF \
-D Trilinos_ENABLE_Amesos2=OFF \
-D Trilinos_ENABLE_Ifpack=OFF \
-D Trilinos_ENABLE_Ifpack2=OFF \
-D Trilinos_ENABLE_Epetra=OFF \
-D Trilinos_ENABLE_EpetraExt=OFF \
-D Trilinos_ENABLE_Zoltan=OFF \
-D Trilinos_ENABLE_Zoltan2=OFF \
-D Trilinos_ENABLE_MueLu=OFF \
-D Belos_ENABLE_TESTS=ON \
-D Belos_ENABLE_EXAMPLES=ON \
-D MueLu_ENABLE_TESTS=ON \
-D MueLu_ENABLE_EXAMPLES=ON \
-D Ifpack2_ENABLE_TESTS=ON \
-D Ifpack2_ENABLE_EXAMPLES=ON \
"$@" \
"${HOME}/Trilinos"

View File

@ -0,0 +1,153 @@
// -------------------------------------------------------------------------------- //
The following steps are for workstations/servers with the SEMS environment installed.
// -------------------------------------------------------------------------------- //
Summary:
- Step 1: Rigorous testing of Kokkos' develop branch for each backend (Serial, OpenMP, Threads, Cuda) with all supported compilers.
- Step 2: Snapshot Kokkos' develop branch into current Trilinos develop branch.
- Step 3: Build and test Trilinos with combinations of compilers, types, backends.
- Step 4: Promote Kokkos develop branch to master if the snapshot does not cause any new tests to fail; else track/fix causes of new failures.
- Step 5: Snapshot Kokkos tagged master branch into Trilinos and push Trilinos.
// -------------------------------------------------------------------------------- //
// -------------------------------------------------------------------------------- //
Step 1:
1.1. Update kokkos develop branch (NOT a fork)
(From kokkos directory):
git fetch --all
git checkout develop
git reset --hard origin/develop
1.2. Create a testing directory - here the directory is created within the kokkos directory
mkdir testing
cd testing
1.3. Run the test_all_sandia script; various compiler and build-list options can be specified
../config/test_all_sandia
1.4. Clean repository of untracked files
cd ../
git clean -df
// -------------------------------------------------------------------------------- //
Step 2:
2.1. Update Trilinos develop branch
(From Trilinos directory):
git checkout develop
git fetch --all
git reset --hard origin/develop
git clean -df
2.2. Snapshot Kokkos into Trilinos - this requires python/2.7.9 and that both the Trilinos and Kokkos repositories be clean (no untracked or modified files)
module load python/2.7.9
python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages
// -------------------------------------------------------------------------------- //
Step 3:
3.1. Build and test Trilinos with 3 different configurations; a configure-all script is provided in Trilinos and should be modified to test each of the following 3 configurations with appropriate environment variable(s):
- GCC/4.7.2-OpenMP/Complex
Run tests with the following environment variable:
export OMP_NUM_THREADS=2
- Intel/15.0.2-Serial/NoComplex
- GCC/4.8.4/CUDA/7.5.18-Cuda/Serial/NoComplex
Run tests with the following environment variables:
export CUDA_LAUNCH_BLOCKING=1
export CUDA_MANAGED_FORCE_DEVICE_ALLOC=1
mkdir Build
cd Build
cp TRILINOS_PATH/sampleScripts/Sandia-SEMS/configure-all ./
** Set the path to Trilinos appropriately within the configure-all script **
source $SEMS_MODULE_ROOT/utils/sems-modules-init.sh kokkos
source configure-all
make -k (-k means "keep going" to get past build errors; -j12 can also be specified to build with 12 threads, for example)
ctest
3.2. Compare the failed test output to the test output on the dashboard (testing.sandia.gov/cdash, select Trilinos); investigate and fix problems if new tests fail after the Kokkos snapshot
// -------------------------------------------------------------------------------- //
Step 4:
4.1. Once all Trilinos tests pass, promote the Kokkos develop branch to master on GitHub
- DO NOT fast-forward the merge!!!!
(From kokkos directory):
git checkout master
git fetch --all
# Ensure we are on the current origin/master
git reset --hard origin/master
git merge --no-ff origin/develop
4.2. Update the tag in kokkos/config/master_history.txt
Tag description: MajorNumber.MinorNumber.WeeksSinceMinorNumberUpdate
Tag format: #.#.##
# Prepend master_history.txt with
# tag: #.#.##
# date: mm/dd/yyyy
# master: sha1
# develop: sha1
# -----------------------
git commit --amend -a
git tag -a #.#.##
tag: #.#.##
date: mm/dd/yyyy
master: sha1
develop: sha1
git push --follow-tags origin master
// -------------------------------------------------------------------------------- //
Step 5:
5.1. Make sure Trilinos is up-to-date - chances are other changes have been committed since the integration testing process began. If a substantial change has occurred that may be affected by the snapshot, the testing procedure may need to be repeated.
(From Trilinos directory):
git checkout develop
git fetch --all
git reset --hard origin/develop
git clean -df
5.2. Snapshot Kokkos master branch into Trilinos
(From kokkos directory):
git fetch --all
git checkout tags/#.#.##
git clean -df
python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages
5.3. Push the updated develop branch of Trilinos to GitHub - congratulations!!!
(From Trilinos directory):
git push
// -------------------------------------------------------------------------------- //

View File

@ -0,0 +1,113 @@
#!/bin/sh
#
# Configure a Trilinos build of the Kokkos subpackages with CUDA, OpenMP,
# Pthread, Qthread and hwloc enabled.
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# Cuda, OpenMP, Threads, Qthread, hwloc
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu
#
# The 'nvcc-wrapper' module should load a script that matches
# kokkos/config/nvcc_wrapper
#
#-----------------------------------------------------------------------------
# Source and installation directories:
# (the install directory is stamped with today's date, YYYY-MM-DD)
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/$(date +%F)
# All cmake options are accumulated in one space-separated string that is
# word-split by the shell when cmake is invoked, so option values (for
# example the install prefix) must not contain whitespace.
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized (uncomment the two DEBUG lines and comment out RELEASE
# to get a debug build with bounds checking)
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
#-----------------------------------------------------------------------------
# Cuda using GNU, use the nvcc_wrapper to build CUDA source
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Hardware locality configuration:
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
#-----------------------------------------------------------------------------
# Pthread
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON"
#-----------------------------------------------------------------------------
# OpenMP
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
#-----------------------------------------------------------------------------
# Qthread
QTHREAD_BASE_DIR="/home/projects/qthreads/2014-07-08/host/gnu/4.7.3"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_QTHREAD:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_INCLUDE_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_LIBRARY_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/lib"
#-----------------------------------------------------------------------------
# C++11 (disabled; uncomment to enable)
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
#
# WARNING: the patterns are relative to the current directory and include
# 'Trilinos*' and 'packages'; only run this from a dedicated build directory
# that neither is nor contains the Trilinos source tree.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#
# Show the command, then run it. ${CMAKE_CONFIGURE} must stay unquoted so it
# is split into separate arguments; the source path is quoted.
echo cmake ${CMAKE_CONFIGURE} "${TRILINOS_SOURCE_DIR}"
cmake ${CMAKE_CONFIGURE} "${TRILINOS_SOURCE_DIR}"
#-----------------------------------------------------------------------------

View File

@ -0,0 +1,104 @@
#!/bin/sh
#
# Configure a Trilinos build of the Kokkos subpackages with CUDA, OpenMP
# and hwloc enabled (Pthread disabled).
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# Cuda, OpenMP, hwloc
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu
#
# The 'nvcc-wrapper' module should load a script that matches
# kokkos/config/nvcc_wrapper
#
#-----------------------------------------------------------------------------
# Source and installation directories:
# (the install directory is stamped with today's date, YYYY-MM-DD)
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/$(date +%F)
# All cmake options are accumulated in one space-separated string that is
# word-split by the shell when cmake is invoked, so option values (for
# example the install prefix) must not contain whitespace.
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized (uncomment the two DEBUG lines and comment out RELEASE
# to get a debug build with bounds checking)
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
#-----------------------------------------------------------------------------
# Cuda using GNU, use the nvcc_wrapper to build CUDA source
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Hardware locality configuration:
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
#-----------------------------------------------------------------------------
# Pthread explicitly OFF so tribits doesn't automatically turn it on
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
#-----------------------------------------------------------------------------
# C++11 (disabled; uncomment to enable)
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
#
# WARNING: the patterns are relative to the current directory and include
# 'Trilinos*' and 'packages'; only run this from a dedicated build directory
# that neither is nor contains the Trilinos source tree.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#
# Show the command, then run it. ${CMAKE_CONFIGURE} must stay unquoted so it
# is split into separate arguments; the source path is quoted.
echo cmake ${CMAKE_CONFIGURE} "${TRILINOS_SOURCE_DIR}"
cmake ${CMAKE_CONFIGURE} "${TRILINOS_SOURCE_DIR}"
#-----------------------------------------------------------------------------

View File

@ -0,0 +1,88 @@
#!/bin/sh
#
# Configure a Trilinos build of the Kokkos subpackages with CUDA enabled
# (Pthread disabled).
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# Cuda
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu
#
# The 'nvcc-wrapper' module should load a script that matches
# kokkos/config/nvcc_wrapper
#
#-----------------------------------------------------------------------------
# Source and installation directories:
# (the install directory is stamped with today's date, YYYY-MM-DD)
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/$(date +%F)
# All cmake options are accumulated in one space-separated string that is
# word-split by the shell when cmake is invoked, so option values (for
# example the install prefix) must not contain whitespace.
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized (uncomment the two DEBUG lines and comment out RELEASE
# to get a debug build with bounds checking)
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
#-----------------------------------------------------------------------------
# Cuda using GNU, use the nvcc_wrapper to build CUDA source
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
# Pthread explicitly OFF, otherwise tribits will automatically turn it on
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# C++11 (disabled; uncomment to enable)
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
#
# WARNING: the patterns are relative to the current directory and include
# 'Trilinos*' and 'packages'; only run this from a dedicated build directory
# that neither is nor contains the Trilinos source tree.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#
# Show the command, then run it. ${CMAKE_CONFIGURE} must stay unquoted so it
# is split into separate arguments; the source path is quoted.
echo cmake ${CMAKE_CONFIGURE} "${TRILINOS_SOURCE_DIR}"
cmake ${CMAKE_CONFIGURE} "${TRILINOS_SOURCE_DIR}"
#-----------------------------------------------------------------------------

View File

@ -0,0 +1,84 @@
#!/bin/sh
#
# Configure a Trilinos build of the Kokkos subpackages with C++11 and
# OpenMP enabled, using the GNU compilers (Pthread disabled).
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# C++11, OpenMP
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3
#
#-----------------------------------------------------------------------------
# Source and installation directories:
# (the install directory is stamped with today's date, YYYY-MM-DD)
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/$(date +%F)
# All cmake options are accumulated in one space-separated string that is
# word-split by the shell when cmake is invoked, so option values (for
# example the install prefix) must not contain whitespace.
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized (uncomment the two DEBUG lines and comment out RELEASE
# to get a debug build with bounds checking)
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Pthread explicitly OFF so tribits doesn't automatically activate
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
#-----------------------------------------------------------------------------
# C++11
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
#
# WARNING: the patterns are relative to the current directory and include
# 'Trilinos*' and 'packages'; only run this from a dedicated build directory
# that neither is nor contains the Trilinos source tree.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#
# Show the command, then run it. ${CMAKE_CONFIGURE} must stay unquoted so it
# is split into separate arguments; the source path is quoted.
echo cmake ${CMAKE_CONFIGURE} "${TRILINOS_SOURCE_DIR}"
cmake ${CMAKE_CONFIGURE} "${TRILINOS_SOURCE_DIR}"
#-----------------------------------------------------------------------------

View File

@ -0,0 +1,78 @@
#!/bin/sh
#
# Configure a debug Trilinos build of the Kokkos subpackages with no
# optional Kokkos capabilities enabled, using the GNU compilers.
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# <none>
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3
#
#-----------------------------------------------------------------------------
# Source and installation directories:
# (the install directory is stamped with today's date, YYYY-MM-DD)
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/$(date +%F)
# All cmake options are accumulated in one space-separated string that is
# word-split by the shell when cmake is invoked, so option values (for
# example the install prefix) must not contain whitespace.
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized (this variant builds DEBUG with bounds checking; swap the
# commented line to get a RELEASE build instead)
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Kokkos Pthread explicitly OFF, TPL Pthread ON for gtest
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# C++11 (disabled; uncomment to enable)
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
#
# WARNING: the patterns are relative to the current directory and include
# 'Trilinos*' and 'packages'; only run this from a dedicated build directory
# that neither is nor contains the Trilinos source tree.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#
# Show the command, then run it. ${CMAKE_CONFIGURE} must stay unquoted so it
# is split into separate arguments; the source path is quoted.
echo cmake ${CMAKE_CONFIGURE} "${TRILINOS_SOURCE_DIR}"
cmake ${CMAKE_CONFIGURE} "${TRILINOS_SOURCE_DIR}"
#-----------------------------------------------------------------------------

View File

@ -0,0 +1,89 @@
#!/bin/sh
#
# Configure a Trilinos build of the Kokkos subpackages with the Intel C
# compiler, nvcc_wrapper as the C++ compiler, and CUDA and OpenMP enabled
# (Pthread disabled).
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# Intel, OpenMP, Cuda
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 cuda/7.0.4 intel/2015.0.090 nvcc-wrapper/intel
#
#-----------------------------------------------------------------------------
# Source and installation directories:
# (the install directory is stamped with today's date, YYYY-MM-DD)
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/$(date +%F)
# All cmake options are accumulated in one space-separated string that is
# word-split by the shell when cmake is invoked, so option values (for
# example the install prefix) must not contain whitespace.
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized (uncomment the two DEBUG lines and comment out RELEASE
# to get a debug build with bounds checking)
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
# Compilers: icc for C; nvcc_wrapper (rather than icpc) for C++ so that CUDA
# source can be built.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Pthread explicitly OFF
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
#-----------------------------------------------------------------------------
# C++11 (disabled; uncomment to enable)
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
#
# WARNING: the patterns are relative to the current directory and include
# 'Trilinos*' and 'packages'; only run this from a dedicated build directory
# that neither is nor contains the Trilinos source tree.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#
# Show the command, then run it. ${CMAKE_CONFIGURE} must stay unquoted so it
# is split into separate arguments; the source path is quoted.
echo cmake ${CMAKE_CONFIGURE} "${TRILINOS_SOURCE_DIR}"
cmake ${CMAKE_CONFIGURE} "${TRILINOS_SOURCE_DIR}"
#-----------------------------------------------------------------------------

View File

@ -0,0 +1,84 @@
#!/bin/sh
#
# Configure Trilinos with the Kokkos subpackages, their examples and tests.
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# WARNING: before configuring, this script deletes earlier CMake output from
# the current directory, including anything matching 'Trilinos*' and
# 'packages'.  It refuses to run where that would remove the source tree.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
#   Intel, OpenMP
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
#  module load cmake/2.8.11.2 intel/13.SP1.1.106
#
#-----------------------------------------------------------------------------
# Source and installation directories:

TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`

CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"

#-----------------------------------------------------------------------------
# Debug/optimized

# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"

#-----------------------------------------------------------------------------

CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"

#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:

CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"

#-----------------------------------------------------------------------------
# Pthread explicitly OFF

CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"

#-----------------------------------------------------------------------------
# OpenMP

CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"

#-----------------------------------------------------------------------------
# C++11

# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"

#-----------------------------------------------------------------------------
#
# Refuse to run where the cleanup below would delete the Trilinos sources:
# inside the source directory itself, or in a directory that holds the source
# tree under a name matching the 'Trilinos*' cleanup pattern.
#
BUILD_DIR=`pwd`
case "${TRILINOS_SOURCE_DIR}" in
  "${BUILD_DIR}" | "${BUILD_DIR}"/Trilinos* )
    echo "Refusing to configure in ${BUILD_DIR}: the cleanup step would remove the Trilinos source tree." >&2
    exit 1
    ;;
esac

#
# Remove CMake output files to force reconfigure from scratch.
# (CMake writes 'Makefile'; the glob is case-sensitive on most filesystems.)
#

rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*

#

# CMAKE_CONFIGURE is deliberately unquoted so it splits into separate options.
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}

cmake ${CMAKE_CONFIGURE} "${TRILINOS_SOURCE_DIR}"

#-----------------------------------------------------------------------------

View File

@ -0,0 +1,77 @@
#!/bin/sh
#
# Configure Trilinos with the Kokkos subpackages, their examples and tests.
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# WARNING: before configuring, this script deletes earlier CMake output from
# the current directory, including anything matching 'Trilinos*' and
# 'packages'.  It refuses to run where that would remove the source tree.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
#   OpenMP
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
#  module load cmake/2.8.11.2 gcc/4.8.3
#
#-----------------------------------------------------------------------------
# Source and installation directories:

TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`

CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"

#-----------------------------------------------------------------------------
# Debug/optimized

# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"

#-----------------------------------------------------------------------------

CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"

#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:

CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"

#-----------------------------------------------------------------------------
# OpenMP

CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"

# Pthread explicitly OFF, otherwise tribits will automatically turn it on

CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"

#-----------------------------------------------------------------------------
#
# Refuse to run where the cleanup below would delete the Trilinos sources:
# inside the source directory itself, or in a directory that holds the source
# tree under a name matching the 'Trilinos*' cleanup pattern.
#
BUILD_DIR=`pwd`
case "${TRILINOS_SOURCE_DIR}" in
  "${BUILD_DIR}" | "${BUILD_DIR}"/Trilinos* )
    echo "Refusing to configure in ${BUILD_DIR}: the cleanup step would remove the Trilinos source tree." >&2
    exit 1
    ;;
esac

#
# Remove CMake output files to force reconfigure from scratch.
# (CMake writes 'Makefile'; the glob is case-sensitive on most filesystems.)
#

rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*

#

# CMAKE_CONFIGURE is deliberately unquoted so it splits into separate options.
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}

cmake ${CMAKE_CONFIGURE} "${TRILINOS_SOURCE_DIR}"

#-----------------------------------------------------------------------------

View File

@ -0,0 +1,87 @@
#!/bin/sh
#
# Configure Trilinos with the Kokkos subpackages, their examples and tests.
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# WARNING: before configuring, this script deletes earlier CMake output from
# the current directory, including anything matching 'Trilinos*' and
# 'packages'.  It refuses to run where that would remove the source tree.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
#   Threads, hwloc
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
#  module load cmake/2.8.11.2 gcc/4.8.3
#
#-----------------------------------------------------------------------------
# Source and installation directories:

TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`

CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"

#-----------------------------------------------------------------------------
# Debug/optimized

# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"

#-----------------------------------------------------------------------------

CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"

#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:

CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"

#-----------------------------------------------------------------------------
# Hardware locality configuration:

HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"

CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"

#-----------------------------------------------------------------------------
# Pthread

CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON"

#-----------------------------------------------------------------------------
# C++11

# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"

#-----------------------------------------------------------------------------
#
# Refuse to run where the cleanup below would delete the Trilinos sources:
# inside the source directory itself, or in a directory that holds the source
# tree under a name matching the 'Trilinos*' cleanup pattern.
#
BUILD_DIR=`pwd`
case "${TRILINOS_SOURCE_DIR}" in
  "${BUILD_DIR}" | "${BUILD_DIR}"/Trilinos* )
    echo "Refusing to configure in ${BUILD_DIR}: the cleanup step would remove the Trilinos source tree." >&2
    exit 1
    ;;
esac

#
# Remove CMake output files to force reconfigure from scratch.
# (CMake writes 'Makefile'; the glob is case-sensitive on most filesystems.)
#

rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*

#

# CMAKE_CONFIGURE is deliberately unquoted so it splits into separate options.
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}

cmake ${CMAKE_CONFIGURE} "${TRILINOS_SOURCE_DIR}"

#-----------------------------------------------------------------------------

View File

@ -0,0 +1,3 @@
tag: 2.01.00 date: 07:21:2016 master: xxxxxxxx develop: fa6dfcc4
tag: 2.01.06 date: 09:02:2016 master: 9afaa87f develop: 555f1a3a

280
lib/kokkos/config/nvcc_wrapper Executable file
View File

@ -0,0 +1,280 @@
#!/bin/bash
#
# This shell script (nvcc_wrapper) wraps both the host compiler and
# NVCC, if you are building legacy C or C++ code with CUDA enabled.
# The script remedies some differences between the interface of NVCC
# and that of the host compiler, in particular for linking.
# It also means that a legacy code doesn't need separate .cu files;
# it can just use .cpp files.
#
# Default settings: change those according to your machine. For
# example, you may have two different wrappers with either icpc
# or g++ as their back-end compiler. The defaults can be overwritten
# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc).
# GPU architecture passed to nvcc as -arch=... when the caller gives no -arch.
default_arch="sm_35"
#default_arch="sm_50"

#
# The default C++ compiler.
# Taken from NVCC_WRAPPER_DEFAULT_COMPILER when that is set, otherwise g++.
#
host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"}
#host_compiler="icpc"
#host_compiler="/usr/local/gcc/4.8.3/bin/g++"
#host_compiler="/usr/local/gcc/4.9.1/bin/g++"

#
# Internal variables
#

# C++ files
cpp_files=""

# Host compiler arguments
xcompiler_args=""

# Cuda (NVCC) only arguments
cuda_args=""

# Arguments for both NVCC and Host compiler
shared_args=""

# Linker arguments
xlinker_args=""

# Linker arguments for the host-only command (the -Wl arguments without the
# -Xlinker prefix).  Initialised here so that a variable of the same name
# exported in the caller's environment cannot leak into the host command.
host_linker_args=""

# Object files passable to NVCC
object_files=""

# Link objects for the host linker only
object_files_xlinker=""

# Shared libraries with version numbers are not handled correctly by NVCC
shared_versioned_libraries_host=""
shared_versioned_libraries=""

# Does the User set the architecture
arch_set=0

# Does the user overwrite the host compiler
ccbin_set=0

#Error code of compilation
error_code=0

# Do a dry run without actually compiling
dry_run=0

# Skip NVCC compilation and use host compiler directly
host_only=0

# Enable workaround for CUDA 6.5 for pragma ident
replace_pragma_ident=0

# Mark first host compiler argument
first_xcompiler_arg=1

# Directory for rewritten sources; TMPDIR if set, otherwise /tmp
temp_dir=${TMPDIR:-/tmp}

# Check if we have an optimization argument already
optimization_applied=0

#echo "Arguments: $# $@"
# Classify every command-line argument: each one is appended to the variable
# that later decides whether it goes to nvcc, to the host compiler (via
# -Xcompiler) or to the linker (via -Xlinker).  The case patterns are tried
# top to bottom and the first match wins, so their order is significant.
while [ $# -gt 0 ]
do
case $1 in
#show the executed command
--show|--nvcc-wrapper-show)
dry_run=1
;;
#run host compilation only
--host-only)
host_only=1
;;
#replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros
--replace-pragma-ident)
replace_pragma_ident=1
;;
#handle source files to be compiled as cuda files
# NOTE(review): this pattern is tested before -D*/-I*, so an option whose
# text ends in a source suffix (e.g. -DNAME=x.cpp) is classified as a source
# file - confirm this cannot occur in practice.
*.cpp|*.cxx|*.cc|*.C|*.c++|*.cu)
cpp_files="$cpp_files $1"
;;
# Ensure we only have one optimization flag because NVCC doesn't allow multiple
-O*)
if [ $optimization_applied -eq 1 ]; then
echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used because nvcc can only accept a single optimization setting."
else
shared_args="$shared_args $1"
optimization_applied=1
fi
;;
#Handle shared args (valid for both nvcc and the host compiler)
-D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
shared_args="$shared_args $1"
;;
#Handle shared args that have an argument
# (the extra shift consumes the option's value, $2)
-o|-MT)
shared_args="$shared_args $1 $2"
shift
;;
#Handle known nvcc args
-gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*)
cuda_args="$cuda_args $1"
;;
#Handle known nvcc args that have an argument
-rdc|-maxrregcount|--default-stream)
cuda_args="$cuda_args $1 $2"
shift
;;
#Handle c++11 setting
--std=c++11|-std=c++11)
shared_args="$shared_args $1"
;;
#strip off -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98
-std=c++98|--std=c++98)
;;
#strip off pedantic because it produces endless warnings about #LINE added by the preprocessor
-pedantic|-Wpedantic|-ansi)
;;
#strip -Xcompiler because we add it
# (host compiler arguments are collected as one comma-separated list)
-Xcompiler)
if [ $first_xcompiler_arg -eq 1 ]; then
xcompiler_args="$2"
first_xcompiler_arg=0
else
xcompiler_args="$xcompiler_args,$2"
fi
shift
;;
#strip off "-x cu" because we add that; any other "-x <lang>" goes to the host compiler
-x)
if [[ $2 != "cu" ]]; then
if [ $first_xcompiler_arg -eq 1 ]; then
xcompiler_args="-x,$2"
first_xcompiler_arg=0
else
xcompiler_args="$xcompiler_args,-x,$2"
fi
fi
shift
;;
#Handle -ccbin (if it is not set we can set it to a default value)
-ccbin)
cuda_args="$cuda_args $1 $2"
ccbin_set=1
host_compiler=$2
shift
;;
#Handle -arch argument (if it is not set a default is used)
-arch*)
cuda_args="$cuda_args $1"
arch_set=1
;;
#Handle -Xcudafe argument
-Xcudafe)
cuda_args="$cuda_args -Xcudafe $2"
shift
;;
#Handle args that should be sent to the linker
# ${1:4:${#1}} drops the first four characters of the argument (presumably
# the "-Wl," prefix).
# NOTE(review): the host-only command receives the remainder without any
# -Wl, prefix - confirm the host compiler accepts it in that form.
-Wl*)
xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
host_linker_args="$host_linker_args ${1:4:${#1}}"
;;
#Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
*.a|*.so|*.o|*.obj)
object_files="$object_files $1"
object_files_xlinker="$object_files_xlinker -Xlinker $1"
;;
#Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking
*.dylib)
object_files="$object_files -Xlinker $1"
object_files_xlinker="$object_files_xlinker -Xlinker $1"
;;
#Handle shared libraries with *.so.* names which nvcc can't do.
*.so.*)
shared_versioned_libraries_host="$shared_versioned_libraries_host $1"
shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1"
;;
#All other args are sent to the host compiler
*)
if [ $first_xcompiler_arg -eq 1 ]; then
xcompiler_args=$1
first_xcompiler_arg=0
else
xcompiler_args="$xcompiler_args,$1"
fi
;;
esac
shift
done
# Fall back to the default host compiler unless -ccbin was given.
[ $ccbin_set -eq 1 ] || cuda_args="$cuda_args -ccbin $host_compiler"

# Fall back to the default architecture unless an -arch option was given.
[ $arch_set -eq 1 ] || cuda_args="$cuda_args -arch=$default_arch"

# nvcc command line; the collected host compiler arguments are attached
# behind a single -Xcompiler, and only if at least one was collected.
nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries"
[ $first_xcompiler_arg -ne 0 ] || nvcc_command="$nvcc_command -Xcompiler $xcompiler_args"

# Command line used with --host-only: the host compiler is called directly.
host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host"
#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'
# With --replace-pragma-ident, every source file that contains such a pragma is
# copied to $temp_dir with the pragma rewritten, and the copy is compiled in
# place of the original.  Files without the pragma are passed through as-is.
if [ $replace_pragma_ident -eq 1 ]; then
  cpp_files2=""
  for file in $cpp_files
  do
    var=`grep pragma "${file}" | grep ident | grep "#"`
    if [ "${#var}" -gt 0 ]
    then
      # Replace '/' by '_' so that a source given with a directory part
      # (e.g. src/foo.cpp) maps to a file directly inside $temp_dir rather
      # than to a sub-directory of it that does not exist.  For a bare file
      # name the result is the same as before.
      tmp_file="$temp_dir/nvcc_wrapper_tmp_${file//\//_}"
      sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' "$file" > "$tmp_file"
      cpp_files2="$cpp_files2 $tmp_file"
    else
      cpp_files2="$cpp_files2 $file"
    fi
  done
  cpp_files=$cpp_files2
  #echo $cpp_files
fi
# Append the input files to both command lines.
if [ -n "$cpp_files" ]; then
  # Compiling: the sources follow "-x cu", and nvcc gets the object files in
  # their -Xlinker form.
  nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files"
  host_command="$host_command $object_files $cpp_files"
else
  # No sources: both commands get the object files.
  nvcc_command="$nvcc_command $object_files"
  host_command="$host_command $object_files"
fi
# Pick the command line that matches the requested mode.
if [ $host_only -eq 1 ]; then
  selected_command=$host_command
else
  selected_command=$nvcc_command
fi

# Dry run: print the command instead of running it.
if [ $dry_run -eq 1 ]; then
  echo $selected_command
  exit 0
fi

# Run the command (unquoted on purpose so it splits into words) and report
# its exit status as the wrapper's own.
$selected_command
error_code=$?
exit $error_code

279
lib/kokkos/config/snapshot.py Executable file
View File

@ -0,0 +1,279 @@
#! /usr/bin/env python
"""
Snapshot a project into another project and perform the necessary repo actions
to provide a commit message that can be used to trace back to the exact point
in the source repository.
"""
#todo:
# Support svn
# Allow renaming of the source dir in the destination path
# Check if a new snapshot is necessary?
#
import sys
#check the version number so that there is a good error message when argparse is not available.
#This checks for exactly 2.7 which is bad, but it is a python 2 script and argparse was introduced
#in 2.7 which is also the last version of python 2. If this script is updated for python 3 this
#will need to change, but for now it is not safe to allow 3.x to run this.
if sys.version_info[:2] != (2, 7):
print "Error snapshot requires python 2.7 detected version is %d.%d." % (sys.version_info[0], sys.version_info[1])
sys.exit(1)
import subprocess, argparse, re, doctest, os, datetime, traceback
def parse_cmdline(description):
    """Parse the command line and validate the resulting options.

    description -- text shown in the --help output.

    Returns the argparse namespace after it has been passed through
    validate_options(), which may raise RuntimeError.
    """
    parser = argparse.ArgumentParser(usage="snapshot.py [options] source destination", description=description)

    # "--no-comit" is the original, misspelled name of this option.  It is kept
    # so that existing command lines keep working; "--no-commit" is the
    # correctly spelled form.
    parser.add_argument("-n", "--no-commit", "--no-comit", action="store_false", dest="create_commit", default=True,
                        help="Do not perform a commit or create a commit message.")
    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose_mode", default=False,
                        help="Enable verbose mode.")
    parser.add_argument("-d", "--debug", action="store_true", dest="debug_mode", default=False,
                        help="Enable debugging output.")
    parser.add_argument("--no-validate-repo", action="store_true", dest="no_validate_repo", default=False,
                        help="Reduce the validation that the source and destination repos are clean to a warning.")
    # An empty repo type means "not given": validate_options() then uses the
    # type it detects on disk.
    parser.add_argument("--source-repo", choices=["git","none"], default="",
                        help="Type of repository of the source, use none to skip all repository operations.")
    parser.add_argument("--dest-repo", choices=["git","none"], default="",
                        help="Type of repository of the destination, use none to skip all repository operations.")

    parser.add_argument("source", help="Source project to snapshot from.")
    parser.add_argument("destination", help="Destination to snapshot to.")

    options = parser.parse_args()
    options = validate_options(options)
    return options
#end parseCmdline
def validate_options(options):
    """Normalise the paths in options and settle the repository types.

    The source and destination become absolute paths without a trailing
    separator, a missing destination directory is created, and
    options.source_root / options.dest_root are set from
    deterimine_repo_type().  A repository type left empty on the command line
    is replaced by the detected one.

    Raises RuntimeError if the source does not exist, if either side is an
    svn checkout, or if a requested type contradicts the detected one.
    Returns the updated options object.
    """
    # Drop trailing separators first: rsync would treat such a path
    # differently than expected.
    options.source = os.path.abspath(options.source.rstrip(os.sep))
    options.destination = os.path.abspath(options.destination.rstrip(os.sep))

    if not os.path.exists(options.source):
        raise RuntimeError("Could not find source directory of %s." % options.source)
    detected_source_type, options.source_root = deterimine_repo_type(options.source)

    if not os.path.exists(options.destination):
        print("Could not find destination directory of %s so it will be created." % options.destination)
        os.makedirs(options.destination)
    detected_dest_type, options.dest_root = deterimine_repo_type(options.destination)

    # svn is detected but not supported yet
    if "svn" in (detected_source_type, detected_dest_type):
        raise RuntimeError("SVN repositories are not supported at this time.")

    if options.source_repo == "":
        # no type requested for the source, so use the detected one
        options.source_repo = detected_source_type
    elif options.source_repo != "none" and options.source_repo != detected_source_type:
        raise RuntimeError("Specified source repository type of %s conflicts with determined type of %s" % \
            (options.source_repo, detected_source_type))

    if options.dest_repo == "":
        # no type requested for the destination, so use the detected one
        options.dest_repo = detected_dest_type
    elif options.dest_repo != "none" and options.dest_repo != detected_dest_type:
        raise RuntimeError("Specified destination repository type of %s conflicts with determined type of %s" % \
            (options.dest_repo, detected_dest_type))

    return options
#end validate_options
def run_cmd(cmd, options, working_dir="."):
    """Run cmd (a list of arguments) in working_dir and capture its output.

    options.verbose_mode announces the command before it runs;
    options.debug_mode dumps the captured stdout and stderr afterwards.

    Returns the tuple (stdout, stderr).  Raises RuntimeError, carrying both
    streams, when the command exits with a non-zero status.
    """
    command_text = " ".join(cmd)
    if options.verbose_mode:
        print("Running command '%s' in dir %s." % (command_text, working_dir))

    child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=working_dir)
    captured_out, captured_err = child.communicate()
    exit_status = child.wait()

    if options.debug_mode:
        debug_lines = (
            "==== %s stdout start ====" % command_text,
            captured_out,
            "==== %s stdout end ====" % command_text,
            "==== %s stderr ====" % command_text,
            captured_err,
            "==== %s stderr ====" % command_text,
        )
        for debug_line in debug_lines:
            print(debug_line)

    if exit_status != 0:
        raise RuntimeError("Command '%s' failed with error code %d. Error message:%s%s%sstdout:%s" % \
            (command_text, exit_status, os.linesep, captured_err, os.linesep, captured_out))

    return captured_out, captured_err
#end run_cmd
def deterimine_repo_type(location):
    """Find the repository that contains location.

    Starting at location and moving up one directory at a time, looks for a
    ".git" or ".svn" entry.

    Returns (repo_type, repo_root): repo_type is "git" or "svn" and repo_root
    the directory holding the marker, or ("none", "") when no enclosing
    repository is found.
    """
    current = location
    while current != "":
        if os.path.exists(os.path.join(current, ".git")):
            return "git", current
        if os.path.exists(os.path.join(current, ".svn")):
            return "svn", current
        # Step to the parent with os.path.dirname so that only real ancestor
        # directories are examined; slicing the string at the last separator
        # chops a separator-free name down character by character.
        parent = os.path.dirname(current)
        if parent == current:
            # reached the filesystem root
            break
        current = parent
    return "none", ""
#end deterimine_repo_type
def rsync(source, dest, options):
    """Mirror source into dest with rsync.

    source  -- path to copy from.
    dest    -- path to copy into.
    options -- options.debug_mode adds -v; options.source_repo == "git"
               excludes the .git directory from the copy.

    Runs "rsync -ar --delete" through run_cmd(), which raises RuntimeError if
    rsync fails.
    """
    rsync_cmd = ["rsync", "-ar", "--delete"]
    if options.debug_mode:
        rsync_cmd.append("-v")

    if options.source_repo == "git":
        rsync_cmd.append("--exclude=.git")

    # Use the paths the caller passed in rather than reading them from options.
    rsync_cmd.append(source)
    rsync_cmd.append(dest)
    run_cmd(rsync_cmd, options)
#end rsync
def create_commit_message(commit_id, commit_log, project_name, project_location):
    """Compose the commit message that records where a snapshot came from.

    The message names the project and commit id, then the repository
    location, then the commit log, separated by blank lines (os.linesep).
    """
    eol = os.linesep
    pieces = [
        "Snapshot of %s from commit %s" % (project_name, commit_id),
        eol * 2,
        "From repository at %s" % project_location,
        eol * 2,
        "At commit:" + eol,
        commit_log,
    ]
    return "".join(pieces)
#end create_commit_message
def find_git_commit_information(options):
    r"""
    Look up the current commit and the 'origin' remote of the git repository
    at options.source.

    Returns (commit_id, commit_log, source_name, source_location): the id
    parsed from "git log -1", the full text of that log entry, and the last
    path component and full text of the origin remote from "git remote -v".

    Raises RuntimeError if the commit id or the origin remote cannot be found,
    or (through run_cmd) if a git command fails.

    >>> class fake_options:
    ...   source="."
    ...   verbose_mode=False
    ...   debug_mode=False
    >>> myoptions = fake_options()
    >>> find_git_commit_information(myoptions)[2:]
    ('sems', 'software.sandia.gov:/git/sems')
    """
    git_log_cmd = ["git", "log", "-1"]

    output, error = run_cmd(git_log_cmd, options, options.source)

    commit_match = re.match("commit ([0-9a-fA-F]+)", output)
    if not commit_match:
        # Report unexpected output the same way as every other failure here,
        # rather than failing on the missing match object.
        raise RuntimeError("Could not find the current commit of repo at %s." % (options.source))
    commit_id = commit_match.group(1)
    commit_log = output

    git_remote_cmd = ["git", "remote", "-v"]
    output, error = run_cmd(git_remote_cmd, options, options.source)

    # group(1) is the whole remote location, group(2) what follows its last "/"
    remote_match = re.search(r"origin\s([^ ]*/([^ ]+))", output, re.MULTILINE)
    if not remote_match:
        raise RuntimeError("Could not find origin of repo at %s. Consider using none for source repo type." % (options.source))

    source_location = remote_match.group(1)
    source_name = remote_match.group(2).strip()

    if source_name[-1] == "/":
        source_name = source_name[:-1]

    return commit_id, commit_log, source_name, source_location
#end find_git_commit_information
def do_git_commit(message, options):
    """Stage everything in options.destination and commit it with message.

    Afterwards prints the abbreviated hash of the new commit together with
    options.dest_root.  Failures of the git commands surface as RuntimeError
    from run_cmd().
    """
    if options.verbose_mode:
        print("Commiting to destination repository.")

    target_dir = options.destination
    for git_args in (["add", "-A"], ["commit", "-m%s" % message]):
        run_cmd(["git"] + git_args, options, target_dir)

    short_hash, _ignored = run_cmd(["git", "log", "--format=%h", "-1"], options, target_dir)

    print("Commit %s was made to %s." % (short_hash.strip(), options.dest_root))
#end do_git_commit
def verify_git_repo_clean(location, options):
    """Check that the git working tree at location has no pending changes.

    A dirty tree raises RuntimeError, unless options.no_validate_repo is set,
    in which case only warnings are printed.
    """
    status_text, _ignored = run_cmd(["git", "status", "--porcelain"], options, location)

    if status_text == "":
        # nothing reported: the tree is clean
        return

    if options.no_validate_repo == False:
        raise RuntimeError("%s is not clean.%sPlease commit or stash all changes before running snapshot."
            % (location, os.linesep))

    for warning in ("WARNING: %s is not clean. Proceeding anyway." % location,
                    "WARNING: This could lead to differences in the source and destination.",
                    "WARNING: It could also lead to extra files being included in the snapshot commit."):
        print(warning)
#end verify_git_repo_clean
def main(options):
    """Snapshot options.source into options.destination.

    Steps: check that git repositories are clean, build the commit message
    from the source, rsync the files, then either commit in a git
    destination or, for a destination of type "none", write the message to
    snapshot_message.txt in the current directory.

    When options.create_commit is False (-n on the command line) the files
    are still copied, but no commit message is built and nothing is
    committed or written.

    Raises RuntimeError (from the helpers) on any failure.
    """
    # Namespaces built by parse_cmdline() always have create_commit; default
    # to committing for any other options object.
    create_commit = getattr(options, "create_commit", True)

    if options.verbose_mode:
        print("Snapshotting %s to %s." % (options.source, options.destination))

    if options.source_repo == "git":
        verify_git_repo_clean(options.source, options)

    commit_message = None
    if create_commit:
        if options.source_repo == "git":
            commit_id, commit_log, repo_name, repo_location = find_git_commit_information(options)
        elif options.source_repo == "none":
            commit_id = "N/A"
            commit_log = "Unknown commit from %s snapshotted at: %s" % (options.source, datetime.datetime.now())
            repo_name = options.source
            repo_location = options.source

        commit_message = create_commit_message(commit_id, commit_log, repo_name, repo_location) + os.linesep*2

    if options.dest_repo == "git":
        verify_git_repo_clean(options.destination, options)

    rsync(options.source, options.destination, options)

    if not create_commit:
        # -n was given: copy only
        return

    if options.dest_repo == "git":
        do_git_commit(commit_message, options)
    elif options.dest_repo == "none":
        file_name = "snapshot_message.txt"
        # "with" closes the file even if the write fails
        with open(file_name, "w") as message_file:
            message_file.write(commit_message)
        cwd = os.getcwd()
        print("No commit done by request. Please use file at:")
        print("%s%sif you wish to commit this to a repo later." % (cwd+"/"+file_name, os.linesep))
#end main
# Script entry point.  "--test" runs the doctests and exits; otherwise the
# command line is parsed and the snapshot performed.  A RuntimeError is
# reported and turned into exit status 1 (with a traceback when "--debug" is
# on the command line); success exits with status 0.
if __name__ == "__main__":
    if "--test" in sys.argv:
        doctest.testmod()
        sys.exit(0)

    exit_status = 0
    try:
        options = parse_cmdline(__doc__)
        main(options)
    except RuntimeError as e:
        print("Error occured: %s" % e)
        if "--debug" in sys.argv:
            traceback.print_exc()
        exit_status = 1
    sys.exit(exit_status)

539
lib/kokkos/config/test_all_sandia Executable file
View File

@ -0,0 +1,539 @@
#!/bin/bash -e
#
# Global config
#
set -o pipefail
# Work out which known test machine this is.  The host name is checked for
# the known name fragments first; failing that, a set SEMS_MODULEFILES_ROOT
# identifies a SEMS machine.  Anything else aborts the script.
MACHINE=""
HOSTNAME=$(hostname)
case "$HOSTNAME" in
  *white*|*ride*)
    MACHINE=white
    ;;
  *bowman*)
    MACHINE=bowman
    ;;
  *node*) # Warning: very generic name
    MACHINE=shepard
    ;;
  *)
    if [ -n "$SEMS_MODULEFILES_ROOT" ]; then
      MACHINE=sems
    else
      echo "Unrecognized machine" >&2
      exit 1
    fi
    ;;
esac
# Comma-separated build configurations per compiler family.  These are used
# as the build-list field of the COMPILERS entries in the machine-specific
# section.
GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial"
CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial"
# Comma-separated warning flags per compiler family (the warning-flag field of
# the COMPILERS entries).  Every non-empty list includes -Werror.
GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized"
IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CUDA_WARNING_FLAGS=""
# Default. Machine specific can override
# (several of these are also set by the command-line options handled below)
DEBUG=False
ARGS=""
CUSTOM_BUILD_LIST=""
DRYRUN=False
BUILD_ONLY=False
declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=3
TEST_SCRIPT=False
SKIP_HWLOC=False
ARCH_FLAG=""
#
# Machine specific config
#
# Per-machine setup: initialise the module system, define the module-list
# templates and fill COMPILERS with one entry per compiler to test.  Each
# entry is a single string of five space-separated fields (see "Format"
# below).  <COMPILER_NAME> and <COMPILER_VERSION> are placeholders inside the
# module lists; presumably they are substituted per compiler later in the
# script - TODO confirm.
if [ "$MACHINE" = "sems" ]; then
source /projects/modulefiles/utils/sems-modules-init.sh
source /projects/modulefiles/utils/kokkos-modules-init.sh
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>/base,hwloc/1.10.1/<COMPILER_NAME>/<COMPILER_VERSION>/base"
CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.7.2/base"
# NOTE(review): $KOKKOS_PATH in the cuda entries is expanded right here, but
# --kokkos-path is only parsed by the argument loop further down.  Unless
# KOKKOS_PATH is already set in the environment, the exe-name becomes
# "/config/nvcc_wrapper" - confirm and reorder if so.
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/5.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"cuda/6.5.14 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
elif [ "$MACHINE" = "white" ]; then
source /etc/profile.d/modules.sh
SKIP_HWLOC=True
export SLURM_TASKS_PER_NODE=32
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.9.2"
# Don't do pthread on white
# NOTE(review): this override is not referenced by the entries below - the
# gcc entries use $IBM_BUILD_LIST, which has the same value.  CUDA_MODULE_LIST
# is likewise set without any cuda entry in this list.
GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/4.9.2 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/5.3.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
)
ARCH_FLAG="--arch=Power8"
NUM_JOBS_TO_RUN_IN_PARALLEL=8
elif [ "$MACHINE" = "bowman" ]; then
source /etc/profile.d/modules.sh
SKIP_HWLOC=True
export SLURM_TASKS_PER_NODE=32
BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
# Build list without the OpenMP configurations, used for intel/16.2.181
OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
)
ARCH_FLAG="--arch=KNL"
NUM_JOBS_TO_RUN_IN_PARALLEL=8
elif [ "$MACHINE" = "shepard" ]; then
source /etc/profile.d/modules.sh
SKIP_HWLOC=True
export SLURM_TASKS_PER_NODE=32
BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
# Build list without the OpenMP configurations, used for intel/16.2.181
OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
)
ARCH_FLAG="--arch=HSW"
NUM_JOBS_TO_RUN_IN_PARALLEL=8
else
# MACHINE holds a value with no configuration branch above
echo "Unhandled machine $MACHINE" >&2
exit 1
fi
# Exported so every build/test child process inherits this OpenMP thread count.
export OMP_NUM_THREADS=4
# Number of previous result directories get_test_root_dir keeps before pruning.
declare -i NUM_RESULTS_TO_KEEP=7
# Result directories are named ${RESULT_ROOT_PREFIX}_<timestamp>.
RESULT_ROOT_PREFIX=TestAll
# Absolute path of the parent of the directory that contains this script;
# used as the default KOKKOS_PATH.
SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd )
#
# Handle arguments
#
# Consume every command-line word. Words starting with a known "--option"
# prefix set the matching variable ("--opt=value" forms keep the text after
# the first '='); every other word is appended to ARGS as a compiler pattern.
# Note: -gt is the numeric comparison; '>' inside [[ ]] compares strings.
while [[ $# -gt 0 ]]
do
  key="$1"
  case $key in
    --kokkos-path*)
      KOKKOS_PATH="${key#*=}"
      ;;
    --build-list*)
      CUSTOM_BUILD_LIST="${key#*=}"
      ;;
    --debug*)
      DEBUG=True
      ;;
    --build-only*)
      BUILD_ONLY=True
      ;;
    --test-script*)
      TEST_SCRIPT=True
      ;;
    --skip-hwloc*)
      SKIP_HWLOC=True
      ;;
    --num*)
      NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}"
      ;;
    --dry-run*)
      DRYRUN=True
      ;;
    --help)
      echo "test_all_sandia <ARGS> <OPTIONS>:"
      echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory"
      echo " Defaults to root repo containing this script"
      echo "--debug: Run tests in debug. Defaults to False"
      echo "--test-script: Test this script, not Kokkos"
      echo "--skip-hwloc: Do not do hwloc tests"
      echo "--num=N: Number of jobs to run in parallel "
      echo "--dry-run: Just print what would be executed"
      echo "--build-only: Just do builds, don't run anything"
      echo "--build-list=BUILD,BUILD,BUILD..."
      echo " Provide a comma-separated list of builds instead of running all builds"
      echo " Valid items:"
      echo " OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial"
      echo " Cuda_OpenMP, Cuda_Pthread, Cuda_Serial"
      echo ""
      echo "ARGS: list of expressions matching compilers to test"
      echo " supported compilers sems"
      # List the compiler column of every entry configured for this machine.
      for COMPILER_DATA in "${COMPILERS[@]}"; do
        ARR=($COMPILER_DATA)
        COMPILER=${ARR[0]}
        echo " $COMPILER"
      done
      echo ""
      echo "Examples:"
      echo " Run all tests"
      echo " % test_all_sandia"
      echo ""
      echo " Run all gcc tests"
      echo " % test_all_sandia gcc"
      echo ""
      echo " Run all gcc/4.7.2 and all intel tests"
      echo " % test_all_sandia gcc/4.7.2 intel"
      echo ""
      echo " Run all tests in debug"
      echo " % test_all_sandia --debug"
      echo ""
      echo " Run gcc/4.7.2 and only do OpenMP and OpenMP_Serial builds"
      echo " % test_all_sandia gcc/4.7.2 --build-list=OpenMP,OpenMP_Serial"
      echo ""
      echo "If you want to kill the tests, do:"
      echo " hit ctrl-z"
      echo " % kill -9 %1"
      echo
      exit 0
      ;;
    *)
      # args, just append
      ARGS="$ARGS $1"
      ;;
  esac
  shift
done
# set kokkos path
if [ -z "$KOKKOS_PATH" ]; then
  KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT
else
  # Ensure KOKKOS_PATH is abs path (quoted so paths with spaces survive)
  KOKKOS_PATH=$( cd "$KOKKOS_PATH" && pwd )
fi
# set build type
if [ "$DEBUG" = "True" ]; then
  BUILD_TYPE=debug
else
  BUILD_TYPE=release
fi
# If no args provided, do all compilers
if [ -z "$ARGS" ]; then
  ARGS='?'
fi
# Process args to figure out which compilers to test.
# ARGS is split with 'read' rather than an unquoted expansion: the words are
# glob patterns (the default is '?'), and an unquoted "for ARG in $ARGS" would
# let the shell expand them against files in the current directory first.
COMPILERS_TO_TEST=""
read -r -a ARG_PATTERNS <<< "$ARGS"
for ARG in "${ARG_PATTERNS[@]}"; do
  for COMPILER_DATA in "${COMPILERS[@]}"; do
    ARR=($COMPILER_DATA)
    COMPILER=${ARR[0]}
    # $ARG is deliberately unquoted here so it acts as a prefix pattern.
    if [[ "$COMPILER" = $ARG* ]]; then
      if [[ "$COMPILERS_TO_TEST" != *${COMPILER}* ]]; then
        COMPILERS_TO_TEST="$COMPILERS_TO_TEST $COMPILER"
      else
        echo "Tried to add $COMPILER twice"
      fi
    fi
  done
done
#
# Functions
#
# get_compiler_name <COMPILER>
# Print the first '/'-separated field of a "<name>/<version>" compiler spec.
# The argument is quoted so it is passed to cut verbatim (no word splitting
# or pathname expansion).
get_compiler_name() {
  echo "$1" | cut -d/ -f1
}
# get_compiler_version <COMPILER>
# Print the second '/'-separated field of a "<name>/<version>" compiler spec.
# The argument is quoted so it is passed to cut verbatim (no word splitting
# or pathname expansion).
get_compiler_version() {
  echo "$1" | cut -d/ -f2
}
# Do not call directly
# get_compiler_data <COMPILER> <ITEM>
# Look up <COMPILER> in the COMPILERS table and print column <ITEM> of its
# entry (0 = compiler, 1 = module list, 2 = build list, 3 = exe name,
# 4 = warning flags). Commas in the column become spaces, and the
# <COMPILER_NAME> / <COMPILER_VERSION> placeholders are replaced with the two
# halves of <COMPILER>.
# For an unknown compiler, prints a message on stderr and exits with status 1
# (which ends only the subshell when called through $(...)).
get_compiler_data() {
  local compiler=$1
  local item=$2
  local compiler_name=$(get_compiler_name "$compiler")
  local compiler_vers=$(get_compiler_version "$compiler")
  local compiler_data
  for compiler_data in "${COMPILERS[@]}" ; do
    # Intentionally unquoted: split the entry into its space-separated columns.
    local arr=($compiler_data)
    if [ "$compiler" = "${arr[0]}" ]; then
      echo "${arr[$item]}" | tr , ' ' | sed -e "s/<COMPILER_NAME>/$compiler_name/g" -e "s/<COMPILER_VERSION>/$compiler_vers/g"
      return 0
    fi
  done
  # Not found
  echo "Unrecognized compiler $compiler" >&2
  exit 1
}
#
# For all getters, usage: <GETTER> <COMPILER>
#
# Print the module list (column 1 of the COMPILERS entry) for <COMPILER>.
get_compiler_modules() {
  get_compiler_data $1 1
}
# Print the build list (column 2 of the COMPILERS entry) for <COMPILER>.
get_compiler_build_list() {
  get_compiler_data $1 2
}
# Print the compiler executable name (column 3) for <COMPILER>.
get_compiler_exe_name() {
  get_compiler_data $1 3
}
# Print the warning flags (column 4) for <COMPILER>.
get_compiler_warning_flags() {
  get_compiler_data $1 4
}
# run_cmd <COMMAND...>
# Echo the command line, then evaluate it with stderr merged into stdout.
# When DRYRUN is "True" the command is only echoed and the status is 0;
# otherwise the status is that of the evaluated command.
run_cmd() {
  echo "RUNNING: $*"
  if [ "$DRYRUN" = "True" ]; then
    return 0
  fi
  eval "$* 2>&1"
}
# report_and_log_test_result <SUCCESS> <DESC> <COMMENT>
# Record the outcome of one job. <SUCCESS> of "0" means the job passed: the
# comment is stored in $PASSED_DIR/<DESC>. Anything else means failure: the
# comment (by convention the name of the failed phase) is stored in
# $FAILED_DIR/<DESC> and the log <DESC>.<COMMENT>.log from the current
# directory is dumped to stdout.
report_and_log_test_result() {
  local status=$1
  local label=$2
  local note=$3
  if [ "$status" != "0" ]; then
    echo " FAILED $label" >&2
    echo $note > $FAILED_DIR/$label
    cat ${label}.${note}.log
    return
  fi
  echo " PASSED $label"
  echo $note > $PASSED_DIR/$label
}
# setup_env <COMPILER>
# Purge the loaded modules, then load each module listed for <COMPILER>.
# After every load the module must show up in `module list`; returns 1 as
# soon as one does not, 0 when all were loaded.
setup_env() {
  local spec=$1
  local wanted_modules=$(get_compiler_modules $spec)
  module purge
  local name
  for name in $wanted_modules; do
    echo "Loading module $name"
    module load $name 2>&1
    # It is ridiculously hard to check for the success of a loaded
    # module. Module does not return error codes and piping to grep
    # causes module to run in a subshell.
    if ! module list 2>&1 | grep "$name" >& /dev/null; then
      return 1
    fi
  done
  return 0
}
# single_build_and_test <COMPILER> <BUILD> <BUILD_TYPE>
# Configure, build and (unless BUILD_ONLY is set) test one
# compiler/device/build-type combination inside its own directory
# $ROOT_DIR/<COMPILER>/<BUILD>-<BUILD_TYPE>, writing one log per phase
# (<desc>.configure.log, <desc>.build.log, <desc>.test.log) and recording the
# outcome through report_and_log_test_result.
# Always returns 0; a failed phase is reported and ends the job early.
# It changes directory and may export TMPDIR; run_in_background invokes it
# with '&', so those changes stay in the background subshell.
single_build_and_test() {
  # Use sane var names
  local compiler=$1; local build=$2; local build_type=$3;
  # set up env
  mkdir -p $ROOT_DIR/$compiler/"${build}-$build_type"
  cd $ROOT_DIR/$compiler/"${build}-$build_type"
  # Job description doubles as the result-file and log-file stem ('/' -> '-').
  local desc=$(echo "${compiler}-${build}-${build_type}" | sed 's:/:-:g')
  # Module-loading output starts the configure log; a failure is reported as
  # a configure failure.
  setup_env $compiler >& ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
  # Set up flags
  local compiler_warning_flags=$(get_compiler_warning_flags $compiler)
  local compiler_exe=$(get_compiler_exe_name $compiler)
  # Build types starting with "hwloc" pass the hwloc install prefix, derived
  # as two directory levels above the hwloc-info executable.
  if [[ "$build_type" = hwloc* ]]; then
    local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info)))
  fi
  if [[ "$build_type" = *debug* ]]; then
    local extra_args="$extra_args --debug"
    local cxxflags="-g $compiler_warning_flags"
  else
    local cxxflags="-O3 $compiler_warning_flags"
  fi
  # For cuda compilers, keep intermediate files in this job's directory and
  # point TMPDIR at it as well.
  if [[ "$compiler" == cuda* ]]; then
    cxxflags="--keep --keep-dir=$(pwd) $cxxflags"
    export TMPDIR=$(pwd)
  fi
  # cxxflags="-DKOKKOS_USING_EXP_VIEW=1 $cxxflags"
  echo " Starting job $desc"
  local comment="no_comment"
  if [ "$TEST_SCRIPT" = "True" ]; then
    # Script self-test: sleep 1-10 seconds and, for values above 5, run an
    # `ls fake_problem` whose failure is reported as a configure failure
    # (presumably fake_problem is expected not to exist -- confirm).
    local rand=$[ 1 + $[ RANDOM % 10 ]]
    sleep $rand
    if [ $rand -gt 5 ]; then
      run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; }
    fi
  else
    # Appends (&>>) so the module-loading output captured above is kept.
    run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
    local -i build_start_time=$(date +%s)
    run_cmd make build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; }
    local -i build_end_time=$(date +%s)
    # The comment stored for a passing job carries the timings in seconds.
    comment="build_time=$(($build_end_time-$build_start_time))"
    # Tests run only when BUILD_ONLY is exactly "False".
    if [[ "$BUILD_ONLY" == False ]]; then
      run_cmd make test >& ${desc}.test.log || { report_and_log_test_result 1 ${desc} test && return 0; }
      local -i run_end_time=$(date +%s)
      comment="$comment run_time=$(($run_end_time-$build_end_time))"
    fi
  fi
  report_and_log_test_result 0 $desc "$comment"
  return 0
}
# wait_for_jobs <NUM-JOBS>
# Block, polling once per second, until the number of lines printed by
# `jobs` is below <NUM-JOBS>.
wait_for_jobs() {
  local -i limit=$1
  local -i active=$(jobs | wc -l)
  until [ $active -lt $limit ]
  do
    sleep 1
    active=$(jobs | wc -l)
    # Also query the job table in this shell itself, discarding the output.
    jobs >& /dev/null
  done
}
# run_in_background <COMPILER> <BUILD> <BUILD_TYPE>
# Wait for a free job slot, then start single_build_and_test with the same
# arguments as a background job. The slot limit is
# NUM_JOBS_TO_RUN_IN_PARALLEL, except: 8 when BUILD_ONLY is "True",
# otherwise 1 for cuda compilers.
run_in_background() {
  local compiler=$1
  local -i slots=$NUM_JOBS_TO_RUN_IN_PARALLEL
  if [[ "$BUILD_ONLY" == True ]]; then
    slots=8
  elif [[ "$compiler" == cuda* ]]; then
    slots=1
  fi
  wait_for_jobs $slots
  single_build_and_test $* &
}
# build_and_test_all <COMPILER>
# Queue one background job per build of <COMPILER>. The builds come from
# CUSTOM_BUILD_LIST (comma-separated) when it is set, otherwise from the
# compiler's entry in COMPILERS. Non-cuda compilers also get a
# "hwloc-<BUILD_TYPE>" job per build when SKIP_HWLOC is "False".
# Always returns 0.
build_and_test_all() {
  local compiler=$1
  # Pick the list of builds to run
  if [ -n "$CUSTOM_BUILD_LIST" ]; then
    local builds=$(echo "$CUSTOM_BUILD_LIST" | tr , ' ')
  else
    local builds=$(get_compiler_build_list $compiler)
  fi
  # do builds
  local one_build
  for one_build in $builds
  do
    run_in_background $compiler $one_build $BUILD_TYPE
    # Cuda compilers never get the extra hwloc job
    if [[ "$compiler" == cuda* ]]; then
      continue
    fi
    if [[ "$SKIP_HWLOC" == False ]]; then
      run_in_background $compiler $one_build "hwloc-$BUILD_TYPE"
    fi
  done
  return 0
}
# get_test_root_dir
# Prune old result directories in the current directory and print the
# absolute path to use for a new one.
# Entries named "$RESULT_ROOT_PREFIX*" are sorted by name (the timestamp
# suffix makes that chronological); when there are more than
# NUM_RESULTS_TO_KEEP of them, the earliest surplus ones are removed.
# The printed directory is NOT created here.
get_test_root_dir() {
  local existing_results=$(find . -maxdepth 1 -name "$RESULT_ROOT_PREFIX*" | sort)
  # Count lines of the quoted list; an empty list counts as zero results
  # (echo of an empty string would otherwise be counted as one line).
  local -i num_existing_results=0
  if [ -n "$existing_results" ]; then
    num_existing_results=$(echo "$existing_results" | wc -l)
  fi
  local -i num_to_delete=$(( num_existing_results - NUM_RESULTS_TO_KEEP ))
  if [ $num_to_delete -gt 0 ]; then
    /bin/rm -rf $(echo "$existing_results" | head -n $num_to_delete)
  fi
  echo $(pwd)/${RESULT_ROOT_PREFIX}_$(date +"%Y-%m-%d_%H.%M.%S")
}
# wait_summarize_and_exit
# Wait for all background jobs to finish, print every passed job with its
# stored comment and every failed job with the phase that failed, then exit
# the script. The exit status is the number of failed jobs, capped at 255.
wait_summarize_and_exit() {
  wait_for_jobs 1
  echo "#######################################################"
  echo "PASSED TESTS"
  echo "#######################################################"
  local passed_test
  for passed_test in $(\ls -1 $PASSED_DIR | sort)
  do
    echo $passed_test $(cat $PASSED_DIR/$passed_test)
  done
  echo "#######################################################"
  echo "FAILED TESTS"
  echo "#######################################################"
  local failed_test
  local -i rv=0
  for failed_test in $(\ls -1 $FAILED_DIR | sort)
  do
    echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)"
    rv=$rv+1
  done
  # Exit statuses are taken modulo 256; without the cap, 256 failures would
  # be reported as status 0 (success).
  if [ $rv -gt 255 ]; then
    rv=255
  fi
  exit $rv
}
#
# Main
#
# Create this run's result tree (results/passed and results/failed under a
# fresh timestamped root) and work from inside the root.
ROOT_DIR=$(get_test_root_dir)
PASSED_DIR=$ROOT_DIR/results/passed
FAILED_DIR=$ROOT_DIR/results/failed
mkdir -p $ROOT_DIR $PASSED_DIR $FAILED_DIR
cd $ROOT_DIR

# Queue every selected compiler, then wait for the jobs and report.
echo "Going to test compilers: " $COMPILERS_TO_TEST
for COMPILER in $COMPILERS_TO_TEST
do
  echo "Testing compiler $COMPILER"
  build_and_test_all $COMPILER
done
wait_summarize_and_exit

View File

@ -0,0 +1,5 @@
jenkins_test_driver is designed to be run through Jenkins as a
multiconfiguration job. It relies on a number of environment variables that will
only be set when run in that context. It is possible to override these if you
know the Jenkins job setup. It is not recommended that a non-expert try to run
this script directly.

View File

@ -0,0 +1,83 @@
#!/bin/bash -x
#
# Jenkins driver: configure, build and test Kokkos for one configuration.
# Inputs (environment variables):
#   BUILD_TYPE     '~'-separated items; each becomes a "--with-<item>" option
#                  for generate_makefile.bash. An item starting with "cuda_"
#                  also selects the cuda module ('_' is turned into '/').
#   HOST_COMPILER  host compiler name; its leading letters select the brand
#                  (gcc or intel) and '_' is turned into '/' to get the module.
#   WORKSPACE      directory that contains the "kokkos" checkout.
# Exits 1 when configure, build or tests fail, or the compiler brand is
# unknown; exits 0 without building for the unsupported intel + cuda pairing.
echo "Building for BUILD_TYPE = ${BUILD_TYPE}"
echo "Building with HOST_COMPILER = ${HOST_COMPILER}"
echo "Building in ${WORKSPACE}"
module use /home/projects/modulefiles
BUILD_TYPE=`echo $BUILD_TYPE | tr "~" " "`
build_options=""
for item in ${BUILD_TYPE}; do
  build_options="$build_options --with-$item"
done
kokkos_path=${WORKSPACE}/kokkos
gtest_path=${WORKSPACE}/kokkos/tpls/gtest
echo ${WORKSPACE}
pwd
#extract information from the provided parameters.
host_compiler_brand=`echo $HOST_COMPILER | grep -o "^[a-zA-Z]*"`
cuda_compiler=`echo $BUILD_TYPE | grep -o "cuda_[^ ]*"`
host_compiler_module=`echo $HOST_COMPILER | tr "_" "/"`
cuda_compiler_module=`echo $cuda_compiler | tr "_" "/"`
build_path=`echo $BUILD_TYPE | tr " " "_"`
module load $host_compiler_module
# Only load a cuda module when the build type actually names one.
if [ -n "$cuda_compiler_module" ]; then
  module load $cuda_compiler_module
fi
case $host_compiler_brand in
  gcc)
    module load nvcc-wrapper/gnu
    compiler=g++
    ;;
  intel)
    module load nvcc-wrapper/intel
    compiler=icpc
    ;;
  *)
    echo "Unrecognized compiler brand."
    exit 1
    ;;
esac
#if cuda is on we need to set the host compiler for the
#nvcc wrapper and make the wrapper the compiler.
# (Variables are quoted: an unquoted empty $cuda_compiler makes `[` fail with
# "unary operator expected" instead of evaluating the test.)
if [ -n "$cuda_compiler" ]; then
  export NVCC_WRAPPER_DEFAULT_COMPILER=$compiler
  compiler=$kokkos_path/config/nvcc_wrapper
fi
if [ "$host_compiler_brand" = "intel" ] && [ -n "$cuda_compiler" ]; then
  echo "Intel compilers are not supported with cuda at this time."
  exit 0
fi
rm -rf test-$build_path
mkdir test-$build_path
cd test-$build_path
# PIPESTATUS[0] is the status of the command feeding tee, not of tee itself.
/bin/bash $kokkos_path/generate_makefile.bash $build_options --kokkos-path="$kokkos_path" --with-gtest="$gtest_path" --compiler=$compiler 2>&1 |tee configure.out
if [ ${PIPESTATUS[0]} != 0 ]; then
  echo "Configure failed."
  exit 1
fi
make build-test 2>&1 | tee build.log
if [ ${PIPESTATUS[0]} != 0 ]; then
  echo "Build failed."
  exit 1
fi
make test 2>&1 | tee test.log
make_test_status=${PIPESTATUS[0]}
# Fail when the log mentions FAIL, and also when make itself failed (for
# example a test that crashed without printing FAIL).
grep "FAIL" test.log
if [ $? == 0 ] || [ "$make_test_status" != 0 ]; then
  echo "Tests failed."
  exit 1
fi

View File

@ -0,0 +1,287 @@
#! /usr/bin/env python
"""
Compute the size at which the current compiler will start to
significantly scale back optimization.
The CPP file being modified will need the following tags.
// JGF_DUPLICATE_BEGIN - Put before start of function to duplicate
// JGF_DUPLICATE_END - Put after end of function to duplicate
// JGF_DUPE: function_name(args); - Put anywhere where it's legal to
put a function call but not in your timing section.
The program will need to output the string:
FOM: <number>
This will represent the program's performance
"""
import argparse, sys, os, doctest, subprocess, re, time
VERBOSE = False
###############################################################################
def parse_command_line(args, description):
###############################################################################
    """
    Parse the command line.

    args is the full argv-style list (args[0] is the program name, used only
    in the usage text); description is the text argparse shows for --help.

    Returns the tuple (cppfile, buildcmd, execmd, start, end, repeat,
    template, csv). As a side effect, sets the module-level VERBOSE flag to
    True when -v/--verbose is given.
    """
    global VERBOSE

    prog_name = os.path.basename(args[0])
    usage_text = """\n%s <cppfile> <build-command> <run-command> [--verbose]
OR
%s --help
OR
%s --test
\033[1mEXAMPLES:\033[0m
> %s foo.cpp 'make -j4' foo
""" % ((prog_name, ) * 4)

    parser = argparse.ArgumentParser(
        usage=usage_text,
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    # Positional arguments, in the order they must appear on the command line
    for positional, help_text in (("cppfile", "Name of file to modify."),
                                  ("buildcmd", "Build command"),
                                  ("execmd", "Run command")):
        parser.add_argument(positional, help=help_text)

    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Print extra information")

    parser.add_argument("-s", "--start", type=int, default=1,
                        help="Starting number of dupes")

    parser.add_argument("-e", "--end", type=int, default=1000,
                        help="Ending number of dupes")

    parser.add_argument("-n", "--repeat", type=int, default=10,
                        help="Number of times to repeat an individial execution. Best value will be taken.")

    parser.add_argument("-t", "--template", action="store_true",
                        help="Use templating instead of source copying to increase object size")

    parser.add_argument("-c", "--csv", action="store_true",
                        help="Print results as CSV")

    opts = parser.parse_args(args[1:])

    if opts.verbose:
        VERBOSE = True

    return (opts.cppfile, opts.buildcmd, opts.execmd, opts.start, opts.end,
            opts.repeat, opts.template, opts.csv)
###############################################################################
def verbose_print(msg, override=None):
###############################################################################
    """
    Print msg when verbose output is wanted.

    A truthy override forces the message out, override=False suppresses it,
    and any other falsy value (the default None) defers to the module-level
    VERBOSE flag.
    """
    # print(msg) with a single argument behaves the same as the Python 2
    # print statement and is also valid Python 3.
    if override or (VERBOSE and override is not False):
        print(msg)
###############################################################################
def error_print(msg):
###############################################################################
    """
    Write msg followed by a newline to standard error.
    """
    # Explicit write instead of the Python 2-only "print >> sys.stderr" form,
    # so this works under both Python 2 and Python 3.
    sys.stderr.write("%s\n" % (msg,))
###############################################################################
def expect(condition, error_msg):
###############################################################################
    """
    Assert-like check for user errors rather than programming errors.

    Does nothing when condition is truthy; otherwise raises SystemExit with
    the message "FAIL: <error_msg>", so the user gets a message and no
    stacktrace.
    """
    if condition:
        return
    raise SystemExit("FAIL: %s" % error_msg)
###############################################################################
def run_cmd(cmd, ok_to_fail=False, input_str=None, from_dir=None, verbose=None,
            arg_stdout=subprocess.PIPE, arg_stderr=subprocess.PIPE):
###############################################################################
    """
    Run cmd through the shell and wait for it to finish.

    cmd        -- command line, executed with shell=True
    ok_to_fail -- when true, return (status, stdout, stderr) and never raise
    input_str  -- text fed to the command's stdin, or None for no stdin pipe
    from_dir   -- working directory for the command (None = current one)
    verbose    -- passed to verbose_print as its override argument
    arg_stdout, arg_stderr -- forwarded to subprocess.Popen

    When ok_to_fail is false, returns the captured stdout (stripped, or None
    if stdout was not captured) and exits via expect() if the command's
    status is non-zero.
    """
    verbose_print("RUN: %s" % cmd, verbose)

    stdin = subprocess.PIPE if input_str is not None else None

    # NOTE(review): shell=True hands cmd to the shell verbatim; cmd comes from
    # the command line of this script, so it must not be built from untrusted
    # input.
    # universal_newlines=True makes the pipes text-mode so that output is str
    # (not bytes) under Python 3, which the callers' str regexes require.
    proc = subprocess.Popen(cmd,
                            shell=True,
                            stdout=arg_stdout,
                            stderr=arg_stderr,
                            stdin=stdin,
                            cwd=from_dir,
                            universal_newlines=True)
    output, errput = proc.communicate(input_str)
    output = output.strip() if output is not None else output
    stat = proc.wait()

    if ok_to_fail:
        return stat, output, errput

    if arg_stderr is not None:
        if errput is None:
            # stderr was redirected to a file object: read the text back
            with open(arg_stderr.name, "r") as fd:
                errput = fd.read()
        expect(stat == 0, "Command: '%s' failed with error '%s'" % (cmd, errput))
    else:
        expect(stat == 0, "Command: '%s' failed. See terminal output" % cmd)
    return output
###############################################################################
def build_and_run(source, cppfile, buildcmd, execmd, repeat):
###############################################################################
    """
    Write source to cppfile, build it, run it repeat times and return the
    best figure of merit.

    source   -- list of source lines to write
    cppfile  -- path of the file to (over)write
    buildcmd -- shell command that builds the program
    execmd   -- shell command that runs it
    repeat   -- number of runs

    Each run must print a line of the form "FOM: <number>"; the first such
    line is that run's result and the largest result over all runs is
    returned (None when repeat is 0). Exits via expect() when a run prints
    no FOM line.
    """
    # Close the file explicitly so its contents are on disk before the build.
    with open(cppfile, 'w') as fd:
        fd.writelines(source)

    run_cmd(buildcmd)

    fom_regex = re.compile(r'^FOM: ([0-9.]+)$')
    best = None
    for _ in range(repeat):
        wait_for_quiet_machine()
        output = run_cmd(execmd)

        current = None
        for line in output.splitlines():
            m = fom_regex.match(line)
            if m is not None:
                current = float(m.group(1))
                break

        expect(current is not None, "No lines in output matched FOM regex")

        if best is None or best < current:
            best = current

    return best
###############################################################################
def wait_for_quiet_machine():
###############################################################################
    """
    Block until the machine looks idle.

    Every iteration sleeps 2 seconds, then samples `top` and reads the idle
    percentage from its 'Cpu(s)' line. Returns once that value is at least
    95; below that, a message is printed to stderr and the check repeats.
    Exits via expect() when the top output is not in the expected
    "<number>%id" form.
    """
    idle_pct_re = re.compile(r'^([0-9.]+)%id$')
    # The first iteration of top gives garbage results
    top_cmd = "top -bn2 | grep 'Cpu(s)' | tr ',' ' ' | tail -n 1 | awk '{print $5}'"

    while True:
        time.sleep(2)
        sample = run_cmd(top_cmd)
        found = idle_pct_re.match(sample)
        expect(found is not None, "top not returning output in expected form")
        if float(found.group(1)) >= 95:
            return
        error_print("Machine is too busy, waiting for it to become free")
###############################################################################
def add_n_dupes(curr_lines, num_dupes, template):
###############################################################################
    """
    Insert num_dupes duplicates of the tagged function into curr_lines,
    modifying the list in place.

    curr_lines -- list of source lines containing the JGF_DUPLICATE_BEGIN /
                  JGF_DUPLICATE_END / "JGF_DUPE: <call>" tags
    num_dupes  -- number of duplicates to add
    template   -- when true, no function copies are inserted and the calls
                  use "name<i>"; otherwise copies named "name<i-as-suffix>"
                  are inserted after JGF_DUPLICATE_END

    The calls are inserted after the JGF_DUPE line as an if/else-if chain
    keyed on std::rand(). Exits via expect() when a tag is missing or the
    tags are in the wrong order.
    """
    function_name = None
    function_invocation = None
    function_lines = []
    function_re = re.compile(r'^.* (\w+) *[(]')
    function_inv_re = re.compile(r'^.*JGF_DUPE: +(.+)$')

    # Get function lines
    record = False
    definition_insertion_point = None
    invocation_insertion_point = None
    for idx, line in enumerate(curr_lines):
        if "JGF_DUPLICATE_BEGIN" in line:
            record = True
            # The function name is taken from the line right after the tag
            m = function_re.match(curr_lines[idx+1])
            expect(m is not None, "Could not find function in line '%s'" % curr_lines[idx+1])
            function_name = m.groups()[0]

        elif "JGF_DUPLICATE_END" in line:
            record = False
            definition_insertion_point = idx + 1

        elif record:
            function_lines.append(line)

        elif "JGF_DUPE" in line:
            m = function_inv_re.match(line)
            expect(m is not None, "Could not find function invocation example in line '%s'" % line)
            function_invocation = m.groups()[0]
            invocation_insertion_point = idx + 1

    expect(function_name is not None, "Could not find name of dupe function")
    expect(function_invocation is not None, "Could not find function invocation point")
    # Without this check a missing end tag leaves the insertion point as None,
    # and the slice assignment below would replace the entire file contents.
    expect(definition_insertion_point is not None, "Could not find JGF_DUPLICATE_END tag")
    expect(definition_insertion_point < invocation_insertion_point, "fix me")

    dupe_func_defs = []
    dupe_invocations = ["int jgf_rand = std::rand();\n", "if (false) {}\n"]

    for i in range(num_dupes):
        if not template:
            # Copy the function, renaming it on its first (signature) line
            dupe_func = list(function_lines)
            dupe_func[0] = dupe_func[0].replace(function_name, "%s%d" % (function_name, i))
            dupe_func_defs.extend(dupe_func)

        dupe_invocations.append("else if (jgf_rand == %d) " % i)
        if template:
            dupe_call = function_invocation.replace(function_name, "%s<%d>" % (function_name, i)) + "\n"
        else:
            dupe_call = function_invocation.replace(function_name, "%s%d" % (function_name, i)) + "\n"
        dupe_invocations.append(dupe_call)

    # Insert at the later position first so the earlier index stays valid
    curr_lines[invocation_insertion_point:invocation_insertion_point] = dupe_invocations
    curr_lines[definition_insertion_point:definition_insertion_point] = dupe_func_defs
###############################################################################
def report(num_dupes, curr_lines, object_file, orig_fom, curr_fom, csv=False, is_first_report=False):
###############################################################################
    """
    Print one result record to stdout.

    num_dupes       -- number of duplicates used for this measurement
    curr_lines      -- source lines that were built (only their count is used)
    object_file     -- path of the object file whose size is reported
    orig_fom        -- figure of merit of the unmodified source
    curr_fom        -- figure of merit of this measurement
    csv             -- print one comma-separated row instead of a text block
    is_first_report -- in CSV mode, print the header row first

    The change is reported as a percentage relative to orig_fom.
    """
    fom_change = (curr_fom - orig_fom) / orig_fom

    # Each print call takes one pre-formatted string, which produces the same
    # output under Python 2 and Python 3.
    if csv:
        if is_first_report:
            print("num_dupes, obj_byte_size, loc, fom, pct_diff")

        print("%s, %s, %s, %s, %s" % (num_dupes, os.path.getsize(object_file), len(curr_lines), curr_fom, fom_change*100))
    else:
        print("========================================================")
        print("For number of dupes: %s" % (num_dupes,))
        print("Object file size (bytes): %s" % (os.path.getsize(object_file),))
        print("Lines of code: %s" % (len(curr_lines),))
        print("Field of merit: %s" % (curr_fom,))
        print("Change pct: %s" % (fom_change*100,))
###############################################################################
def obj_size_opt_check(cppfile, buildcmd, execmd, start, end, repeat, template, csv=False):
###############################################################################
    """
    Measure how the figure of merit changes as the tagged function in cppfile
    is duplicated more and more times.

    cppfile  -- source file to modify (restored when this function ends)
    buildcmd -- shell command that builds the program
    execmd   -- shell command that runs it
    start    -- first number of duplicates to try; must be positive
    end      -- measurements stop once the number of duplicates reaches this
    repeat   -- runs per measurement (the best one counts)
    template -- passed to add_n_dupes
    csv      -- passed to report

    The number of duplicates doubles after every measurement. The object file
    whose size is reported is cppfile with its extension replaced by ".o".
    """
    # Doubling never advances a non-positive start, so it would loop forever.
    expect(start > 0, "Starting number of dupes must be positive")

    with open(cppfile, 'r') as fd:
        orig_source_lines = fd.readlines()

    backup_file = "%s.orig" % cppfile
    object_file = "%s.o" % os.path.splitext(cppfile)[0]

    os.rename(cppfile, backup_file)
    try:
        orig_fom = build_and_run(orig_source_lines, cppfile, buildcmd, execmd, repeat)
        report(0, orig_source_lines, object_file, orig_fom, orig_fom, csv=csv, is_first_report=True)

        i = start
        while i < end:
            curr_lines = list(orig_source_lines)
            add_n_dupes(curr_lines, i, template)

            curr_fom = build_and_run(curr_lines, cppfile, buildcmd, execmd, repeat)

            report(i, curr_lines, object_file, orig_fom, curr_fom, csv=csv)

            i *= 2 # make growth function configurable?
    finally:
        # Put the user's original source back even when a build or run failed.
        if os.path.exists(cppfile):
            os.remove(cppfile)
        os.rename(backup_file, cppfile)
###############################################################################
def _main_func(description):
###############################################################################
    """
    Script entry point.

    With --test anywhere on the command line, runs this module's doctests and
    exits with status 1 if any failed, 0 otherwise. Otherwise parses the
    command line (description is the --help text) and runs the measurement.
    """
    if "--test" in sys.argv:
        failed_count = doctest.testmod(verbose=True).failed
        sys.exit(1 if failed_count > 0 else 0)

    options = parse_command_line(sys.argv, description)
    obj_size_opt_check(*options)

###############################################################################
if __name__ == "__main__":
    _main_func(__doc__)

View File

@ -0,0 +1,10 @@
# Build description for the Containers subpackage.
# Everything between TRIBITS_SUBPACKAGE() and TRIBITS_SUBPACKAGE_POSTPROCESS()
# belongs to this subpackage.
TRIBITS_SUBPACKAGE(Containers)
# Sources live in src/ and are always processed.
ADD_SUBDIRECTORY(src)
# Test directories are registered through TriBITS rather than added directly.
TRIBITS_ADD_TEST_DIRECTORIES(unit_tests)
TRIBITS_ADD_TEST_DIRECTORIES(performance_tests)
TRIBITS_SUBPACKAGE_POSTPROCESS()

View File

@ -0,0 +1,5 @@
# Dependencies of this subpackage:
#   - the library requires the KokkosCore package;
#   - Pthread, CUDA and HWLOC are optional third-party libraries for the library;
#   - CUSPARSE is an optional third-party library for the tests only.
TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
  LIB_REQUIRED_PACKAGES KokkosCore
  LIB_OPTIONAL_TPLS Pthread CUDA HWLOC
  TEST_OPTIONAL_TPLS CUSPARSE
  )

View File

@ -0,0 +1,4 @@
#ifndef KOKKOS_CONTAINERS_CONFIG_H
#define KOKKOS_CONTAINERS_CONFIG_H
#endif

View File

@ -0,0 +1,37 @@
# Header search path: this directory's build and source trees, plus the
# sibling src/ directory.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
# TestMain.cpp and TestCuda.cpp are always compiled; the Threads and OpenMP
# test sources are added only when the matching Kokkos backend is enabled.
SET(SOURCES
  TestMain.cpp
  TestCuda.cpp
  )
IF(Kokkos_ENABLE_Pthread)
  LIST( APPEND SOURCES TestThreads.cpp)
ENDIF()
IF(Kokkos_ENABLE_OpenMP)
  LIST( APPEND SOURCES TestOpenMP.cpp)
ENDIF()
# Per #374, we always want to build this test, but we only want to run
# it as a PERFORMANCE test. That's why we separate building the test
# from running the test.
TRIBITS_ADD_EXECUTABLE(
  PerfTestExec
  SOURCES ${SOURCES}
  COMM serial mpi
  TESTONLYLIBS kokkos_gtest
  )
# NOTE(review): TriBITS documents the first positional argument of
# TRIBITS_ADD_TEST as the executable root name and NAME as the test name.
# The executable above is PerfTestExec, so the two names look swapped here --
# confirm against the TriBITS reference before relying on this test.
# The test is reported as failed when its output contains " FAILED ".
TRIBITS_ADD_TEST(
  PerformanceTest
  NAME PerfTestExec
  COMM serial mpi
  NUM_MPI_PROCS 1
  CATEGORIES PERFORMANCE
  FAIL_REGULAR_EXPRESSION " FAILED "
  )

View File

@ -0,0 +1,81 @@
# Stand-alone Makefile for the Kokkos containers performance tests.
# Builds one test executable per enabled backend (Cuda, Threads, OpenMP) and
# runs them with "make test".

# Root of the Kokkos source tree and location of the bundled gtest sources.
KOKKOS_PATH = ../..

GTEST_PATH = ../../TPL/gtest

# Lets the pattern rule below find the test sources when building elsewhere.
vpath %.cpp ${KOKKOS_PATH}/containers/performance_tests

# First rule in the file, hence the default goal.
default: build_all
	echo "End Build"

include $(KOKKOS_PATH)/Makefile.kokkos

# With Cuda enabled the compiler and linker are forced to the nvcc wrapper;
# otherwise CXX and LINK are only defaults that the caller may override.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
	CXX = $(NVCC_WRAPPER)
	CXXFLAGS ?= -O3
	LINK = $(CXX)
	LDFLAGS ?= -lpthread
else
	CXX ?= g++
	CXXFLAGS ?= -O3
	LINK ?= $(CXX)
	LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests

# Executables to build and test targets to run; filled in per enabled backend.
TEST_TARGETS =
TARGETS =

ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
	OBJ_CUDA = TestCuda.o TestMain.o gtest-all.o
	TARGETS += KokkosContainers_PerformanceTest_Cuda
	TEST_TARGETS += test-cuda
endif

ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
	OBJ_THREADS = TestThreads.o TestMain.o gtest-all.o
	TARGETS += KokkosContainers_PerformanceTest_Threads
	TEST_TARGETS += test-threads
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
	OBJ_OPENMP = TestOpenMP.o TestMain.o gtest-all.o
	TARGETS += KokkosContainers_PerformanceTest_OpenMP
	TEST_TARGETS += test-openmp
endif

# Link rules, one per backend executable.
KokkosContainers_PerformanceTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Cuda

KokkosContainers_PerformanceTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Threads

KokkosContainers_PerformanceTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_OpenMP

# Run rules, one per backend executable.
test-cuda: KokkosContainers_PerformanceTest_Cuda
	./KokkosContainers_PerformanceTest_Cuda

test-threads: KokkosContainers_PerformanceTest_Threads
	./KokkosContainers_PerformanceTest_Threads

test-openmp: KokkosContainers_PerformanceTest_OpenMP
	./KokkosContainers_PerformanceTest_OpenMP

# These targets name actions, not files: declare them phony so a stray file
# called e.g. "test" or "clean" cannot make them look up to date.
.PHONY: default build_all test test-cuda test-threads test-openmp clean

build_all: $(TARGETS)

test: $(TEST_TARGETS)

clean: kokkos-clean
	rm -f *.o $(TARGETS)

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc

View File

@ -0,0 +1,109 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdint.h>
#include <string>
#include <iostream>
#include <iomanip>
#include <sstream>
#include <fstream>
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#include <TestDynRankView.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <TestGlobal2LocalIds.hpp>
#include <TestUnorderedMapPerformance.hpp>
namespace Performance {

// gtest fixture shared by all Cuda performance tests in this file.
// The host execution space and Cuda device 0 are initialized once before the
// first test of the test case and finalized, in reverse order, after the last.
class cuda : public ::testing::Test {
protected:
  static void SetUpTestCase()
  {
    // Timings are printed in scientific notation with 5 digits of precision.
    std::cout << std::setprecision(5) << std::scientific;
    Kokkos::HostSpace::execution_space::initialize();
    Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
  }
  static void TearDownTestCase()
  {
    Kokkos::Cuda::finalize();
    Kokkos::HostSpace::execution_space::finalize();
  }
};

// Runs the DynRankView-versus-View benchmark on Cuda with a parallel range
// of 4096.
TEST_F( cuda, dynrankview_perf )
{
  std::cout << "Cuda" << std::endl;
  std::cout << " DynRankView vs View: Initialization Only " << std::endl;
  test_dynrankview_op_perf<Kokkos::Cuda>( 4096 );
}

// Runs the global-to-local id benchmark for sizes from begin_id_size up to
// and including end_id_size, multiplying the size by id_step each time.
TEST_F( cuda, global_2_local)
{
  std::cout << "Cuda" << std::endl;
  std::cout << "size, create, generate, fill, find" << std::endl;
  for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step)
    test_global_to_local_ids<Kokkos::Cuda>(i);
}

// UnorderedMap benchmarks; the boolean template argument distinguishes the
// "near" (true) and "far" (false) variants.
TEST_F( cuda, unordered_map_performance_near)
{
  Perf::run_performance_tests<Kokkos::Cuda,true>("cuda-near");
}

TEST_F( cuda, unordered_map_performance_far)
{
  Perf::run_performance_tests<Kokkos::Cuda,false>("cuda-far");
}

}
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */

View File

@ -0,0 +1,265 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_DYNRANKVIEW_HPP
#define KOKKOS_TEST_DYNRANKVIEW_HPP
#include <Kokkos_Core.hpp>
#include <Kokkos_DynRankView.hpp>
#include <vector>
#include <impl/Kokkos_Timer.hpp>
// Compare performance of DynRankView to View, specific focus on the parenthesis operators
namespace Performance {
//View functor
// Benchmark kernel that fills a rank-3 Kokkos::View<double***>.
// operator()(i) writes every (i,j,k) entry of the view for one value of the
// first index; it is meant to be launched over the first extent.
template <typename DeviceType>
struct InitViewFunctor {
  typedef Kokkos::View<double***, DeviceType> inviewtype;
  inviewtype _inview;

  InitViewFunctor( inviewtype &inview_ ) : _inview(inview_)
  {}

  KOKKOS_INLINE_FUNCTION
  void operator()(const int i) const {
    for (unsigned j = 0; j < _inview.dimension(1); ++j) {
      for (unsigned k = 0; k < _inview.dimension(2); ++k) {
        // NOTE(review): i is int but j and k are unsigned, so this expression
        // is evaluated in unsigned arithmetic and "-j*j" wraps around instead
        // of going negative. Presumably the stored values do not matter for a
        // timing benchmark -- confirm.
        _inview(i,j,k) = i/2 -j*j + k/3;
      }
    }
  }

  // Companion kernel: for each i, adds all (i,j,k) entries of the input view
  // into outview(i). The output is accumulated into, not reset, here.
  struct SumComputationTest
  {
    typedef Kokkos::View<double***, DeviceType> inviewtype;
    inviewtype _inview;

    typedef Kokkos::View<double*, DeviceType> outviewtype;
    outviewtype _outview;

    KOKKOS_INLINE_FUNCTION
    SumComputationTest(inviewtype &inview_ , outviewtype &outview_) : _inview(inview_), _outview(outview_) {}

    KOKKOS_INLINE_FUNCTION
    void operator()(const int i) const {
      for (unsigned j = 0; j < _inview.dimension(1); ++j) {
        for (unsigned k = 0; k < _inview.dimension(2); ++k) {
          _outview(i) += _inview(i,j,k) ;
        }
      }
    }
  };
};
// Same fill kernel as InitViewFunctor, but for a rank-3 view declared with
// Kokkos::LayoutStride.
template <typename DeviceType>
struct InitStrideViewFunctor {
  typedef Kokkos::View<double***, Kokkos::LayoutStride, DeviceType> inviewtype;
  inviewtype _inview;

  InitStrideViewFunctor( inviewtype &inview_ ) : _inview(inview_)
  {}

  KOKKOS_INLINE_FUNCTION
  void operator()(const int i) const {
    for (unsigned j = 0; j < _inview.dimension(1); ++j) {
      for (unsigned k = 0; k < _inview.dimension(2); ++k) {
        // NOTE(review): mixed int/unsigned operands make this unsigned
        // arithmetic, so "-j*j" wraps rather than going negative -- confirm
        // the values are irrelevant to the benchmark.
        _inview(i,j,k) = i/2 -j*j + k/3;
      }
    }
  }
};
// Same fill kernel for a rank-7 view: only the first three indices vary, the
// remaining four are always 0.
template <typename DeviceType>
struct InitViewRank7Functor {
  typedef Kokkos::View<double*******, DeviceType> inviewtype;
  inviewtype _inview;

  InitViewRank7Functor( inviewtype &inview_ ) : _inview(inview_)
  {}

  KOKKOS_INLINE_FUNCTION
  void operator()(const int i) const {
    for (unsigned j = 0; j < _inview.dimension(1); ++j) {
      for (unsigned k = 0; k < _inview.dimension(2); ++k) {
        // NOTE(review): mixed int/unsigned operands make this unsigned
        // arithmetic, so "-j*j" wraps rather than going negative -- confirm
        // the values are irrelevant to the benchmark.
        _inview(i,j,k,0,0,0,0) = i/2 -j*j + k/3;
      }
    }
  }
};
//DynRankView functor
// DynRankView counterpart of the rank-3 View fill functor: the parallel index
// selects the entry along dimension 0; dimensions 1 and 2 are walked serially.
template <typename DeviceType>
struct InitDynRankViewFunctor {
  typedef Kokkos::DynRankView<double, DeviceType> inviewtype;
  inviewtype _inview;

  InitDynRankViewFunctor( inviewtype &inview_ )
    : _inview(inview_)
  {}

  // Writes every (i, m, n) entry for the given i. The expression mixes int
  // and unsigned operands and is therefore evaluated in unsigned arithmetic.
  KOKKOS_INLINE_FUNCTION
  void operator()(const int i) const {
    for (unsigned m = 0; m < _inview.dimension(1); ++m) {
      for (unsigned n = 0; n < _inview.dimension(2); ++n) {
        _inview(i,m,n) = i/2 -m*m + n/3;
      }
    }
  }

  // Companion functor: accumulates all (i, m, n) entries of the input
  // DynRankView into entry i of an output DynRankView.
  struct SumComputationTest
  {
    typedef Kokkos::DynRankView<double, DeviceType> inviewtype;
    inviewtype _inview;

    typedef Kokkos::DynRankView<double, DeviceType> outviewtype;
    outviewtype _outview;

    KOKKOS_INLINE_FUNCTION
    SumComputationTest(inviewtype &inview_ , outviewtype &outview_)
      : _inview(inview_)
      , _outview(outview_)
    {}

    // Adds each element directly into _outview(i); the output access is part
    // of what the surrounding benchmark times, so no local accumulator is used.
    KOKKOS_INLINE_FUNCTION
    void operator()(const int i) const {
      for (unsigned m = 0; m < _inview.dimension(1); ++m) {
        for (unsigned n = 0; n < _inview.dimension(2); ++n) {
          _outview(i) += _inview(i,m,n) ;
        }
      }
    }
  };
};
template <typename DeviceType>
void test_dynrankview_op_perf( const int par_size )
{
typedef DeviceType execution_space;
typedef typename execution_space::size_type size_type;
const size_type dim2 = 900;
const size_type dim3 = 300;
double elapsed_time_view = 0;
double elapsed_time_compview = 0;
double elapsed_time_strideview = 0;
double elapsed_time_view_rank7 = 0;
double elapsed_time_drview = 0;
double elapsed_time_compdrview = 0;
Kokkos::Timer timer;
{
Kokkos::View<double***,DeviceType> testview("testview",par_size,dim2,dim3);
typedef InitViewFunctor<DeviceType> FunctorType;
timer.reset();
Kokkos::RangePolicy<DeviceType> policy(0,par_size);
Kokkos::parallel_for( policy , FunctorType(testview) );
DeviceType::fence();
elapsed_time_view = timer.seconds();
std::cout << " View time (init only): " << elapsed_time_view << std::endl;
timer.reset();
Kokkos::View<double*,DeviceType> sumview("sumview",par_size);
Kokkos::parallel_for( policy , typename FunctorType::SumComputationTest(testview, sumview) );
DeviceType::fence();
elapsed_time_compview = timer.seconds();
std::cout << " View sum computation time: " << elapsed_time_view << std::endl;
Kokkos::View<double***,Kokkos::LayoutStride, DeviceType> teststrideview = Kokkos::subview(testview, Kokkos::ALL, Kokkos::ALL,Kokkos::ALL);
typedef InitStrideViewFunctor<DeviceType> FunctorStrideType;
timer.reset();
Kokkos::parallel_for( policy , FunctorStrideType(teststrideview) );
DeviceType::fence();
elapsed_time_strideview = timer.seconds();
std::cout << " Strided View time (init only): " << elapsed_time_strideview << std::endl;
}
{
Kokkos::View<double*******,DeviceType> testview("testview",par_size,dim2,dim3,1,1,1,1);
typedef InitViewRank7Functor<DeviceType> FunctorType;
timer.reset();
Kokkos::RangePolicy<DeviceType> policy(0,par_size);
Kokkos::parallel_for( policy , FunctorType(testview) );
DeviceType::fence();
elapsed_time_view_rank7 = timer.seconds();
std::cout << " View Rank7 time (init only): " << elapsed_time_view_rank7 << std::endl;
}
{
Kokkos::DynRankView<double,DeviceType> testdrview("testdrview",par_size,dim2,dim3);
typedef InitDynRankViewFunctor<DeviceType> FunctorType;
timer.reset();
Kokkos::RangePolicy<DeviceType> policy(0,par_size);
Kokkos::parallel_for( policy , FunctorType(testdrview) );
DeviceType::fence();
elapsed_time_drview = timer.seconds();
std::cout << " DynRankView time (init only): " << elapsed_time_drview << std::endl;
timer.reset();
Kokkos::DynRankView<double,DeviceType> sumview("sumview",par_size);
Kokkos::parallel_for( policy , typename FunctorType::SumComputationTest(testdrview, sumview) );
DeviceType::fence();
elapsed_time_compdrview = timer.seconds();
std::cout << " DynRankView sum computation time: " << elapsed_time_compdrview << std::endl;
}
std::cout << " Ratio of View to DynRankView time: " << elapsed_time_view / elapsed_time_drview << std::endl; //expect < 1
std::cout << " Ratio of View to DynRankView sum computation time: " << elapsed_time_compview / elapsed_time_compdrview << std::endl; //expect < 1
std::cout << " Ratio of View to View Rank7 time: " << elapsed_time_view / elapsed_time_view_rank7 << std::endl; //expect < 1
std::cout << " Ratio of StrideView to DynRankView time: " << elapsed_time_strideview / elapsed_time_drview << std::endl; //expect < 1
std::cout << " Ratio of DynRankView to View Rank7 time: " << elapsed_time_drview / elapsed_time_view_rank7 << std::endl; //expect ?
timer.reset();
} //end test_dynrankview
} //end Performance
#endif

View File

@ -0,0 +1,231 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP
#define KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP
#include <Kokkos_Core.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <vector>
#include <algorithm>
#include <impl/Kokkos_Timer.hpp>
// This test will simulate global ids
namespace Performance {
// Bounds of the id-count sweep used by the global_2_local tests: the count
// starts at begin_id_size and is multiplied by id_step while it does not
// exceed end_id_size.
static const unsigned begin_id_size = 256u;
static const unsigned end_id_size = 1u << 22;
static const unsigned id_step = 2u;
// Overlays a 32-bit word with its four individual bytes so generate_ids can
// permute the bytes of an index. `word` must stay the first member: it is the
// one set by brace-initialization.
union helper
{
uint32_t word;
uint8_t byte[4];
};
// Functor that fills a view with one scrambled 32-bit "global id" per local
// index. Constructing it launches the parallel fill over the whole view.
template <typename Device>
struct generate_ids
{
  typedef Device execution_space;
  typedef typename execution_space::size_type size_type;
  typedef Kokkos::View<uint32_t*,execution_space> local_id_view;

  local_id_view local_2_global;

  generate_ids( local_id_view & ids)
    : local_2_global(ids)
  {
    Kokkos::parallel_for(local_2_global.dimension_0(), *this);
  }

  // Derives the id for index i by complementing the 32-bit index and then
  // exchanging byte 0 with byte 2 and byte 1 with byte 3. Both steps are
  // invertible, so distinct indices give distinct ids.
  KOKKOS_INLINE_FUNCTION
  void operator()(size_type i) const
  {
    helper scrambled;
    scrambled.word = ~static_cast<uint32_t>(i);

    const uint8_t low_even = scrambled.byte[0];
    scrambled.byte[0] = scrambled.byte[2];
    scrambled.byte[2] = low_even;

    const uint8_t low_odd = scrambled.byte[1];
    scrambled.byte[1] = scrambled.byte[3];
    scrambled.byte[3] = low_odd;

    local_2_global[i] = scrambled.word;
  }
};
// Functor that inserts (global id -> local index) pairs into an UnorderedMap.
// Constructing it launches the parallel insert over every local index.
template <typename Device>
struct fill_map
{
  typedef Device execution_space;
  typedef typename execution_space::size_type size_type;
  typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view;
  typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view;

  global_id_view global_2_local;
  local_id_view local_2_global;

  fill_map( global_id_view gIds, local_id_view lIds)
    : global_2_local(gIds)
    , local_2_global(lIds)
  {
    Kokkos::parallel_for(local_2_global.dimension_0(), *this);
  }

  // Maps the global id stored at position i back to i. The insert result is
  // intentionally not inspected here.
  KOKKOS_INLINE_FUNCTION
  void operator()(size_type i) const
  {
    const uint32_t global_id = local_2_global[i];
    global_2_local.insert( global_id, i);
  }
};
// Reduction functor that looks every global id up in the map and counts the
// entries whose stored value differs from the expected local index.
// Constructing it runs the reduction and writes the count to num_errors.
template <typename Device>
struct find_test
{
  typedef Device execution_space;
  typedef typename execution_space::size_type size_type;
  typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view;
  typedef Kokkos::UnorderedMap<const uint32_t, const size_type,execution_space> global_id_view;

  global_id_view global_2_local;
  local_id_view local_2_global;

  typedef size_t value_type;

  find_test( global_id_view gIds, local_id_view lIds, value_type & num_errors)
    : global_2_local(gIds)
    , local_2_global(lIds)
  {
    Kokkos::parallel_reduce(local_2_global.dimension_0(), *this, num_errors);
  }

  // Reduction identity: no errors seen yet.
  KOKKOS_INLINE_FUNCTION
  void init(value_type & v) const
  {
    v = 0;
  }

  // Combines two partial error counts.
  KOKKOS_INLINE_FUNCTION
  void join(volatile value_type & dst, volatile value_type const & src) const
  {
    dst += src;
  }

  // Looks up the global id for local index i and counts an error when the
  // value stored at the returned slot is not i.
  KOKKOS_INLINE_FUNCTION
  void operator()(size_type i, value_type & num_errors) const
  {
    const uint32_t slot = global_2_local.find( local_2_global[i] );
    const bool mismatch = ( global_2_local.value_at(slot) != i );
    if ( mismatch ) {
      ++num_errors;
    }
  }
};
// Times the four phases of a global-id -> local-id workflow for num_ids ids
// and prints one CSV row to std::cout: "size, create, generate, fill, find".
//
//   create   - allocate the id view and a map with capacity 1.5 * num_ids
//   generate - fill the view with unique scrambled ids
//   fill     - insert every (global id, local index) pair into the map
//   find     - 100 passes looking every id up again
//
// Asserts (gtest ASSERT_EQ) that no lookup in any of the find passes returned
// a wrong local index.
template <typename Device>
void test_global_to_local_ids(unsigned num_ids)
{
  typedef Device execution_space;
  typedef typename execution_space::size_type size_type;
  typedef Kokkos::View<uint32_t*,execution_space> local_id_view;
  typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view;

  //size
  std::cout << num_ids << ", ";

  double elapsed_time = 0;
  Kokkos::Timer timer;

  local_id_view local_2_global("local_ids", num_ids);
  global_id_view global_2_local((3u*num_ids)/2u);

  //create
  elapsed_time = timer.seconds();
  std::cout << elapsed_time << ", ";
  timer.reset();

  // generate unique ids (the functor's constructor launches the kernel)
  {
    generate_ids<Device> gen(local_2_global);
  }
  Device::fence();

  // generate
  elapsed_time = timer.seconds();
  std::cout << elapsed_time << ", ";
  timer.reset();

  {
    fill_map<Device> fill(global_2_local, local_2_global);
  }
  Device::fence();

  // fill
  elapsed_time = timer.seconds();
  std::cout << elapsed_time << ", ";
  timer.reset();

  // parallel_reduce overwrites its result argument, so each pass reduces into
  // its own counter and the counts are summed; otherwise only the final pass
  // would be checked by the assertion below.
  size_t num_errors = 0;
  for (int i=0; i<100; ++i)
  {
    size_t pass_errors = 0;
    find_test<Device> find(global_2_local, local_2_global, pass_errors);
    num_errors += pass_errors;
  }
  Device::fence();

  // find
  elapsed_time = timer.seconds();
  std::cout << elapsed_time << std::endl;

  ASSERT_EQ( num_errors, 0u);
}
} // namespace Performance
#endif //KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP

View File

@ -0,0 +1,50 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
// Test driver entry point: hands the command line to GoogleTest, runs every
// registered test, and returns the value produced by RUN_ALL_TESTS().
int main(int argc, char *argv[]) {
  ::testing::InitGoogleTest(&argc, argv);
  const int status = RUN_ALL_TESTS();
  return status;
}

View File

@ -0,0 +1,140 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <TestGlobal2LocalIds.hpp>
#include <TestUnorderedMapPerformance.hpp>
#include <TestDynRankView.hpp>
#include <iomanip>
#include <sstream>
#include <string>
#include <fstream>
namespace Performance {
// Test fixture that initializes the Kokkos OpenMP backend once for the whole
// test case and finalizes it afterwards.
class openmp : public ::testing::Test {
protected:
  // Sets the std::cout number format, picks the thread count (all hardware
  // threads reported by hwloc, or 4 when hwloc is unavailable) and
  // initializes Kokkos::OpenMP with it.
  static void SetUpTestCase()
  {
    std::cout << std::setprecision(5) << std::scientific;

    unsigned num_threads = 4;
    if (Kokkos::hwloc::available()) {
      const unsigned numa_count       = Kokkos::hwloc::get_available_numa_count();
      const unsigned cores_per_numa   = Kokkos::hwloc::get_available_cores_per_numa();
      const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
      num_threads = numa_count * cores_per_numa * threads_per_core;
    }

    std::cout << "OpenMP: " << num_threads << std::endl;

    Kokkos::OpenMP::initialize( num_threads );

    std::cout << "available threads: " << omp_get_max_threads() << std::endl;
  }

  // Finalizes Kokkos::OpenMP, then resets the OpenMP runtime to a single
  // thread and checks that the reset took effect.
  static void TearDownTestCase()
  {
    Kokkos::OpenMP::finalize();

    omp_set_num_threads(1);

    ASSERT_EQ( 1 , omp_get_max_threads() );
  }
};
// Runs the DynRankView-vs-View access benchmark on the OpenMP backend with a
// parallel range of 8192.
TEST_F( openmp, dynrankview_perf )
{
  std::cout << "OpenMP" << std::endl
            << " DynRankView vs View: Initialization Only " << std::endl;
  test_dynrankview_op_perf<Kokkos::OpenMP>( 8192 );
}
// Sweeps the global-to-local id benchmark on the OpenMP backend, multiplying
// the id count by id_step from begin_id_size up to and including end_id_size.
TEST_F( openmp, global_2_local)
{
  std::cout << "OpenMP" << std::endl;
  std::cout << "size, create, generate, fill, find" << std::endl;

  unsigned num_ids = Performance::begin_id_size;
  while (num_ids <= Performance::end_id_size) {
    test_global_to_local_ids<Kokkos::OpenMP>(num_ids);
    num_ids *= Performance::id_step;
  }
}
// Runs the UnorderedMap insert benchmark with Near = true on the OpenMP
// backend. The output file prefix encodes the thread count: all hardware
// threads reported by hwloc, or 4 when hwloc is unavailable.
TEST_F( openmp, unordered_map_performance_near)
{
  unsigned num_openmp = 4;
  if (Kokkos::hwloc::available()) {
    const unsigned numa_count       = Kokkos::hwloc::get_available_numa_count();
    const unsigned cores_per_numa   = Kokkos::hwloc::get_available_cores_per_numa();
    const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
    num_openmp = numa_count * cores_per_numa * threads_per_core;
  }

  std::ostringstream base_file_name;
  base_file_name << "openmp-" << num_openmp << "-near";

  const std::string prefix = base_file_name.str();
  Perf::run_performance_tests<Kokkos::OpenMP,true>(prefix);
}
// Runs the UnorderedMap insert benchmark with Near = false on the OpenMP
// backend. The output file prefix encodes the thread count: all hardware
// threads reported by hwloc, or 4 when hwloc is unavailable.
TEST_F( openmp, unordered_map_performance_far)
{
  unsigned num_openmp = 4;
  if (Kokkos::hwloc::available()) {
    const unsigned numa_count       = Kokkos::hwloc::get_available_numa_count();
    const unsigned cores_per_numa   = Kokkos::hwloc::get_available_cores_per_numa();
    const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
    num_openmp = numa_count * cores_per_numa * threads_per_core;
  }

  std::ostringstream base_file_name;
  base_file_name << "openmp-" << num_openmp << "-far";

  const std::string prefix = base_file_name.str();
  Perf::run_performance_tests<Kokkos::OpenMP,false>(prefix);
}
} // namespace Performance

View File

@ -0,0 +1,135 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <iomanip>
#include <TestGlobal2LocalIds.hpp>
#include <TestUnorderedMapPerformance.hpp>
#include <TestDynRankView.hpp>
#include <iomanip>
#include <sstream>
#include <string>
#include <fstream>
namespace Performance {
// Test fixture that initializes the Kokkos Threads backend once for the whole
// test case and finalizes it afterwards.
class threads : public ::testing::Test {
protected:
  // Sets the std::cout number format, picks the thread count (all hardware
  // threads reported by hwloc, or 4 when hwloc is unavailable) and
  // initializes Kokkos::Threads with it.
  static void SetUpTestCase()
  {
    std::cout << std::setprecision(5) << std::scientific;

    unsigned num_threads = 4;
    if (Kokkos::hwloc::available()) {
      const unsigned numa_count       = Kokkos::hwloc::get_available_numa_count();
      const unsigned cores_per_numa   = Kokkos::hwloc::get_available_cores_per_numa();
      const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
      num_threads = numa_count * cores_per_numa * threads_per_core;
    }

    std::cout << "Threads: " << num_threads << std::endl;

    Kokkos::Threads::initialize( num_threads );
  }

  // Shuts the Threads backend down after the last test of the case.
  static void TearDownTestCase()
  {
    Kokkos::Threads::finalize();
  }
};
// Runs the DynRankView-vs-View access benchmark on the Threads backend with a
// parallel range of 8192.
TEST_F( threads, dynrankview_perf )
{
  std::cout << "Threads" << std::endl
            << " DynRankView vs View: Initialization Only " << std::endl;
  test_dynrankview_op_perf<Kokkos::Threads>( 8192 );
}
// Sweeps the global-to-local id benchmark on the Threads backend, multiplying
// the id count by id_step from begin_id_size up to and including end_id_size.
TEST_F( threads, global_2_local)
{
  std::cout << "Threads" << std::endl;
  std::cout << "size, create, generate, fill, find" << std::endl;

  unsigned num_ids = Performance::begin_id_size;
  while (num_ids <= Performance::end_id_size) {
    test_global_to_local_ids<Kokkos::Threads>(num_ids);
    num_ids *= Performance::id_step;
  }
}
// Runs the UnorderedMap insert benchmark with Near = true on the Threads
// backend. The output file prefix encodes the thread count: all hardware
// threads reported by hwloc, or 4 when hwloc is unavailable.
TEST_F( threads, unordered_map_performance_near)
{
  unsigned num_threads = 4;
  if (Kokkos::hwloc::available()) {
    const unsigned numa_count       = Kokkos::hwloc::get_available_numa_count();
    const unsigned cores_per_numa   = Kokkos::hwloc::get_available_cores_per_numa();
    const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
    num_threads = numa_count * cores_per_numa * threads_per_core;
  }

  std::ostringstream base_file_name;
  base_file_name << "threads-" << num_threads << "-near";

  const std::string prefix = base_file_name.str();
  Perf::run_performance_tests<Kokkos::Threads,true>(prefix);
}
// Runs the UnorderedMap insert benchmark with Near = false on the Threads
// backend. The output file prefix encodes the thread count: all hardware
// threads reported by hwloc, or 4 when hwloc is unavailable.
TEST_F( threads, unordered_map_performance_far)
{
  unsigned num_threads = 4;
  if (Kokkos::hwloc::available()) {
    const unsigned numa_count       = Kokkos::hwloc::get_available_numa_count();
    const unsigned cores_per_numa   = Kokkos::hwloc::get_available_cores_per_numa();
    const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
    num_threads = numa_count * cores_per_numa * threads_per_core;
  }

  std::ostringstream base_file_name;
  base_file_name << "threads-" << num_threads << "-far";

  const std::string prefix = base_file_name.str();
  Perf::run_performance_tests<Kokkos::Threads,false>(prefix);
}
} // namespace Performance

View File

@ -0,0 +1,262 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
#define KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
#include <impl/Kokkos_Timer.hpp>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <sstream>
namespace Perf {
template <typename Device, bool Near>
struct UnorderedMapTest
{
typedef Device execution_space;
typedef Kokkos::UnorderedMap<uint32_t, uint32_t, execution_space> map_type;
typedef typename map_type::histogram_type histogram_type;
struct value_type {
uint32_t failed_count;
uint32_t max_list;
};
uint32_t capacity;
uint32_t inserts;
uint32_t collisions;
double seconds;
map_type map;
histogram_type histogram;
UnorderedMapTest( uint32_t arg_capacity, uint32_t arg_inserts, uint32_t arg_collisions)
: capacity(arg_capacity)
, inserts(arg_inserts)
, collisions(arg_collisions)
, seconds(0)
, map(capacity)
, histogram(map.get_histogram())
{
Kokkos::Timer wall_clock ;
wall_clock.reset();
value_type v = {};
int loop_count = 0;
do {
++loop_count;
v = value_type();
Kokkos::parallel_reduce(inserts, *this, v);
if (v.failed_count > 0u) {
const uint32_t new_capacity = map.capacity() + ((map.capacity()*3ull)/20u) + v.failed_count/collisions ;
map.rehash( new_capacity );
}
} while (v.failed_count > 0u);
seconds = wall_clock.seconds();
switch (loop_count)
{
case 1u: std::cout << " \033[0;32m" << loop_count << "\033[0m "; break;
case 2u: std::cout << " \033[1;31m" << loop_count << "\033[0m "; break;
default: std::cout << " \033[0;31m" << loop_count << "\033[0m "; break;
}
std::cout << std::setprecision(2) << std::fixed << std::setw(5) << (1e9*(seconds/(inserts))) << "; " << std::flush;
histogram.calculate();
Device::fence();
}
void print(std::ostream & metrics_out, std::ostream & length_out, std::ostream & distance_out, std::ostream & block_distance_out)
{
metrics_out << map.capacity() << " , ";
metrics_out << inserts/collisions << " , ";
metrics_out << (100.0 * inserts/collisions) / map.capacity() << " , ";
metrics_out << inserts << " , ";
metrics_out << (map.failed_insert() ? "true" : "false") << " , ";
metrics_out << collisions << " , ";
metrics_out << 1e9*(seconds/inserts) << " , ";
metrics_out << seconds << std::endl;
length_out << map.capacity() << " , ";
length_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
length_out << collisions << " , ";
histogram.print_length(length_out);
distance_out << map.capacity() << " , ";
distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
distance_out << collisions << " , ";
histogram.print_distance(distance_out);
block_distance_out << map.capacity() << " , ";
block_distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , ";
block_distance_out << collisions << " , ";
histogram.print_block_distance(block_distance_out);
}
KOKKOS_INLINE_FUNCTION
void init( value_type & v ) const
{
v.failed_count = 0;
v.max_list = 0;
}
KOKKOS_INLINE_FUNCTION
void join( volatile value_type & dst, const volatile value_type & src ) const
{
dst.failed_count += src.failed_count;
dst.max_list = src.max_list < dst.max_list ? dst.max_list : src.max_list;
}
KOKKOS_INLINE_FUNCTION
void operator()(uint32_t i, value_type & v) const
{
const uint32_t key = Near ? i/collisions : i%(inserts/collisions);
typename map_type::insert_result result = map.insert(key,i);
v.failed_count += !result.failed() ? 0 : 1;
v.max_list = result.list_position() < v.max_list ? v.max_list : result.list_position();
}
};
//#define KOKKOS_COLLECT_UNORDERED_MAP_METRICS
// Runs the UnorderedMap insert benchmark over a sweep of capacities and
// writes four CSV files named "<base_file_name>-{metrics,length,distance,
// block_distance}.csv".
//
// The whole sweep is compiled only when KOKKOS_COLLECT_UNORDERED_MAP_METRICS
// is defined; otherwise the function just prints "skipping test".
template <typename Device, bool Near>
void run_performance_tests(std::string const & base_file_name)
{
#if defined(KOKKOS_COLLECT_UNORDERED_MAP_METRICS)
  std::ofstream metrics_out( (base_file_name + std::string("-metrics.csv")).c_str(), std::ofstream::out );
  std::ofstream length_out( (base_file_name + std::string("-length.csv")).c_str(), std::ofstream::out );
  std::ofstream distance_out( (base_file_name + std::string("-distance.csv")).c_str(), std::ofstream::out );
  std::ofstream block_distance_out( (base_file_name + std::string("-block_distance.csv")).c_str(), std::ofstream::out );

  // Fill ratios to test. A wider sweep that was previously listed here:
  // 0.50, 0.75, 0.80, 0.85, 0.90, 0.95, 1.00, 1.25, 2.00
  const double test_ratios[] = { 1.00 };
  const int num_ratios = sizeof(test_ratios) / sizeof(test_ratios[0]);

  // Collision ratios to test. A wider sweep that was previously listed here:
  // 1, 4, 16, 64
  const uint32_t collisions[] = { 16 };
  const int num_collisions = sizeof(collisions) / sizeof(collisions[0]);

  // set up file headers
  metrics_out << "Capacity , Unique , Percent Full , Attempted Inserts , Failed Inserts , Collision Ratio , Nanoseconds/Inserts, Seconds" << std::endl;
  length_out << "Capacity , Percent Full , ";
  distance_out << "Capacity , Percent Full , ";
  block_distance_out << "Capacity , Percent Full , ";

  for (int column = 0; column < 100; ++column) {
    length_out << column << " , ";
    distance_out << column << " , ";
    block_distance_out << column << " , ";
  }

  length_out << "\b\b\b " << std::endl;
  distance_out << "\b\b\b " << std::endl;
  block_distance_out << "\b\b\b " << std::endl;

  Kokkos::Timer sweep_timer ;
  for (int c = 0; c < num_collisions; ++c) {
    sweep_timer.reset();
    std::cout << "Collisions: " << collisions[c] << std::endl;

    for (int r = 0; r < num_ratios; ++r) {
      std::cout << std::setprecision(1) << std::fixed << std::setw(5) << (100.0*test_ratios[r]) << "% " << std::flush;

      // Capacities double from 2^14 up to (but not including) 2^25.
      for (uint32_t capacity = 1<<14; capacity < 1<<25; capacity <<= 1) {
        const uint32_t unique_keys = static_cast<uint32_t>(test_ratios[r]*(capacity));
        std::cout << capacity << std::flush;

        UnorderedMapTest<Device, Near> test(capacity, unique_keys*collisions[c], collisions[c]);
        Device::fence();
        test.print(metrics_out, length_out, distance_out, block_distance_out);
      }
      std::cout << "\b\b " << std::endl;
    }
    std::cout << " " << sweep_timer.seconds() << " secs" << std::endl;
  }

  metrics_out.close();
  length_out.close();
  distance_out.close();
  block_distance_out.close();
#else
  // Metrics collection is compiled out: silence the unused-parameter warning.
  (void)base_file_name;
  std::cout << "skipping test" << std::endl;
#endif
}
} // namespace Perf
#endif //KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP

View File

@ -0,0 +1,31 @@
# Build description for the kokkoscontainers library.
#
# TRIBITS_CONFIGURE_FILE is a TriBITS macro defined outside this file;
# presumably it generates ${PACKAGE_NAME}_config.h in the binary directory
# -- TODO confirm against the TriBITS documentation.
TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h)
# Make headers in both the build tree and this source directory visible to
# the targets defined in this directory.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
#-----------------------------------------------------------------------------
# Start from empty lists; the FILE(GLOB ...) calls below overwrite them.
SET(HEADERS "")
SET(SOURCES "")
SET(HEADERS_IMPL "")
# The globs are evaluated when CMake runs, so files added later are only
# picked up after re-running CMake.
FILE(GLOB HEADERS *.hpp)
FILE(GLOB HEADERS_IMPL impl/*.hpp)
FILE(GLOB SOURCES impl/*.cpp)
# Implementation headers are installed explicitly into an impl/ subdirectory
# of the install include directory; they are passed as NOINSTALLHEADERS below.
SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})
INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/)
# Define the library. DEPLIBS is given no values here.
TRIBITS_ADD_LIBRARY(
kokkoscontainers
HEADERS ${HEADERS}
NOINSTALLHEADERS ${HEADERS_IMPL}
SOURCES ${SOURCES}
DEPLIBS
)
#-----------------------------------------------------------------------------

View File

@ -0,0 +1,437 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_BITSET_HPP
#define KOKKOS_BITSET_HPP
#include <Kokkos_Core.hpp>
#include <Kokkos_Functional.hpp>
#include <impl/Kokkos_Bitset_impl.hpp>
#include <stdexcept>
namespace Kokkos {
template <typename Device = Kokkos::DefaultExecutionSpace >
class Bitset;
template <typename Device = Kokkos::DefaultExecutionSpace >
class ConstBitset;
template <typename DstDevice, typename SrcDevice>
void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src);
template <typename DstDevice, typename SrcDevice>
void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
template <typename DstDevice, typename SrcDevice>
void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
/// A thread safe view to a bitset
template <typename Device>
class Bitset
{
public:
// The Device template parameter, re-exported under the usual Kokkos name.
typedef Device execution_space;
// Type used for sizes and bit indices.
typedef unsigned size_type;
// Flag bit of a scan_direction argument: when set, scan_block() uses
// bit_scan_reverse instead of bit_scan_forward.
enum { BIT_SCAN_REVERSE = 1u };
// Flag bit of a scan_direction argument: when set, update_hint() steps the
// block index by -1 instead of +1.
enum { MOVE_HINT_BACKWARD = 2u };
// The four combinations of the two flags above, for use as the
// scan_direction argument of find_any_set_near / find_any_unset_near.
enum {
BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u
, BIT_SCAN_REVERSE_MOVE_HINT_FORWARD = BIT_SCAN_REVERSE
, BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD = MOVE_HINT_BACKWARD
, BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD = BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD
};
private:
// Number of bits stored in one block (one `unsigned`).
enum { block_size = static_cast<unsigned>(sizeof(unsigned)*CHAR_BIT) };
// Mask that extracts the bit position inside a block from a bit index.
enum { block_mask = block_size-1u };
// Shift that turns a bit index into a block index (used as `i >> block_shift`);
// presumably log2(block_size) -- confirm against integral_power_of_two.
enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) };
public:
/// Construct a bitset holding \c arg_size bits.
///
/// Allocates enough blocks (labelled "Bitset") to hold \c arg_size bits and
/// records which bits of the final block are in use.
/// \param arg_size number of bits in the set (defaults to 0)
Bitset(unsigned arg_size = 0u)
  : m_size(arg_size)
  , m_last_block_mask(0u)
  , m_blocks("Bitset", ((m_size + block_mask) >> block_shift) )
{
  // Bits occupied in a partially used final block; 0 when the size is a
  // whole number of blocks, which leaves the mask at 0.
  const unsigned tail_bits = m_size & block_mask;
  m_last_block_mask = (1u << tail_bits) - 1u;
}
/// Assignment: copies the size and last-block mask and assigns the block
/// View itself (the blocks are not deep-copied; deep_copy() is the separate
/// free function for that).
Bitset<Device> & operator = (Bitset<Device> const & rhs)
{
  m_size            = rhs.m_size;
  m_last_block_mask = rhs.m_last_block_mask;
  m_blocks          = rhs.m_blocks;
  return *this;
}
/// Copy constructor: member-wise copy of the size, the last-block mask and
/// the block View.
Bitset( Bitset<Device> const & rhs) :
  m_size( rhs.m_size ),
  m_last_block_mask( rhs.m_last_block_mask ),
  m_blocks( rhs.m_blocks )
{
}
/// Number of bits in the set.
/// Can be called from the host or the device.
KOKKOS_FORCEINLINE_FUNCTION
unsigned size() const
{
  return m_size;
}
/// Number of bits which are set to 1, computed by Impl::BitsetCount.
/// Can only be called from the host.
unsigned count() const
{
  return Impl::BitsetCount< Bitset<Device> >(*this).apply();
}
/// set all bits to 1
/// can only be called from the host
void set()
{
Kokkos::deep_copy(m_blocks, ~0u );
if (m_last_block_mask) {
//clear the unused bits in the last block
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
raw_deep_copy( m_blocks.ptr_on_device() + (m_blocks.dimension_0() -1u), &m_last_block_mask, sizeof(unsigned));
}
}
/// Set all bits to 0 by filling every block with zero.
/// Can only be called from the host.
void reset()
{
  const unsigned all_clear = 0u;
  Kokkos::deep_copy(m_blocks, all_clear );
}
/// set all bits to 0
/// can only be called from the host
void clear()
{
Kokkos::deep_copy(m_blocks, 0u );
}
/// Atomically set the i'th bit to 1.
/// Can only be called from the device.
/// \return true if this call changed the bit from 0 to 1; false if it was
///         already 1 or \c i is not less than size().
KOKKOS_FORCEINLINE_FUNCTION
bool set( unsigned i ) const
{
  if ( !( i < m_size ) ) return false;

  unsigned * const word = &m_blocks[ i >> block_shift ];
  const unsigned bit = 1u << static_cast<int>( i & block_mask );
  // The fetched value is the block content before the OR.
  const unsigned before = atomic_fetch_or( word, bit );
  return !( before & bit );
}
/// Atomically set the i'th bit to 0.
/// Can only be called from the device.
/// \return true if the bit was 1 before this call; false if it was already
///         0 or \c i is not less than size().
KOKKOS_FORCEINLINE_FUNCTION
bool reset( unsigned i ) const
{
  if ( !( i < m_size ) ) return false;

  unsigned * const word = &m_blocks[ i >> block_shift ];
  const unsigned bit = 1u << static_cast<int>( i & block_mask );
  // The fetched value is the block content before the AND.
  return atomic_fetch_and( word, ~bit ) & bit;
}
/// Return true if the i'th bit is set to 1; false if it is 0 or \c i is not
/// less than size().
/// Can only be called from the device.
KOKKOS_FORCEINLINE_FUNCTION
bool test( unsigned i ) const
{
  if ( !( i < m_size ) ) return false;

  // volatile_load: read the block from memory on every call.
  const unsigned word = volatile_load(&m_blocks[ i >> block_shift ]);
  const unsigned bit = 1u << static_cast<int>( i & block_mask );
  return word & bit;
}
/// Used with the find_any_set_near and find_any_unset_near functions:
/// returns the maximum number of times those functions should be called
/// when searching for an available bit, which is the number of blocks.
KOKKOS_FORCEINLINE_FUNCTION
unsigned max_hint() const
{
  const unsigned num_blocks = m_blocks.dimension_0();
  return num_blocks;
}
/// Find a bit set to 1 near the hint.
/// Returns a pair<bool, unsigned>: if result.first is true then
/// result.second is the bit found; if result.first is false then
/// result.second is a new hint.
/// Only the single block selected by the hint is examined per call; a hint
/// whose block index is out of range is redirected to block 0.
KOKKOS_INLINE_FUNCTION
Kokkos::pair<bool, unsigned> find_any_set_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const
{
  unsigned block_idx = hint >> block_shift;
  if ( !( block_idx < m_blocks.dimension_0() ) ) {
    block_idx = 0;
  }
  const unsigned offset = hint & block_mask;

  unsigned block = volatile_load(&m_blocks[ block_idx ]);

  // In a partially used final block, ignore the bits beyond size().
  const bool partial_last_block =
    m_last_block_mask && !( block_idx < (m_blocks.dimension_0()-1) );
  if ( partial_last_block ) {
    block = block & m_last_block_mask;
  }

  return find_any_helper(block_idx, offset, block, scan_direction);
}
/// Find a bit set to 0 near the hint.
/// Returns a pair<bool, unsigned>: if result.first is true then
/// result.second is the bit found; if result.first is false then
/// result.second is a new hint.
/// Only the single block selected by the hint is examined per call.
/// A hint whose block index is out of range is redirected to block 0, the
/// same way find_any_set_near handles it, instead of indexing m_blocks
/// past its end.
KOKKOS_INLINE_FUNCTION
Kokkos::pair<bool, unsigned> find_any_unset_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const
{
  // Clamp the block index so an out-of-range hint cannot read out of bounds.
  const unsigned block_idx = (hint >> block_shift) < m_blocks.dimension_0() ? (hint >> block_shift) : 0;
  const unsigned offset = hint & block_mask;
  unsigned block = volatile_load(&m_blocks[ block_idx ]);
  // Invert so that unset bits become the candidates; in a partially used
  // final block, ignore the bits beyond size().
  block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1) ) ? ~block : ~block & m_last_block_mask ;
  return find_any_helper(block_idx, offset, block, scan_direction);
}
private:
/// Shared tail of find_any_set_near / find_any_unset_near.
/// \param block_idx      index of the block that was examined
/// \param offset         bit position inside the block taken from the hint
/// \param block          candidate bits of that block (1 = candidate)
/// \param scan_direction combination of BIT_SCAN_REVERSE / MOVE_HINT_BACKWARD
/// \return (true, bit index) when the block has a candidate, otherwise
///         (false, next hint).
KOKKOS_FORCEINLINE_FUNCTION
Kokkos::pair<bool, unsigned> find_any_helper(unsigned block_idx, unsigned offset, unsigned block, unsigned scan_direction) const
{
  const bool found = block > 0u;
  const unsigned position = found
    ? scan_block( (block_idx << block_shift), offset, block, scan_direction )
    : update_hint( block_idx, offset, scan_direction );
  return Kokkos::pair<bool, unsigned>( found, position );
}
/// Pick one set bit of \c block and return its index in the whole bitset.
/// \param block_start    index of the first bit of the block (the caller
///                       passes block_idx << block_shift)
/// \param offset         bit position inside the block taken from the hint
/// \param block          candidate bits; find_any_helper only calls this
///                       with a non-zero value
/// \param scan_direction BIT_SCAN_REVERSE selects bit_scan_reverse,
///                       otherwise bit_scan_forward is used
KOKKOS_FORCEINLINE_FUNCTION
unsigned scan_block(unsigned block_start, int offset, unsigned block, unsigned scan_direction ) const
{
// Forward scan: rotate by offset. Reverse scan: rotate by
// (offset + block_mask) & block_mask, i.e. offset-1 modulo block_size.
offset = !(scan_direction & BIT_SCAN_REVERSE) ? offset : (offset + block_mask) & block_mask;
// Rotate the block by that amount so the scan starts relative to the hint
// position (presumably moving bit `offset` to bit 0 -- confirm against
// Impl::rotate_right).
block = Impl::rotate_right(block, offset);
// Scan the rotated block, add the rotation back and wrap with block_mask to
// recover the position inside the original block, then add block_start.
return ((( !(scan_direction & BIT_SCAN_REVERSE) ?
Impl::bit_scan_forward(block) :
Impl::bit_scan_reverse(block)
) + offset
) & block_mask
) + block_start;
}
/// Compute the hint for the next attempt after a block had no candidate.
/// \param block_idx      index of the block just examined (signed so that
///                       stepping below 0 can be detected)
/// \param offset         bit position inside the block, carried over unchanged
/// \param scan_direction MOVE_HINT_BACKWARD selects a step of -1, otherwise +1
/// \return bit index of \c offset inside the neighbouring block, wrapping
///         around at both ends of the block array.
KOKKOS_FORCEINLINE_FUNCTION
unsigned update_hint( long long block_idx, unsigned offset, unsigned scan_direction ) const
{
// Step to the previous or next block.
block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1;
// Stepped before the first block: wrap to the last block.
block_idx = block_idx >= 0 ? block_idx : m_blocks.dimension_0() - 1;
// Stepped past the last block: wrap to the first block.
block_idx = block_idx < static_cast<long long>(m_blocks.dimension_0()) ? block_idx : 0;
return static_cast<unsigned>(block_idx)*block_size + offset;
}
private:
// Number of bits in the set.
unsigned m_size;
// Mask of the bits in use in the final block; 0 when m_size is a whole
// number of blocks (see the constructor).
unsigned m_last_block_mask;
// Bit storage, one `unsigned` per block.
View< unsigned *, execution_space, MemoryTraits<RandomAccess> > m_blocks;
private:
template <typename DDevice>
friend class Bitset;
template <typename DDevice>
friend class ConstBitset;
template <typename Bitset>
friend struct Impl::BitsetCount;
template <typename DstDevice, typename SrcDevice>
friend void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src);
template <typename DstDevice, typename SrcDevice>
friend void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
};
/// a thread-safe view to a const bitset
/// i.e. can only test bits
template <typename Device>
class ConstBitset
{
public:
typedef Device execution_space;
typedef unsigned size_type;
private:
enum { block_size = static_cast<unsigned>(sizeof(unsigned)*CHAR_BIT) };
enum { block_mask = block_size -1u };
enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) };
public:
ConstBitset()
: m_size (0)
{}
ConstBitset(Bitset<Device> const& rhs)
: m_size(rhs.m_size)
, m_blocks(rhs.m_blocks)
{}
ConstBitset(ConstBitset<Device> const& rhs)
: m_size( rhs.m_size )
, m_blocks( rhs.m_blocks )
{}
ConstBitset<Device> & operator = (Bitset<Device> const & rhs)
{
this->m_size = rhs.m_size;
this->m_blocks = rhs.m_blocks;
return *this;
}
ConstBitset<Device> & operator = (ConstBitset<Device> const & rhs)
{
this->m_size = rhs.m_size;
this->m_blocks = rhs.m_blocks;
return *this;
}
KOKKOS_FORCEINLINE_FUNCTION
unsigned size() const
{
return m_size;
}
unsigned count() const
{
Impl::BitsetCount< ConstBitset<Device> > f(*this);
return f.apply();
}
KOKKOS_FORCEINLINE_FUNCTION
bool test( unsigned i ) const
{
if ( i < m_size ) {
const unsigned block = m_blocks[ i >> block_shift ];
const unsigned mask = 1u << static_cast<int>( i & block_mask );
return block & mask;
}
return false;
}
private:
unsigned m_size;
View< const unsigned *, execution_space, MemoryTraits<RandomAccess> > m_blocks;
private:
template <typename DDevice>
friend class ConstBitset;
template <typename Bitset>
friend struct Impl::BitsetCount;
template <typename DstDevice, typename SrcDevice>
friend void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
template <typename DstDevice, typename SrcDevice>
friend void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src);
};
/// Copy the blocks of \c src into the blocks of \c dst.
/// \throws std::runtime_error if the two bitsets differ in size().
template <typename DstDevice, typename SrcDevice>
void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src)
{
  if (dst.size() != src.size()) {
    throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
  }

  typedef typename DstDevice::memory_space dst_memory_space;
  typedef typename SrcDevice::memory_space src_memory_space;

  // Raw byte copy of the whole block array between the two memory spaces.
  Kokkos::Impl::DeepCopy< dst_memory_space, src_memory_space >(
      dst.m_blocks.ptr_on_device()
    , src.m_blocks.ptr_on_device()
    , sizeof(unsigned)*src.m_blocks.dimension_0()
    );
}
/// Copy the blocks of the read-only bitset \c src into the blocks of \c dst.
/// \throws std::runtime_error if the two bitsets differ in size().
template <typename DstDevice, typename SrcDevice>
void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
{
  if (dst.size() != src.size()) {
    throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
  }

  typedef typename DstDevice::memory_space dst_memory_space;
  typedef typename SrcDevice::memory_space src_memory_space;

  // Raw byte copy of the whole block array between the two memory spaces.
  Kokkos::Impl::DeepCopy< dst_memory_space, src_memory_space >(
      dst.m_blocks.ptr_on_device()
    , src.m_blocks.ptr_on_device()
    , sizeof(unsigned)*src.m_blocks.dimension_0()
    );
}
/// Copy the blocks of \c src into the blocks of \c dst.
/// \throws std::runtime_error if the two bitsets differ in size().
///
/// NOTE(review): dst.m_blocks is a View of `const unsigned`, so the
/// destination pointer is presumably const-qualified -- confirm that
/// Impl::DeepCopy accepts it when this overload is instantiated.
template <typename DstDevice, typename SrcDevice>
void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
{
  if (dst.size() != src.size()) {
    throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!");
  }

  typedef typename DstDevice::memory_space dst_memory_space;
  typedef typename SrcDevice::memory_space src_memory_space;

  // Raw byte copy of the whole block array between the two memory spaces.
  Kokkos::Impl::DeepCopy< dst_memory_space, src_memory_space >(
      dst.m_blocks.ptr_on_device()
    , src.m_blocks.ptr_on_device()
    , sizeof(unsigned)*src.m_blocks.dimension_0()
    );
}
} // namespace Kokkos
#endif //KOKKOS_BITSET_HPP

View File

@ -0,0 +1,982 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_DualView.hpp
/// \brief Declaration and definition of Kokkos::DualView.
///
/// This header file declares and defines Kokkos::DualView and its
/// related nonmember functions.
#ifndef KOKKOS_DUALVIEW_HPP
#define KOKKOS_DUALVIEW_HPP
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Error.hpp>
namespace Kokkos {
/* \class DualView
* \brief Container to manage mirroring a Kokkos::View that lives
* in device memory with a Kokkos::View that lives in host memory.
*
* This class provides capabilities to manage data which exists in two
* memory spaces at the same time. It keeps views of the same layout
* on two memory spaces as well as modified flags for both
* allocations. Users are responsible for setting the modified flags
* manually if they change the data in either memory space, by calling
* the modify() method templated on the device where they modified the
* data. Users may synchronize data by calling the sync() function,
* templated on the device towards which they want to synchronize
* (i.e., the target of the one-way copy operation).
*
* The DualView class also provides convenience methods such as
* realloc, resize and capacity which call the appropriate methods of
* the underlying Kokkos::View objects.
*
* The four template arguments are the same as those of Kokkos::View.
* (Please refer to that class' documentation for a detailed
* description.)
*
* \tparam DataType The type of the entries stored in the container.
*
* \tparam Layout The array's layout in memory.
*
* \tparam Device The Kokkos Device type. If its memory space is
* not the same as the host's memory space, then DualView will
* contain two separate Views: one in device memory, and one in
* host memory. Otherwise, DualView will only store one View.
*
* \tparam MemoryTraits (optional) The user's intended memory access
* behavior. Please see the documentation of Kokkos::View for
* examples. The default suffices for most users.
*/
template< class DataType ,
class Arg1Type = void ,
class Arg2Type = void ,
class Arg3Type = void>
class DualView : public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
{
public:
//! \name Typedefs for device types and various Kokkos::View specializations.
//@{
typedef ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ;
//! The Kokkos Host Device type;
typedef typename traits::host_mirror_space host_mirror_space ;
//! The type of a Kokkos::View on the device.
typedef View< typename traits::data_type ,
Arg1Type ,
Arg2Type ,
Arg3Type > t_dev ;
/// \typedef t_host
/// \brief The type of a Kokkos::View host mirror of \c t_dev.
typedef typename t_dev::HostMirror t_host ;
//! The type of a const View on the device.
typedef View< typename traits::const_data_type ,
Arg1Type ,
Arg2Type ,
Arg3Type > t_dev_const ;
/// \typedef t_host_const
/// \brief The type of a const View host mirror of \c t_dev_const.
typedef typename t_dev_const::HostMirror t_host_const;
//! The type of a const, random-access View on the device.
typedef View< typename traits::const_data_type ,
typename traits::array_layout ,
typename traits::device_type ,
Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_dev_const_randomread ;
/// \typedef t_host_const_randomread
/// \brief The type of a const, random-access View host mirror of
/// \c t_dev_const_randomread.
typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread;
//! The type of an unmanaged View on the device.
typedef View< typename traits::data_type ,
typename traits::array_layout ,
typename traits::device_type ,
MemoryUnmanaged> t_dev_um;
//! The type of an unmanaged View host mirror of \c t_dev_um.
typedef View< typename t_host::data_type ,
typename t_host::array_layout ,
typename t_host::device_type ,
MemoryUnmanaged> t_host_um;
//! The type of a const unmanaged View on the device.
typedef View< typename traits::const_data_type ,
typename traits::array_layout ,
typename traits::device_type ,
MemoryUnmanaged> t_dev_const_um;
//! The type of a const unmanaged View host mirror of \c t_dev_const_um.
typedef View<typename t_host::const_data_type,
typename t_host::array_layout,
typename t_host::device_type,
MemoryUnmanaged> t_host_const_um;
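//! The type of a const, unmanaged, random-access View.
//! NOTE(review): declared with the \c t_host traits although it is named
//! t_dev_*; confirm whether the device traits were intended.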
typedef View< typename t_host::const_data_type ,
typename t_host::array_layout ,
typename t_host::device_type ,
Kokkos::MemoryTraits<Kokkos::Unmanaged|Kokkos::RandomAccess> > t_dev_const_randomread_um ;
/// \typedef t_host_const_randomread_um
/// \brief The HostMirror type of \c t_dev_const_randomread.
/// NOTE(review): this mirrors \c t_dev_const_randomread rather than
/// \c t_dev_const_randomread_um; confirm which was intended.
typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread_um;
//@}
//! \name The two View instances.
//@{
//! The View on the device.
t_dev d_view;
//! The host mirror of \c d_view.
t_host h_view;
//@}
//! \name Counters to keep track of changes ("modified" flags)
//@{
// Each counter is a View holding a single unsigned int. modify() raises the
// counter of the side that changed above the other one; sync() resets both
// to 0 after copying.
View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_device;
View<unsigned int,LayoutLeft,typename t_host::execution_space> modified_host;
//@}
//! \name Constructors
//@{
/// \brief Default constructor.
///
/// The device and host View objects are left default-constructed.
/// The two "modified" counters are allocated under the labels
/// "DualView::modified_device" and "DualView::modified_host" and
/// start out as "unmodified."
DualView ()
  : modified_device ("DualView::modified_device")
  , modified_host ("DualView::modified_host")
{}
/// \brief Allocate the device View and its host mirror.
///
/// Works like the analogous constructor of View.
///
/// \param label  String label handed to the device View; it is entirely
///   for your benefit, and different DualView objects may share a label.
/// \param n0 Extent of dimension 0, and likewise \c n1 .. \c n7 for the
///   remaining dimensions. For a View with three dimensions only the
///   first three need to be given; the rest default to 0.
///
/// The host View is obtained with create_mirror_view(d_view), and the
/// two "modified" counters are freshly allocated.
DualView (const std::string& label,
          const size_t n0 = 0,
          const size_t n1 = 0,
          const size_t n2 = 0,
          const size_t n3 = 0,
          const size_t n4 = 0,
          const size_t n5 = 0,
          const size_t n6 = 0,
          const size_t n7 = 0) :
  d_view (label, n0, n1, n2, n3, n4, n5, n6, n7),
  h_view (create_mirror_view (d_view)), // without UVM, host View mirrors
  modified_device ("DualView::modified_device"),
  modified_host ("DualView::modified_host")
{
}
//! Shallow copy from a DualView with possibly different template
//! arguments: both Views and both "modified" counters are copied from
//! \c src member by member, so the counters are shared with \c src.
template<class SS, class LS, class DS, class MS>
DualView (const DualView<SS,LS,DS,MS>& src)
  : d_view (src.d_view)
  , h_view (src.h_view)
  , modified_device (src.modified_device)
  , modified_host (src.modified_host)
{
}
//! Subview constructor: applies Kokkos::subview with the same arguments to
//! the device View and the host View of \c src, and copies the "modified"
//! counter Views from \c src.
template< class SD, class S1 , class S2 , class S3
        , class Arg0 , class ... Args >
DualView( const DualView<SD,S1,S2,S3> & src
        , const Arg0 & arg0
        , Args ... args
        ) :
  d_view( Kokkos::subview( src.d_view , arg0 , args ... ) ),
  h_view( Kokkos::subview( src.h_view , arg0 , args ... ) ),
  modified_device (src.modified_device),
  modified_host (src.modified_host)
{
}
/// \brief Wrap an existing pair of device and host View objects.
///
/// The two Views are assumed to hold synchronized data already; the
/// caller is responsible for ensuring that before calling this
/// constructor. Afterwards, DualView's sync() and modify() methods may
/// be used to keep the View objects synchronized.
///
/// The Views must agree in rank, all eight dimensions, all eight
/// strides and span (or, without KOKKOS_USING_EXP_VIEW, in shape);
/// otherwise a runtime exception is raised.
///
/// \param d_view_ Device View
/// \param h_view_ Host View (must have type t_host = t_dev::HostMirror)
DualView (const t_dev& d_view_, const t_host& h_view_)
  : d_view (d_view_)
  , h_view (h_view_)
  , modified_device ("DualView::modified_device")
  , modified_host ("DualView::modified_host")
{
#if ! KOKKOS_USING_EXP_VIEW
  Impl::assert_shapes_are_equal (d_view.shape (), h_view.shape ());
#else
  const bool compatible =
       int(d_view.rank)     == int(h_view.rank)
    && d_view.dimension_0() == h_view.dimension_0()
    && d_view.dimension_1() == h_view.dimension_1()
    && d_view.dimension_2() == h_view.dimension_2()
    && d_view.dimension_3() == h_view.dimension_3()
    && d_view.dimension_4() == h_view.dimension_4()
    && d_view.dimension_5() == h_view.dimension_5()
    && d_view.dimension_6() == h_view.dimension_6()
    && d_view.dimension_7() == h_view.dimension_7()
    && d_view.stride_0()    == h_view.stride_0()
    && d_view.stride_1()    == h_view.stride_1()
    && d_view.stride_2()    == h_view.stride_2()
    && d_view.stride_3()    == h_view.stride_3()
    && d_view.stride_4()    == h_view.stride_4()
    && d_view.stride_5()    == h_view.stride_5()
    && d_view.stride_6()    == h_view.stride_6()
    && d_view.stride_7()    == h_view.stride_7()
    && d_view.span()        == h_view.span() ;

  if ( ! compatible ) {
    Kokkos::Impl::throw_runtime_exception("DualView constructed with incompatible views");
  }
#endif
}
//@}
//! \name Methods for synchronizing, marking as modified, and getting Views.
//@{
/// \brief Return the View that lives in the memory space of \c Device.
///
/// The if_c expression in the return type only selects the type:
/// t_dev if the memory space of \c Device matches the memory space of
/// this DualView's device View, else t_host. The same selection picks
/// \c d_view or \c h_view as the returned object.
///
/// For example, suppose you create a DualView on Cuda, like this:
/// \code
///   typedef Kokkos::DualView<float*, Kokkos::LayoutRight, Kokkos::Cuda> dual_view_type;
///   dual_view_type DV ("my dual view", 100);
/// \endcode
/// If you want to get the CUDA device View, do this:
/// \code
///   typename dual_view_type::t_dev cudaView = DV.view<Kokkos::Cuda> ();
/// \endcode
/// and if you want to get the host mirror of that View, do this:
/// \code
///   typedef typename Kokkos::HostSpace::execution_space host_device_type;
///   typename dual_view_type::t_host hostView = DV.view<host_device_type> ();
/// \endcode
template< class Device >
KOKKOS_INLINE_FUNCTION
const typename Impl::if_c<
  Impl::is_same<typename t_dev::memory_space,
                typename Device::memory_space>::value,
  t_dev,
  t_host>::type& view () const
{
  // Same compile-time condition as in the return type.
  typedef Impl::if_c<
    Impl::is_same<
      typename t_dev::memory_space,
      typename Device::memory_space>::value,
    t_dev,
    t_host > view_selector ;

  return view_selector::select (d_view , h_view);
}
/// \brief Bring the copy in the memory space of \c Device up to date,
///   but only if the other copy has been marked as modified.
///
/// If the memory space of \c Device matches the device View's memory
/// space, data is copied from host to device; otherwise from device to
/// host. The copy happens only when the source's "modified" counter is
/// non-zero and not smaller than the target's.
///
/// This is a one-way synchronization only. Modifications made to the
/// target of the copy are discarded, and both "modified" counters are
/// reset to 0 after a copy.
///
/// \note This method cannot tell on its own whether either View was
///   changed. Mark changed data by calling modify() with the
///   appropriate template parameter.
///
/// This overload is enabled for a non-const data type.
template<class Device>
void sync( const typename Impl::enable_if<
           ( Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) ||
           ( Impl::is_same< Device , int>::value)
           , int >::type& = 0)
{
  const bool target_is_device =
    Impl::is_same<
      typename t_dev::memory_space,
      typename Device::memory_space>::value ;

  if (target_is_device) { // Device is the same as DualView's device type
    const bool host_is_newer =
      (modified_host () > 0) && (modified_host () >= modified_device ());
    if (host_is_newer) {
      deep_copy (d_view, h_view);
      modified_host() = modified_device() = 0;
    }
  }
  else { // hopefully Device is the same as DualView's host type
    const bool device_is_newer =
      (modified_device () > 0) && (modified_device () >= modified_host ());
    if (device_is_newer) {
      deep_copy (h_view, d_view);
      modified_host() = modified_device() = 0;
    }
  }

  // When host and device Views share one memory space, fence both
  // execution spaces.
  const bool same_memory_space =
    Impl::is_same<typename t_host::memory_space,typename t_dev::memory_space>::value ;
  if (same_memory_space) {
    t_dev::execution_space::fence();
    t_host::execution_space::fence();
  }
}
/// \brief sync() overload enabled for a const data type.
///
/// Nothing can be copied into a View of const data, so this overload
/// only checks whether a copy towards \c Device would be needed (same
/// counter test as the non-const overload) and raises a runtime
/// exception in that case. Otherwise it does nothing.
template<class Device>
void sync ( const typename Impl::enable_if<
            ( ! Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) ||
            ( Impl::is_same< Device , int>::value)
            , int >::type& = 0 )
{
  const bool target_is_device =
    Impl::is_same<
      typename t_dev::memory_space,
      typename Device::memory_space>::value ;

  if (target_is_device) { // Device is the same as DualView's device type
    const bool host_is_newer =
      (modified_host () > 0) && (modified_host () >= modified_device ());
    if (host_is_newer) {
      Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype.");
    }
  }
  else { // hopefully Device is the same as DualView's host type
    const bool device_is_newer =
      (modified_device () > 0) && (modified_device () >= modified_host ());
    if (device_is_newer) {
      Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype.");
    }
  }
}
/// \brief Whether sync<Device>() would copy data.
///
/// Returns true when the copy on the other side of \c Device has a
/// non-zero "modified" counter that is not smaller than the counter of
/// the side \c Device refers to; false otherwise.
template<class Device>
bool need_sync() const
{
  const bool target_is_device =
    Impl::is_same<
      typename t_dev::memory_space,
      typename Device::memory_space>::value ;

  return target_is_device
    // Device is the same as DualView's device type: is the host newer?
    ? ( (modified_host () > 0) && (modified_host () >= modified_device ()) )
    // hopefully Device is DualView's host type: is the device newer?
    : ( (modified_device () > 0) && (modified_device () >= modified_host ()) ) ;
}
/// \brief Mark the data in the memory space of \c Device as modified.
///
/// If the memory space of \c Device matches the device View's memory
/// space, the device counter is raised; otherwise the host counter is.
/// The raised counter becomes one more than the larger of the two
/// counters, so the marked side is always the newest.
template<class Device>
void modify () {
  const bool on_device =
    Impl::is_same<
      typename t_dev::memory_space,
      typename Device::memory_space>::value ;

  const unsigned int next_count =
    (modified_device () > modified_host () ?
     modified_device () : modified_host ()) + 1;

  if (on_device) { // Device is the same as DualView's device type
    modified_device () = next_count;
  }
  else { // hopefully Device is the same as DualView's host type
    modified_host () = next_count;
  }
}
//@}
//! \name Methods for reallocating or resizing the View objects.
//@{
/// \brief Reallocate both View objects with the given extents.
///
/// The existing contents are discarded: the old contents of either
/// View are <i>not</i> copied into the new View objects. Both
/// "modified" counters are reset to 0.
void realloc( const size_t n0 = 0 ,
              const size_t n1 = 0 ,
              const size_t n2 = 0 ,
              const size_t n3 = 0 ,
              const size_t n4 = 0 ,
              const size_t n5 = 0 ,
              const size_t n6 = 0 ,
              const size_t n7 = 0 )
{
  ::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7);

  // The host side is rebuilt as a mirror of the new device View.
  h_view = create_mirror_view( d_view );

  /* Reset dirty flags */
  modified_host() = 0;
  modified_device() = 0;
}
/// \brief Resize both views, copying old contents into new if necessary.
///
/// This method only copies the old contents into the new View
/// objects for the device which was last marked as modified.
///
/// The side whose contents were preserved is marked as modified again
/// afterwards (its counter is incremented); the other side is not
/// brought up to date here.
void resize( const size_t n0 = 0 ,
const size_t n1 = 0 ,
const size_t n2 = 0 ,
const size_t n3 = 0 ,
const size_t n4 = 0 ,
const size_t n5 = 0 ,
const size_t n6 = 0 ,
const size_t n7 = 0 ) {
// Device counter >= host counter (including both 0): keep the device data.
if(modified_device() >= modified_host()) {
/* Resize on Device */
::Kokkos::resize(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
// The host View is replaced by a new mirror of the resized device View;
// no data is copied into it here.
h_view = create_mirror_view( d_view );
/* Mark Device copy as modified */
modified_device() = modified_device()+1;
} else {
// Host counter is larger: keep the host data instead.
/* Realloc on Device */
::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
t_host temp_view = create_mirror_view( d_view );
/* Remap on Host */
// NOTE(review): temp_view has the new extents while h_view still has the
// old ones -- confirm that Kokkos::deep_copy remaps between differently
// sized Views here rather than rejecting them.
Kokkos::deep_copy( temp_view , h_view );
h_view = temp_view;
/* Mark Host copy as modified */
modified_host() = modified_host()+1;
}
}
//@}
//! \name Methods for getting capacity, stride, or dimension(s).
//@{
//! The allocation size of the device View: its capacity(), or its span()
//! when KOKKOS_USING_EXP_VIEW is enabled.
size_t capacity() const
{
#if ! KOKKOS_USING_EXP_VIEW
  return d_view.capacity();
#else
  return d_view.span();
#endif
}
//! Get the stride(s) for each dimension by forwarding \c stride_ to the
//! device View's stride() method, which fills in the array.
template< typename iType>
void stride(iType* stride_) const
{
  d_view.stride(stride_);
}
// The eight accessors below all report the extents of the device View.

//! Size of dimension 0.
size_t dimension_0() const
{
  return d_view.dimension_0();
}

//! Size of dimension 1.
size_t dimension_1() const
{
  return d_view.dimension_1();
}

//! Size of dimension 2.
size_t dimension_2() const
{
  return d_view.dimension_2();
}

//! Size of dimension 3.
size_t dimension_3() const
{
  return d_view.dimension_3();
}

//! Size of dimension 4.
size_t dimension_4() const
{
  return d_view.dimension_4();
}

//! Size of dimension 5.
size_t dimension_5() const
{
  return d_view.dimension_5();
}

//! Size of dimension 6.
size_t dimension_6() const
{
  return d_view.dimension_6();
}

//! Size of dimension 7.
size_t dimension_7() const
{
  return d_view.dimension_7();
}
//@}
};
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
//
// Partial specializations of Kokkos::subview() for DualView objects.
//
#if KOKKOS_USING_EXP_VIEW
namespace Kokkos {
namespace Impl {
/// Computes the DualView type returned by subview() for a source
/// DualView<D,A1,A2,A3> and subview arguments \c Args.
///
/// \c dst_traits is the traits_type reported by the ViewMapping for those
/// arguments; \c type is the DualView built from its data type, array
/// layout, device type and memory traits.
template< class D, class A1, class A2, class A3, class ... Args >
struct DualViewSubview
{
  typedef typename
    Kokkos::Experimental::Impl::ViewMapping<
      void ,
      Kokkos::ViewTraits< D, A1, A2, A3 > ,
      Args ... >::traits_type dst_traits ;

  typedef Kokkos::DualView<
    typename dst_traits::data_type ,
    typename dst_traits::array_layout ,
    typename dst_traits::device_type ,
    typename dst_traits::memory_traits > type ;
};
} /* namespace Impl */
/// Create a subview of a DualView: constructs the DualView type computed
/// by Impl::DualViewSubview from \c src and the subview arguments.
template< class D , class A1 , class A2 , class A3 , class ... Args >
typename Impl::DualViewSubview<D,A1,A2,A3,Args...>::type
subview( const DualView<D,A1,A2,A3> & src , Args ... args )
{
  typedef typename Impl::DualViewSubview<D,A1,A2,A3,Args...>::type result_type ;
  return result_type( src , args ... );
}
} /* namespace Kokkos */
#else
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
//
// Partial specializations of Kokkos::subview() for DualView objects.
//
namespace Kokkos {
namespace Impl {
// Partial specialization of ViewSubview for a DualView source.
// Given the source DualView template arguments and up to eight subview
// argument types (unused trailing arguments are 'void'), the public member
// 'type' is the DualView type of the resulting subview.
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
, class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
>
struct ViewSubview< DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type >
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
, SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
{
private:
typedef DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type > SrcViewType ;
// Vn is 1 when the n-th subview argument type is void, otherwise 0.
enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 };
enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 };
enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 };
enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 };
enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 };
enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 };
enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 };
enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 };
// The source view rank must be equal to the input argument rank
// Once a void argument is encountered all subsequent arguments must be void.
// Both conditions are checked at compile time: the rank must equal the
// index of the first void argument and also 8 minus the number of voids.
enum { InputRank =
Impl::StaticAssert<( SrcViewType::rank ==
( V0 ? 0 : (
V1 ? 1 : (
V2 ? 2 : (
V3 ? 3 : (
V4 ? 4 : (
V5 ? 5 : (
V6 ? 6 : (
V7 ? 7 : 8 ))))))) ))
&&
( SrcViewType::rank ==
( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) )
>::value ? SrcViewType::rank : 0 };
// Rn is 1 when the n-th subview argument is a range according to
// ViewOffsetRange<>::is_range, otherwise 0.
enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 };
enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 };
enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 };
enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 };
enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0 };
enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 };
enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 };
enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 };
// The output rank is the number of range arguments.
enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
+ unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
// Reverse: range flag of the last input argument (index InputRank-1),
// or 0 when InputRank is 0.
enum { R0_rev = 0 == InputRank ? 0u : (
1 == InputRank ? unsigned(R0) : (
2 == InputRank ? unsigned(R1) : (
3 == InputRank ? unsigned(R2) : (
4 == InputRank ? unsigned(R3) : (
5 == InputRank ? unsigned(R4) : (
6 == InputRank ? unsigned(R5) : (
7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) };
typedef typename SrcViewType::array_layout SrcViewLayout ;
// Choose array layout, attempting to preserve original layout if at all possible.
// When none of the conditions below holds the layout becomes LayoutStride.
typedef typename Impl::if_c<
( // Same Layout IF
// OutputRank 0
( OutputRank == 0 )
||
// OutputRank 1 or 2, InputLayout Left, Interval 0
// because single stride one or second index has a stride.
( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value )
||
// OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
// because single stride one or second index has a stride.
( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value )
), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ;
// Choose data type as a purely dynamic rank array to accommodate a runtime range.
// The value type is followed by one '*' per output dimension (0 to 8).
typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type ,
typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *,
typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **,
typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***,
typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****,
typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****,
typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******,
typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******,
typename SrcViewType::value_type ********
>::type >::type >::type >::type >::type >::type >::type >::type OutputData ;
// Choose space.
// If the source view's template arg1 or arg2 is a space then use it,
// otherwise use the source view's execution space.
typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type ,
typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::execution_space
>::type >::type OutputSpace ;
public:
// If keeping the layout then match non-data type arguments
// else keep execution space and memory traits.
typedef typename
Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value
, Kokkos::DualView< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type >
, Kokkos::DualView< OutputData , OutputViewLayout , OutputSpace
, typename SrcViewType::memory_traits >
>::type type ;
};
} /* namespace Impl */
} /* namespace Kokkos */
namespace Kokkos {
// Subview of a DualView taken with one subview argument.
// The same subview is taken of the device view and of the host view, and the
// source's modified_device / modified_host members are assigned to the result.
template< class D , class A1 , class A2 , class A3 ,
          class ArgType0 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
                          , ArgType0 , void , void , void
                          , void , void , void , void
                          >::type
subview( const DualView<D,A1,A2,A3> & src ,
         const ArgType0 & arg0 )
{
  typedef Impl::ViewSubview< DualView<D,A1,A2,A3>
                           , ArgType0 , void , void , void
                           , void , void , void , void
                           > subview_traits ;
  typedef typename subview_traits::type result_type ;

  result_type result ;

  result.d_view = subview( src.d_view , arg0 );
  result.h_view = subview( src.h_view , arg0 );
  result.modified_device = src.modified_device ;
  result.modified_host   = src.modified_host ;

  return result ;
}
// Subview of a DualView taken with two subview arguments.
// The same subview is taken of the device view and of the host view, and the
// source's modified_device / modified_host members are assigned to the result.
template< class D , class A1 , class A2 , class A3 ,
          class ArgType0 , class ArgType1 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
                          , ArgType0 , ArgType1 , void , void
                          , void , void , void , void
                          >::type
subview( const DualView<D,A1,A2,A3> & src ,
         const ArgType0 & arg0 ,
         const ArgType1 & arg1 )
{
  typedef Impl::ViewSubview< DualView<D,A1,A2,A3>
                           , ArgType0 , ArgType1 , void , void
                           , void , void , void , void
                           > subview_traits ;
  typedef typename subview_traits::type result_type ;

  result_type result ;

  result.d_view = subview( src.d_view , arg0 , arg1 );
  result.h_view = subview( src.h_view , arg0 , arg1 );
  result.modified_device = src.modified_device ;
  result.modified_host   = src.modified_host ;

  return result ;
}
// Subview of a DualView taken with three subview arguments.
// The same subview is taken of the device view and of the host view, and the
// source's modified_device / modified_host members are assigned to the result.
template< class D , class A1 , class A2 , class A3 ,
          class ArgType0 , class ArgType1 , class ArgType2 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
                          , ArgType0 , ArgType1 , ArgType2 , void
                          , void , void , void , void
                          >::type
subview( const DualView<D,A1,A2,A3> & src ,
         const ArgType0 & arg0 ,
         const ArgType1 & arg1 ,
         const ArgType2 & arg2 )
{
  typedef Impl::ViewSubview< DualView<D,A1,A2,A3>
                           , ArgType0 , ArgType1 , ArgType2 , void
                           , void , void , void , void
                           > subview_traits ;
  typedef typename subview_traits::type result_type ;

  result_type result ;

  result.d_view = subview( src.d_view , arg0 , arg1 , arg2 );
  result.h_view = subview( src.h_view , arg0 , arg1 , arg2 );
  result.modified_device = src.modified_device ;
  result.modified_host   = src.modified_host ;

  return result ;
}
// Subview of a DualView taken with four subview arguments.
// The same subview is taken of the device view and of the host view, and the
// source's modified_device / modified_host members are assigned to the result.
template< class D , class A1 , class A2 , class A3 ,
          class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
                          , ArgType0 , ArgType1 , ArgType2 , ArgType3
                          , void , void , void , void
                          >::type
subview( const DualView<D,A1,A2,A3> & src ,
         const ArgType0 & arg0 ,
         const ArgType1 & arg1 ,
         const ArgType2 & arg2 ,
         const ArgType3 & arg3 )
{
  typedef Impl::ViewSubview< DualView<D,A1,A2,A3>
                           , ArgType0 , ArgType1 , ArgType2 , ArgType3
                           , void , void , void , void
                           > subview_traits ;
  typedef typename subview_traits::type result_type ;

  result_type result ;

  result.d_view = subview( src.d_view , arg0 , arg1 , arg2 , arg3 );
  result.h_view = subview( src.h_view , arg0 , arg1 , arg2 , arg3 );
  result.modified_device = src.modified_device ;
  result.modified_host   = src.modified_host ;

  return result ;
}
// Subview of a DualView taken with five subview arguments.
// The same subview is taken of the device view and of the host view, and the
// source's modified_device / modified_host members are assigned to the result.
template< class D , class A1 , class A2 , class A3 ,
          class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
          class ArgType4 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
                          , ArgType0 , ArgType1 , ArgType2 , ArgType3
                          , ArgType4 , void , void , void
                          >::type
subview( const DualView<D,A1,A2,A3> & src ,
         const ArgType0 & arg0 ,
         const ArgType1 & arg1 ,
         const ArgType2 & arg2 ,
         const ArgType3 & arg3 ,
         const ArgType4 & arg4 )
{
  typedef Impl::ViewSubview< DualView<D,A1,A2,A3>
                           , ArgType0 , ArgType1 , ArgType2 , ArgType3
                           , ArgType4 , void , void , void
                           > subview_traits ;
  typedef typename subview_traits::type result_type ;

  result_type result ;

  result.d_view = subview( src.d_view , arg0 , arg1 , arg2 , arg3 , arg4 );
  result.h_view = subview( src.h_view , arg0 , arg1 , arg2 , arg3 , arg4 );
  result.modified_device = src.modified_device ;
  result.modified_host   = src.modified_host ;

  return result ;
}
// Subview of a DualView taken with six subview arguments.
// The same subview is taken of the device view and of the host view, and the
// source's modified_device / modified_host members are assigned to the result.
template< class D , class A1 , class A2 , class A3 ,
          class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
          class ArgType4 , class ArgType5 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
                          , ArgType0 , ArgType1 , ArgType2 , ArgType3
                          , ArgType4 , ArgType5 , void , void
                          >::type
subview( const DualView<D,A1,A2,A3> & src ,
         const ArgType0 & arg0 ,
         const ArgType1 & arg1 ,
         const ArgType2 & arg2 ,
         const ArgType3 & arg3 ,
         const ArgType4 & arg4 ,
         const ArgType5 & arg5 )
{
  typedef Impl::ViewSubview< DualView<D,A1,A2,A3>
                           , ArgType0 , ArgType1 , ArgType2 , ArgType3
                           , ArgType4 , ArgType5 , void , void
                           > subview_traits ;
  typedef typename subview_traits::type result_type ;

  result_type result ;

  result.d_view = subview( src.d_view , arg0 , arg1 , arg2 , arg3 , arg4 , arg5 );
  result.h_view = subview( src.h_view , arg0 , arg1 , arg2 , arg3 , arg4 , arg5 );
  result.modified_device = src.modified_device ;
  result.modified_host   = src.modified_host ;

  return result ;
}
// Subview of a DualView taken with seven subview arguments.
// The same subview is taken of the device view and of the host view, and the
// source's modified_device / modified_host members are assigned to the result.
template< class D , class A1 , class A2 , class A3 ,
          class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
          class ArgType4 , class ArgType5 , class ArgType6 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
                          , ArgType0 , ArgType1 , ArgType2 , ArgType3
                          , ArgType4 , ArgType5 , ArgType6 , void
                          >::type
subview( const DualView<D,A1,A2,A3> & src ,
         const ArgType0 & arg0 ,
         const ArgType1 & arg1 ,
         const ArgType2 & arg2 ,
         const ArgType3 & arg3 ,
         const ArgType4 & arg4 ,
         const ArgType5 & arg5 ,
         const ArgType6 & arg6 )
{
  typedef Impl::ViewSubview< DualView<D,A1,A2,A3>
                           , ArgType0 , ArgType1 , ArgType2 , ArgType3
                           , ArgType4 , ArgType5 , ArgType6 , void
                           > subview_traits ;
  typedef typename subview_traits::type result_type ;

  result_type result ;

  result.d_view = subview( src.d_view , arg0 , arg1 , arg2 , arg3 , arg4 , arg5 , arg6 );
  result.h_view = subview( src.h_view , arg0 , arg1 , arg2 , arg3 , arg4 , arg5 , arg6 );
  result.modified_device = src.modified_device ;
  result.modified_host   = src.modified_host ;

  return result ;
}
// Subview of a DualView taken with eight subview arguments.
// The same subview is taken of the device view and of the host view, and the
// source's modified_device / modified_host members are assigned to the result.
template< class D , class A1 , class A2 , class A3 ,
          class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
          class ArgType4 , class ArgType5 , class ArgType6 , class ArgType7 >
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
                          , ArgType0 , ArgType1 , ArgType2 , ArgType3
                          , ArgType4 , ArgType5 , ArgType6 , ArgType7
                          >::type
subview( const DualView<D,A1,A2,A3> & src ,
         const ArgType0 & arg0 ,
         const ArgType1 & arg1 ,
         const ArgType2 & arg2 ,
         const ArgType3 & arg3 ,
         const ArgType4 & arg4 ,
         const ArgType5 & arg5 ,
         const ArgType6 & arg6 ,
         const ArgType7 & arg7 )
{
  typedef Impl::ViewSubview< DualView<D,A1,A2,A3>
                           , ArgType0 , ArgType1 , ArgType2 , ArgType3
                           , ArgType4 , ArgType5 , ArgType6 , ArgType7
                           > subview_traits ;
  typedef typename subview_traits::type result_type ;

  result_type result ;

  result.d_view = subview( src.d_view , arg0 , arg1 , arg2 , arg3 , arg4 , arg5 , arg6 , arg7 );
  result.h_view = subview( src.h_view , arg0 , arg1 , arg2 , arg3 , arg4 , arg5 , arg6 , arg7 );
  result.modified_device = src.modified_device ;
  result.modified_host   = src.modified_host ;

  return result ;
}
} // namespace Kokkos
#endif /* KOKKOS_USING_EXP_VIEW */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
//
// Overloads of Kokkos::deep_copy() for DualView objects.
//
// Deep copy between DualViews.
// If the source's modified_device() is greater than or equal to its
// modified_host(), the device views are copied and the destination is marked
// modified on its device_type; otherwise the host views are copied and the
// destination is marked modified on its host_mirror_space.
template< class DT , class DL , class DD , class DM ,
          class ST , class SL , class SD , class SM >
void
deep_copy (DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference
           const DualView<ST,SL,SD,SM>& src )
{
  typedef DualView<DT,DL,DD,DM> dst_type ;

  const bool copy_device_side = src.modified_device () >= src.modified_host ();

  if ( ! copy_device_side ) {
    deep_copy (dst.h_view, src.h_view);
    dst.template modify<typename dst_type::host_mirror_space> ();
  }
  else {
    deep_copy (dst.d_view, src.d_view);
    dst.template modify<typename dst_type::device_type> ();
  }
}
template< class ExecutionSpace ,
class DT , class DL , class DD , class DM ,
class ST , class SL , class SD , class SM >
void
deep_copy (const ExecutionSpace& exec ,
DualView<DT,DL,DD,DM> dst, // trust me, this must not be a reference
const DualView<ST,SL,SD,SM>& src )
{
if (src.modified_device () >= src.modified_host ()) {
deep_copy (exec, dst.d_view, src.d_view);
dst.template modify<typename DualView<DT,DL,DD,DM>::device_type> ();
} else {
deep_copy (exec, dst.h_view, src.h_view);
dst.template modify<typename DualView<DT,DL,DD,DM>::host_mirror_space> ();
}
}
} // namespace Kokkos
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,494 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_DYNAMIC_VIEW_HPP
#define KOKKOS_DYNAMIC_VIEW_HPP
#include <cstdio>
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Error.hpp>
namespace Kokkos {
namespace Experimental {
/** \brief Dynamic views are restricted to rank-one and no layout.
* Subviews are not allowed.
*/
template< typename DataType , typename ... P >
class DynamicView : public Kokkos::Experimental::ViewTraits< DataType , P ... >
{
public:
typedef ViewTraits< DataType , P ... > traits ;
private:
template< class , class ... > friend class DynamicView ;
typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ;
static_assert( traits::rank == 1 && traits::rank_dynamic == 1
, "DynamicView must be rank-one" );
static_assert( std::is_trivial< typename traits::value_type >::value &&
std::is_same< typename traits::specialize , void >::value
, "DynamicView must have trivial data type" );
public:
typedef Kokkos::Experimental::MemoryPool< typename traits::device_type > memory_pool ;
private:
memory_pool m_pool ;
track_type m_track ;
typename traits::value_type ** m_chunks ;
unsigned m_chunk_shift ;
unsigned m_chunk_mask ;
unsigned m_chunk_max ;
public:
//----------------------------------------------------------------------
/** \brief Compatible view of array of scalar types */
typedef DynamicView< typename traits::data_type ,
typename traits::device_type >
array_type ;
/** \brief Compatible view of const data type */
typedef DynamicView< typename traits::const_data_type ,
typename traits::device_type >
const_type ;
/** \brief Compatible view of non-const data type */
typedef DynamicView< typename traits::non_const_data_type ,
typename traits::device_type >
non_const_type ;
/** \brief Must be accessible everywhere */
typedef DynamicView HostMirror ;
//----------------------------------------------------------------------
enum { Rank = 1 };
/** \brief  Current size of the view.
 *
 *  When the active execution memory space can access the view's memory
 *  space this is the value stored at m_chunks[ m_chunk_max ] shifted left
 *  by m_chunk_shift; otherwise zero is returned.
 */
KOKKOS_INLINE_FUNCTION constexpr size_t size() const
{
return
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace
< Kokkos::Impl::ActiveExecutionMemorySpace
, typename traits::memory_space
>::value
? // Runtime size is at the end of the chunk pointer array
(*reinterpret_cast<const uintptr_t*>( m_chunks + m_chunk_max ))
<< m_chunk_shift
: 0 ;
}
/** \brief  Extent of dimension \c r : size() for r == 0, otherwise 1. */
template< typename iType >
KOKKOS_INLINE_FUNCTION constexpr
size_t extent( const iType & r ) const
{ return r == 0 ? size() : 1 ; }
/** \brief  Same value as extent(); note the return type is size_t. */
template< typename iType >
KOKKOS_INLINE_FUNCTION constexpr
size_t extent_int( const iType & r ) const
{ return r == 0 ? size() : 1 ; }
// Per-dimension extents: dimension 0 is size(), all others are 1.
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return size(); }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return 1 ; }
KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return 1 ; }
// Per-dimension strides: every stride is reported as 0.
KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return 0 ; }
/** \brief  Writes 0 to the first element of \c s ; no other element is written. */
template< typename iType >
KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { *s = 0 ; }
//----------------------------------------------------------------------
// Range span is the span which contains all members.
// For this view the span accessors report a non-contiguous span of zero
// length and data() returns a null pointer.
typedef typename traits::value_type & reference_type ;
typedef typename traits::value_type * pointer_type ;
enum { reference_type_is_lvalue_reference = std::is_lvalue_reference< reference_type >::value };
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return false ; }
KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return 0 ; }
KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { return 0 ; }
//----------------------------------------
/** \brief  Reference to the entry at index \c i0 .
 *
 *  Only \c i0 is used for indexing; any additional arguments are merely
 *  required to be of integral type.  The chunk is selected with
 *  i0 >> m_chunk_shift and the entry within it with i0 & m_chunk_mask.
 *  Aborts if the chunk index is not below the allocated chunk counter.
 */
template< typename I0 , class ... Args >
KOKKOS_INLINE_FUNCTION
reference_type operator()( const I0 & i0 , const Args & ... args ) const
{
static_assert( Kokkos::Impl::are_integral<I0,Args...>::value
, "Indices must be integral type" );
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace
< Kokkos::Impl::ActiveExecutionMemorySpace
, typename traits::memory_space
>::verify();
// Which chunk is being indexed.
const uintptr_t ic = uintptr_t( i0 >> m_chunk_shift );
typename traits::value_type * volatile * const ch = m_chunks + ic ;
// Do bounds checking if enabled or if the chunk pointer is zero.
// If not bounds checking then we assume a non-zero pointer is valid.
// With KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK defined the 'if' below is compiled
// out, so the checked block runs on every access.
#if ! defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
if ( 0 == *ch )
#endif
{
// Verify that allocation of the requested chunk is in progress.
// The allocated chunk counter is m_chunks[ m_chunk_max ]
const uintptr_t n =
*reinterpret_cast<uintptr_t volatile *>( m_chunks + m_chunk_max );
if ( n <= ic ) {
Kokkos::abort("Kokkos::DynamicView array bounds error");
}
// Allocation of this chunk is in progress
// so wait for allocation to complete.
// Spins until the chunk pointer read through the volatile pointer is non-zero.
while ( 0 == *ch );
}
return (*ch)[ i0 & m_chunk_mask ];
}
//----------------------------------------
/** \brief  Resizing in parallel only increases the array size,
 *          never decrease.
 *
 *  Callers race to claim the next chunk index with an atomic
 *  compare-exchange on the allocated chunk counter stored at
 *  m_chunks[ m_chunk_max ]; the caller that wins an index allocates that
 *  chunk from the memory pool.  Aborts if \c n requires more than
 *  m_chunk_max chunks.
 *
 *  \param n  Requested number of entries.
 */
KOKKOS_INLINE_FUNCTION
void resize_parallel( size_t n ) const
{
  typedef typename traits::value_type value_type ;

  Kokkos::Impl::VerifyExecutionCanAccessMemorySpace
    < Kokkos::Impl::ActiveExecutionMemorySpace
    , typename traits::memory_space >::verify();

  // Number of chunks required, rounding up to a whole chunk.
  const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ;

  if ( m_chunk_max < NC ) {
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
    // The arguments have three different types (size_t, unsigned, uintptr_t);
    // cast each one so it matches the "%lu" conversion.
    printf("DynamicView::resize_parallel(%lu) m_chunk_max(%lu) NC(%lu)\n"
          , (unsigned long) n
          , (unsigned long) m_chunk_max
          , (unsigned long) NC );
#endif
    Kokkos::abort("DynamicView::resize_parallel exceeded maximum size");
  }

  value_type * volatile * const ch = m_chunks ;

  // The allocated chunk counter is m_chunks[ m_chunk_max ]
  uintptr_t volatile * const pc =
    reinterpret_cast<uintptr_t volatile*>( m_chunks + m_chunk_max );

  // Potentially concurrent iteration of allocation to the required size.
  for ( uintptr_t jc = *pc ; jc < NC ; ) {

    // Claim the 'jc' chunk to-be-allocated index
    const uintptr_t jc_try = jc ;

    // Jump iteration to the chunk counter.
    jc = atomic_compare_exchange( pc , jc_try , jc_try + 1 );

    // The exchange succeeded when the previous counter value is the one
    // this caller tried to replace; only then does it allocate the chunk.
    if ( jc_try == jc ) {
      ch[jc_try] = reinterpret_cast<value_type*>(
        m_pool.allocate( sizeof(value_type) << m_chunk_shift ));
      Kokkos::memory_fence();
    }
  }
}
/** \brief  Resizing in serial can grow or shrink the array size.
 *
 *  Chunks are allocated from, or returned to, the memory pool until the
 *  allocated chunk counter stored at m_chunks[ m_chunk_max ] equals the
 *  number of chunks needed to hold \c n entries.  Aborts if \c n requires
 *  more than m_chunk_max chunks.
 *
 *  \param n  Requested number of entries.
 */
inline
void resize_serial( size_t n )
{
  // 'typename' is required because traits depends on the template
  // parameters of DynamicView.
  typedef typename traits::value_type value_type ;

  Kokkos::Impl::VerifyExecutionCanAccessMemorySpace
    < Kokkos::Impl::ActiveExecutionMemorySpace
    , typename traits::memory_space >::verify();

  // Number of chunks required, rounding up to a whole chunk.
  const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ;

  if ( m_chunk_max < NC ) {
    Kokkos::abort("DynamicView::resize_serial exceeded maximum size");
  }

  // The allocated chunk counter is m_chunks[ m_chunk_max ]
  uintptr_t * const pc =
    reinterpret_cast<uintptr_t*>( m_chunks + m_chunk_max );

  if ( *pc < NC ) {
    // Grow: allocate chunks until the counter reaches NC.
    while ( *pc < NC ) {
      // Cast the pool allocation to the chunk pointer type, as
      // resize_parallel does.
      m_chunks[*pc] = reinterpret_cast<value_type*>(
        m_pool.allocate( sizeof(value_type) << m_chunk_shift ));
      ++*pc ;
    }
  }
  else {
    // Shrink: release chunks from the end until the counter equals NC.
    while ( NC + 1 <= *pc ) {
      --*pc ;
      m_pool.deallocate( m_chunks[*pc]
                       , sizeof(value_type) << m_chunk_shift );
      m_chunks[*pc] = 0 ;
    }
  }
}
//----------------------------------------------------------------------
~DynamicView() = default ;
DynamicView() = default ;
DynamicView( DynamicView && ) = default ;
DynamicView( const DynamicView & ) = default ;
DynamicView & operator = ( DynamicView && ) = default ;
DynamicView & operator = ( const DynamicView & ) = default ;
/** \brief  Construct from a DynamicView with different template arguments.
 *
 *  Every member (pool, tracker, chunk pointer array and chunk parameters)
 *  is copied from \c rhs , so the new view refers to the same chunk
 *  pointer array.
 */
template< class RT , class ... RP >
KOKKOS_INLINE_FUNCTION
DynamicView( const DynamicView<RT,RP...> & rhs )
: m_pool( rhs.m_pool )
, m_track( rhs.m_track )
, m_chunks( rhs.m_chunks )
, m_chunk_shift( rhs.m_chunk_shift )
, m_chunk_mask( rhs.m_chunk_mask )
, m_chunk_max( rhs.m_chunk_max )
{
}
//----------------------------------------------------------------------
struct Destroy {
memory_pool m_pool ;
typename traits::value_type ** m_chunks ;
unsigned m_chunk_max ;
bool m_destroy ;
// Initialize or destroy array of chunk pointers.
// Two entries beyond the max chunks are allocation counters.
KOKKOS_INLINE_FUNCTION
void operator()( unsigned i ) const
{
if ( m_destroy && i < m_chunk_max && 0 != m_chunks[i] ) {
m_pool.deallocate( m_chunks[i] , m_pool.get_min_block_size() );
}
m_chunks[i] = 0 ;
}
void execute( bool arg_destroy )
{
typedef Kokkos::RangePolicy< typename traits::execution_space > Range ;
m_destroy = arg_destroy ;
Kokkos::Impl::ParallelFor<Destroy,Range>
closure( *this , Range(0, m_chunk_max + 1) );
closure.execute();
traits::execution_space::fence();
}
void construct_shared_allocation()
{ execute( false ); }
void destroy_shared_allocation()
{ execute( true ); }
Destroy() = default ;
Destroy( Destroy && ) = default ;
Destroy( const Destroy & ) = default ;
Destroy & operator = ( Destroy && ) = default ;
Destroy & operator = ( const Destroy & ) = default ;
Destroy( const memory_pool & arg_pool
, typename traits::value_type ** arg_chunk
, const unsigned arg_chunk_max )
: m_pool( arg_pool )
, m_chunks( arg_chunk )
, m_chunk_max( arg_chunk_max )
, m_destroy( false )
{}
};
/**\brief Allocation constructor
*
* Memory is allocated in chunks from the memory pool.
* The chunk size conforms to the memory pool's chunk size.
* A maximum size is required in order to allocate a
* chunk-pointer array.
*/
explicit inline
DynamicView( const std::string & arg_label
, const memory_pool & arg_pool
, const size_t arg_size_max )
: m_pool( arg_pool )
, m_track()
, m_chunks(0)
// The memory pool chunk is guaranteed to be a power of two
, m_chunk_shift(
Kokkos::Impl::integral_power_of_two(
m_pool.get_min_block_size()/sizeof(typename traits::value_type)) )
, m_chunk_mask( ( 1 << m_chunk_shift ) - 1 )
, m_chunk_max( ( arg_size_max + m_chunk_mask ) >> m_chunk_shift )
{
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace
< Kokkos::Impl::ActiveExecutionMemorySpace
, typename traits::memory_space >::verify();
// A functor to deallocate all of the chunks upon final destruction
typedef typename traits::memory_space memory_space ;
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< memory_space , Destroy > record_type ;
// Allocate chunk pointers and allocation counter
record_type * const record =
record_type::allocate( memory_space()
, arg_label
, ( sizeof(pointer_type) * ( m_chunk_max + 1 ) ) );
m_chunks = reinterpret_cast<pointer_type*>( record->data() );
record->m_destroy = Destroy( m_pool , m_chunks , m_chunk_max );
// Initialize to zero
record->m_destroy.construct_shared_allocation();
m_track.assign_allocated_record_to_uninitialized( record );
}
};
} // namespace Experimental
} // namespace Kokkos
namespace Kokkos {
namespace Experimental {
/** \brief  Mirror view of a DynamicView: a copy of the source view itself. */
template< class T , class ... P >
inline
typename Kokkos::Experimental::DynamicView<T,P...>::HostMirror
create_mirror_view( const Kokkos::Experimental::DynamicView<T,P...> & src )
{
  typedef typename Kokkos::Experimental::DynamicView<T,P...>::HostMirror mirror_type ;

  return mirror_type( src );
}
template< class T , class ... DP , class ... SP >
inline
void deep_copy( const View<T,DP...> & dst
, const DynamicView<T,SP...> & src
)
{
typedef View<T,DP...> dst_type ;
typedef DynamicView<T,SP...> src_type ;
typedef typename ViewTraits<T,DP...>::execution_space dst_execution_space ;
typedef typename ViewTraits<T,SP...>::memory_space src_memory_space ;
enum { DstExecCanAccessSrc =
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value };
if ( DstExecCanAccessSrc ) {
// Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape.
Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type >( dst , src );
}
else {
Kokkos::Impl::throw_runtime_exception("deep_copy given views that would require a temporary allocation");
}
}
/** \brief  Deep copy from a View into a DynamicView.
 *
 *  Remaps the data when the destination's execution space can access the
 *  source's memory space; otherwise throws a runtime exception.
 */
template< class T , class ... DP , class ... SP >
inline
void deep_copy( const DynamicView<T,DP...> & dst
              , const View<T,SP...> & src
              )
{
  // The destination uses the DP pack and the source uses the SP pack,
  // matching the declared types of 'dst' and 'src'.
  typedef DynamicView<T,DP...>  dst_type ;
  typedef View<T,SP...>         src_type ;

  typedef typename ViewTraits<T,DP...>::execution_space  dst_execution_space ;
  typedef typename ViewTraits<T,SP...>::memory_space     src_memory_space ;

  enum { DstExecCanAccessSrc =
   Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value };

  if ( DstExecCanAccessSrc ) {
    // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape.
    Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type >( dst , src );
  }
  else {
    Kokkos::Impl::throw_runtime_exception("deep_copy given views that would require a temporary allocation");
  }
}
} // namespace Experimental
} // namespace Kokkos
#endif /* #ifndef KOKKOS_DYNAMIC_VIEW_HPP */

View File

@ -0,0 +1,173 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_FUNCTIONAL_HPP
#define KOKKOS_FUNCTIONAL_HPP
#include <Kokkos_Macros.hpp>
#include <impl/Kokkos_Functional_impl.hpp>
namespace Kokkos {
// These should work for most types
// Hash functor: hashes the sizeof(T) bytes of an object with
// Impl::MurmurHash3_x86_32.
template <typename T>
struct pod_hash
{
  typedef T        argument_type;
  typedef T        first_argument_type;
  typedef uint32_t second_argument_type;
  typedef uint32_t result_type;

  // Hash 'value' using a seed of zero.
  KOKKOS_FORCEINLINE_FUNCTION
  uint32_t operator()(T const & value) const
  {
    return Impl::MurmurHash3_x86_32( &value, sizeof(T), 0);
  }

  // Hash 'value' using the caller supplied seed.
  KOKKOS_FORCEINLINE_FUNCTION
  uint32_t operator()(T const & value, uint32_t seed) const
  {
    return Impl::MurmurHash3_x86_32( &value, sizeof(T), seed);
  }
};
// Equality functor that compares two objects with Impl::bitwise_equal.
template <typename T>
struct pod_equal_to
{
  typedef T    first_argument_type;
  typedef T    second_argument_type;
  typedef bool result_type;

  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(T const & lhs, T const & rhs) const
  {
    return Impl::bitwise_equal(&lhs,&rhs);
  }
};
// Inequality functor: the negation of Impl::bitwise_equal on two objects.
template <typename T>
struct pod_not_equal_to
{
  typedef T    first_argument_type;
  typedef T    second_argument_type;
  typedef bool result_type;

  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(T const & lhs, T const & rhs) const
  {
    return !Impl::bitwise_equal(&lhs,&rhs);
  }
};
// Functor returning lhs == rhs.
template <typename T>
struct equal_to
{
  typedef T    first_argument_type;
  typedef T    second_argument_type;
  typedef bool result_type;

  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(T const & lhs, T const & rhs) const
  {
    return lhs == rhs;
  }
};
// Functor returning lhs != rhs.
template <typename T>
struct not_equal_to
{
  typedef T    first_argument_type;
  typedef T    second_argument_type;
  typedef bool result_type;

  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(T const & lhs, T const & rhs) const
  {
    return lhs != rhs;
  }
};
// Functor returning lhs > rhs.
template <typename T>
struct greater
{
  typedef T    first_argument_type;
  typedef T    second_argument_type;
  typedef bool result_type;

  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(T const & lhs, T const & rhs) const
  {
    return lhs > rhs;
  }
};
// Functor returning lhs < rhs.
template <typename T>
struct less
{
  typedef T    first_argument_type;
  typedef T    second_argument_type;
  typedef bool result_type;

  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(T const & lhs, T const & rhs) const
  {
    return lhs < rhs;
  }
};
// Functor returning lhs >= rhs.
template <typename T>
struct greater_equal
{
  typedef T    first_argument_type;
  typedef T    second_argument_type;
  typedef bool result_type;

  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(T const & lhs, T const & rhs) const
  {
    return lhs >= rhs;
  }
};
// Functor returning lhs <= rhs.
template <typename T>
struct less_equal
{
  typedef T    first_argument_type;
  typedef T    second_argument_type;
  typedef bool result_type;

  KOKKOS_FORCEINLINE_FUNCTION
  bool operator()(T const & lhs, T const & rhs) const
  {
    return lhs <= rhs;
  }
};
} // namespace Kokkos
#endif //KOKKOS_FUNCTIONAL_HPP

View File

@ -0,0 +1,531 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_SEGMENTED_VIEW_HPP_
#define KOKKOS_SEGMENTED_VIEW_HPP_
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Error.hpp>
#include <cstdio>
#if ! KOKKOS_USING_EXP_VIEW
namespace Kokkos {
namespace Experimental {
namespace Impl {
// Forward declaration of the functor that deletes the storage of individual
// segments.  It is defined after the SegmentedView class and is launched from
// SegmentedView's destructor.
template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type>
struct delete_segmented_view;
/// Generic fallback used for every memory space that has no explicit
/// specialization: the requested size is ignored and nothing happens.
template< class MemorySpace >
inline void DeviceSetAllocatableMemorySize( size_t /* size */ )
{
}
#if defined( KOKKOS_HAVE_CUDA )
/// Shared implementation of the CUDA specializations below.
///
/// Queries the device-side malloc heap limit (cudaLimitMallocHeapSize) and,
/// when that limit is smaller than \c size, raises it to <tt>2*size</tt>.
/// The body is only active when compiled by a CUDA compiler (__CUDACC__);
/// otherwise the call is a no-op.
///
/// NOTE(review): the status codes returned by the CUDA runtime calls are
/// ignored, exactly as in the previous implementation.
///
/// \param size  Number of bytes that device code is expected to allocate.
inline
void cuda_ensure_malloc_heap_size( size_t size )
{
#ifdef __CUDACC__
  // Start from 0 so that a failed query cannot leave the limit uninitialized.
  size_t current_limit = 0;
  cudaDeviceGetLimit( &current_limit , cudaLimitMallocHeapSize );
  if ( current_limit < size ) {
    cudaDeviceSetLimit( cudaLimitMallocHeapSize , 2 * size );
  }
#else
  (void) size ; // not used when the translation unit is not compiled as CUDA
#endif
}

/// Specialization for Kokkos::CudaSpace: make sure the device malloc heap can
/// hold at least \c size bytes.
template<>
inline
void DeviceSetAllocatableMemorySize<Kokkos::CudaSpace>(size_t size)
{
  cuda_ensure_malloc_heap_size( size );
}

/// Specialization for Kokkos::CudaUVMSpace: same heap adjustment as for
/// Kokkos::CudaSpace.
template<>
inline
void DeviceSetAllocatableMemorySize<Kokkos::CudaUVMSpace>(size_t size)
{
  cuda_ensure_malloc_heap_size( size );
}
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
}
/// \class SegmentedView
/// \brief Multidimensional array whose first dimension is split into
///   fixed-length segments that are allocated on demand by grow().
///
/// Element access (operator()) splits the first index into a segment number
/// (index >> segment_length_log2) and an offset within that segment
/// (index & segment_length_m1_).
template< class DataType ,
class Arg1Type = void ,
class Arg2Type = void ,
class Arg3Type = void>
class SegmentedView : public Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
{
public:
//! \name Typedefs for device types and various Kokkos::View specializations.
//@{
typedef Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ;
//! The type of a Kokkos::View on the device.
typedef Kokkos::View< typename traits::data_type ,
typename traits::array_layout ,
typename traits::memory_space ,
Kokkos::MemoryUnmanaged > t_dev ;
private:
// One unmanaged view handle per segment; sized to max_segments_ by the
// allocating constructor and filled in by grow() / grow_non_thread_safe().
Kokkos::View<t_dev*,typename traits::memory_space> segments_;
// Single int used as a spin lock by grow() (0 = free, 1 = held).
Kokkos::View<int,typename traits::memory_space> realloc_lock;
// Single int holding the number of segments allocated so far.
Kokkos::View<int,typename traits::memory_space> nsegments_;
// Extent of each segment in dimension 0; the constructor requires a power of two.
size_t segment_length_;
// segment_length_ - 1; used as the bit mask for the in-segment offset.
size_t segment_length_m1_;
// Upper bound on the number of segments: (n0 + segment_length_ - 1) / segment_length_.
int max_segments_;
// log2 of segment_length_; used as the shift that yields the segment number.
int segment_length_log2;
// Dimensions, cardinality, capacity, and offset computation for
// multidimensional array view of contiguous memory.
// Inherits from Impl::Shape
typedef Kokkos::Impl::ViewOffset< typename traits::shape_type
, typename traits::array_layout
> offset_map_type ;
// Holds the dimensions (n0..n7) passed to the allocating constructor.
offset_map_type m_offset_map ;
// View type over the intrinsic array type of the data.
typedef Kokkos::View< typename traits::array_intrinsic_type ,
typename traits::array_layout ,
typename traits::memory_space ,
typename traits::memory_traits > array_type ;
// View type over the const-qualified data type.
typedef Kokkos::View< typename traits::const_data_type ,
typename traits::array_layout ,
typename traits::memory_space ,
typename traits::memory_traits > const_type ;
// View type over the non-const data type.
typedef Kokkos::View< typename traits::non_const_data_type ,
typename traits::array_layout ,
typename traits::memory_space ,
typename traits::memory_traits > non_const_type ;
// View type with the same data type and layout, placed in HostSpace.
typedef Kokkos::View< typename traits::non_const_data_type ,
typename traits::array_layout ,
HostSpace ,
void > HostMirror ;
// Variant enabled when Accessible is true: the segment counter is read in
// place and scaled by the segment length.
template< bool Accessible >
KOKKOS_INLINE_FUNCTION
typename Kokkos::Impl::enable_if< Accessible , typename traits::size_type >::type
dimension_0_intern() const
{
  return nsegments_() * segment_length_ ;
}
// Variant enabled when Accessible is false: the segment counter is first
// copied into a local int with Impl::DeepCopy, then scaled by the segment
// length.  The copy is compiled out when __CUDA_ARCH__ is defined, in which
// case the result is 0.
template< bool Accessible >
KOKKOS_INLINE_FUNCTION
typename Kokkos::Impl::enable_if< ! Accessible , typename traits::size_type >::type
dimension_0_intern() const
{
  // In Host space
  int host_segment_count = 0 ;
#if ! defined( __CUDA_ARCH__ )
  Kokkos::Impl::DeepCopy< HostSpace , typename traits::memory_space >(
      & host_segment_count , nsegments_.ptr_on_device() , sizeof(int) );
#endif
  return host_segment_count * segment_length_ ;
}
public:
//! Number of dimensions of the view, taken from traits::rank.
enum { Rank = traits::rank };

//! Return a copy of the offset map that stores the view's dimensions.
KOKKOS_INLINE_FUNCTION
offset_map_type shape() const
{
  return m_offset_map ;
}
/** \brief Return the (current) size of dimension 0.
 *
 *  The value is (number of allocated segments) * (segment length).  The helper
 *  variant is chosen at compile time depending on whether the active execution
 *  memory space can access the view's memory space.
 */
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_0() const
{
  enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
      Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
  // Return the size_type result directly: routing it through an int (as was
  // done before) truncates extents that do not fit into an int.
  return SegmentedView::dimension_0_intern< Accessible >();
}
/** \brief Return the size of dimension 1. */
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_1() const
{
  return m_offset_map.N1 ;
}

/** \brief Return the size of dimension 2. */
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_2() const
{
  return m_offset_map.N2 ;
}

/** \brief Return the size of dimension 3. */
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_3() const
{
  return m_offset_map.N3 ;
}

/** \brief Return the size of dimension 4. */
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_4() const
{
  return m_offset_map.N4 ;
}

/** \brief Return the size of dimension 5. */
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_5() const
{
  return m_offset_map.N5 ;
}

/** \brief Return the size of dimension 6. */
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_6() const
{
  return m_offset_map.N6 ;
}

/** \brief Return the size of dimension 7. */
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_7() const
{
  return m_offset_map.N7 ;
}
/** \brief Return the product of all eight extents: the current size of
 *  dimension 0 multiplied by the sizes of dimensions 1 through 7.
 */
KOKKOS_INLINE_FUNCTION
typename traits::size_type size() const
{
  return dimension_0()
       * m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3
       * m_offset_map.N4 * m_offset_map.N5 * m_offset_map.N6
       * m_offset_map.N7 ;
}
/** \brief Return the size of dimension \c i.
 *
 *  Dimension 0 is answered by dimension_0(); every other index is looked up
 *  in the offset map through Kokkos::Impl::dimension.
 */
template< typename iType >
KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension( const iType & i ) const
{
  if ( i != 0 ) {
    return Kokkos::Impl::dimension( m_offset_map , i );
  }
  return dimension_0();
}
/** \brief Return segments_.dimension_0() multiplied by the sizes of
 *  dimensions 1 through 7.
 *
 *  Declared const so that it can be called on a const SegmentedView (for
 *  example from a functor's const operator()).
 *
 *  NOTE(review): the segment length is not part of this product, so the
 *  result counts segment slots rather than elements -- confirm that this is
 *  the intended meaning of "capacity".
 */
KOKKOS_INLINE_FUNCTION
typename traits::size_type capacity() const
{
  return segments_.dimension_0() *
         m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 *
         m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7;
}
/** \brief Return the number of segments allocated so far.
 *
 *  Computed as (current size of dimension 0) / (segment length).  Declared
 *  const so that it can be called on a const SegmentedView.
 */
KOKKOS_INLINE_FUNCTION
typename traits::size_type get_num_segments() const
{
  enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
      Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
  // Divide the size_type result directly; storing it in an int first (as was
  // done before) truncates extents that do not fit into an int.
  return SegmentedView::dimension_0_intern< Accessible >() / segment_length_ ;
}
/** \brief Return the maximum number of segments this view may allocate.
 *
 *  Declared const so that it can be called on a const SegmentedView.
 */
KOKKOS_INLINE_FUNCTION
typename traits::size_type get_max_segments() const
{
  return max_segments_;
}
/// \brief Constructor that allocates View objects with an initial length of 0.
///
/// This constructor works mostly like the analogous constructor of View.
/// The first argument is a string label, which is entirely for your
/// benefit.  (Different SegmentedView objects may have the same label if
/// you like.)  The second argument 'view_length' is the size of the segments.
/// This number must be a nonzero power of two.  The third argument n0 is the
/// maximum value for the first dimension of the segmented view.  The maximal
/// allocatable number of segments is thus: (n0+view_length-1)/view_length.
/// The arguments that follow (n1 to n7) are the remaining dimensions of the
/// View objects.  For example, for a View with 3 runtime dimensions,
/// the first 4 integer arguments will be nonzero:
/// SegmentedView("Name",32768,10000000,8,4).  This allocates a SegmentedView
/// with a maximum of 306 segments of dimension (32768,8,4).  The logical size of
/// the segmented view is (n,8,4) with n between 0 and 10000000.
/// You may omit the integer arguments that follow.
///
/// Kokkos::Impl::throw_runtime_exception is called when 'view_length' is zero
/// or not a power of two.
template< class LabelType >
SegmentedView(const LabelType & label ,
              const size_t view_length ,
              const size_t n0 ,
              const size_t n1 = 0 ,
              const size_t n2 = 0 ,
              const size_t n3 = 0 ,
              const size_t n4 = 0 ,
              const size_t n5 = 0 ,
              const size_t n6 = 0 ,
              const size_t n7 = 0
             ): segment_length_(view_length),segment_length_m1_(view_length-1)
{
  // A power of two has exactly one bit set, so (x & (x-1)) is zero for it.
  // The test is done on size_t values: the former "1 << log2" check shifted an
  // int, which is undefined for a zero length (shift by -1) and for lengths
  // of 2^31 or more.  Zero is rejected explicitly because it would otherwise
  // lead to a division by zero below.
  if ( segment_length_ == 0 || ( segment_length_ & segment_length_m1_ ) != 0 )
    Kokkos::Impl::throw_runtime_exception("Kokkos::SegmentedView requires a 'power of 2' segment length");

  // log2 of the segment length: count how often it can be halved.
  segment_length_log2 = -1;
  for ( size_t remaining = segment_length_ ; remaining > 0 ; remaining >>= 1 )
    segment_length_log2++;

  max_segments_ = (n0+segment_length_m1_)/segment_length_;

  Impl::DeviceSetAllocatableMemorySize<typename traits::memory_space>(segment_length_*max_segments_*sizeof(typename traits::value_type));

  // Construct the views with exactly the types the members are declared with
  // (memory_space), instead of views parameterized on the execution space.
  segments_    = Kokkos::View<t_dev*,typename traits::memory_space>(label , max_segments_);
  realloc_lock = Kokkos::View<int,typename traits::memory_space>("Lock");
  nsegments_   = Kokkos::View<int,typename traits::memory_space>("nviews");

  m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7, n0*n1*n2*n3*n4*n5*n6*n7 );
}
/// Copy constructor: copies every member from \c rhs, so the new object
/// refers to the same segment table, lock and segment counter views.
KOKKOS_INLINE_FUNCTION
SegmentedView(const SegmentedView& rhs)
  : segments_          ( rhs.segments_ )
  , realloc_lock       ( rhs.realloc_lock )
  , nsegments_         ( rhs.nsegments_ )
  , segment_length_    ( rhs.segment_length_ )
  , segment_length_m1_ ( rhs.segment_length_m1_ )
  , max_segments_      ( rhs.max_segments_ )
  , segment_length_log2( rhs.segment_length_log2 )
  , m_offset_map       ( rhs.m_offset_map )
{
}
/// Copy assignment: assigns every member from \c rhs, member by member, and
/// returns a reference to this object.
KOKKOS_INLINE_FUNCTION
SegmentedView& operator= (const SegmentedView& rhs)
{
  // View handles.
  segments_    = rhs.segments_;
  realloc_lock = rhs.realloc_lock;
  nsegments_   = rhs.nsegments_;

  // Segment geometry.
  segment_length_     = rhs.segment_length_;
  segment_length_m1_  = rhs.segment_length_m1_;
  max_segments_       = rhs.max_segments_;
  segment_length_log2 = rhs.segment_length_log2;

  // Dimensions.
  m_offset_map = rhs.m_offset_map;

  return *this;
}
/// Destructor: when this object is the last reference-counted owner of the
/// segment table, deletes the storage of every allocated segment.
~SegmentedView() {
// Nothing to release when the segment table's tracker is not reference counting.
if ( !segments_.tracker().ref_counting()) { return; }
size_t ref_count = segments_.tracker().ref_count();
// Only the last remaining owner frees the segments.
if(ref_count == 1u) {
Kokkos::fence();
// Bring the number of allocated segments to the host to size the launch.
typename Kokkos::View<int,typename traits::execution_space>::HostMirror h_nviews("h_nviews");
Kokkos::deep_copy(h_nviews,nsegments_);
// One work item per allocated segment; each one calls delete[] on that segment's pointer.
Kokkos::parallel_for(h_nviews(),Impl::delete_segmented_view<DataType , Arg1Type , Arg2Type, Arg3Type>(*this));
}
}
/// Return (by value) the unmanaged view stored in slot \c i of the segment
/// table.  No bounds check is performed here.
KOKKOS_INLINE_FUNCTION
t_dev get_segment(const int& i) const
{
  return segments_[ i ];
}
/// \brief Grow the view from within a team until dimension 0 covers at least
///   \c growSize entries.
///
/// If \c growSize exceeds max_segments_*segment_length_ a message is printed
/// and the function returns without growing and without reaching the team
/// barrier.  Otherwise the team member with rank 0 acquires realloc_lock,
/// allocates the missing segments with operator new[], and releases the lock;
/// all team members then meet at team_barrier().
///
/// \param team_member  Team handle; provides team_rank() and team_barrier().
/// \param growSize     Requested minimum extent of dimension 0.
template< class MemberType>
KOKKOS_INLINE_FUNCTION
void grow (MemberType& team_member, const size_t& growSize) const
{
  const size_t max_size = max_segments_*segment_length_;
  if (growSize > max_size) {
    // "%lu" expects unsigned long; size_t is a different type on some
    // platforms, so the arguments are converted explicitly.
    printf ("Exceeding maxSize: %lu %lu\n",
            static_cast<unsigned long>(growSize),
            static_cast<unsigned long>(max_size));
    return;
  }

  if (team_member.team_rank()==0) {
    bool too_small = growSize > segment_length_ * nsegments_();
    if (too_small) {
      while (Kokkos::atomic_compare_exchange(&realloc_lock(),0,1))
        ; // get the lock

      // Recheck once we have the lock: another holder may have grown the view.
      too_small = growSize > segment_length_ * nsegments_();
      while (too_small) {
        const size_t alloc_size =
          segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
          m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
        typename traits::non_const_value_type* const ptr =
          new typename traits::non_const_value_type[alloc_size];
        // Register the new segment in the next free slot, then publish it by
        // bumping the segment counter.
        segments_(nsegments_()) =
          t_dev (ptr, segment_length_, m_offset_map.N1, m_offset_map.N2,
                 m_offset_map.N3, m_offset_map.N4, m_offset_map.N5,
                 m_offset_map.N6, m_offset_map.N7);
        nsegments_()++;
        too_small = growSize > segment_length_ * nsegments_();
      }

      realloc_lock() = 0; //release the lock
    }
  }

  team_member.team_barrier();
}
/// \brief Grow the view until dimension 0 covers at least \c growSize
///   entries, without taking realloc_lock.
///
/// If \c growSize exceeds max_segments_*segment_length_ a message is printed
/// and nothing is allocated.  Otherwise missing segments are allocated with
/// operator new[] one at a time.  As the name says, no locking is done, so
/// concurrent callers are not protected from each other.
///
/// \param growSize  Requested minimum extent of dimension 0.
KOKKOS_INLINE_FUNCTION
void grow_non_thread_safe (const size_t& growSize) const
{
  const size_t max_size = max_segments_*segment_length_;
  if (growSize > max_size) {
    // "%lu" expects unsigned long; size_t is a different type on some
    // platforms, so the arguments are converted explicitly.
    printf ("Exceeding maxSize: %lu %lu\n",
            static_cast<unsigned long>(growSize),
            static_cast<unsigned long>(max_size));
    return;
  }

  bool too_small = growSize > segment_length_ * nsegments_();
  while (too_small) {
    const size_t alloc_size =
      segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
      m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
    typename traits::non_const_value_type* const ptr =
      new typename traits::non_const_value_type[alloc_size];
    // Register the new segment in the next free slot, then bump the counter.
    segments_(nsegments_()) =
      t_dev (ptr, segment_length_, m_offset_map.N1, m_offset_map.N2,
             m_offset_map.N3, m_offset_map.N4, m_offset_map.N5,
             m_offset_map.N6, m_offset_map.N7);
    nsegments_()++;
    too_small = growSize > segment_length_ * nsegments_();
  }
}
/// Rank-1 element access.  The upper bits of \c i0 select the segment and the
/// lower bits (masked with segment_length_m1_) select the entry inside it.
template< typename iType0 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value && traits::rank == 1 )
                       , typename traits::value_type &
                       >::type
operator() ( const iType0 & i0 ) const
{
  const t_dev & segment = segments_[ i0 >> segment_length_log2 ];
  return segment( i0 & segment_length_m1_ );
}
/// Rank-2 element access.  The upper bits of \c i0 select the segment and the
/// lower bits (masked with segment_length_m1_) select the row inside it.
template< typename iType0 , typename iType1 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
                          std::is_integral<iType1>::value &&
                          traits::rank == 2 )
                       , typename traits::value_type &
                       >::type
operator() ( const iType0 & i0 , const iType1 & i1 ) const
{
  const t_dev & segment = segments_[ i0 >> segment_length_log2 ];
  return segment( i0 & segment_length_m1_ , i1 );
}
/// Rank-3 element access.  The upper bits of \c i0 select the segment and the
/// lower bits (masked with segment_length_m1_) select the row inside it.
template< typename iType0 , typename iType1 , typename iType2 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
                          std::is_integral<iType1>::value &&
                          std::is_integral<iType2>::value &&
                          traits::rank == 3 )
                       , typename traits::value_type &
                       >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
{
  const t_dev & segment = segments_[ i0 >> segment_length_log2 ];
  return segment( i0 & segment_length_m1_ , i1 , i2 );
}
/// Rank-4 element access.  The upper bits of \c i0 select the segment and the
/// lower bits (masked with segment_length_m1_) select the row inside it.
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
                          std::is_integral<iType1>::value &&
                          std::is_integral<iType2>::value &&
                          std::is_integral<iType3>::value &&
                          traits::rank == 4 )
                       , typename traits::value_type &
                       >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
{
  const t_dev & segment = segments_[ i0 >> segment_length_log2 ];
  return segment( i0 & segment_length_m1_ , i1 , i2 , i3 );
}
/// Rank-5 element access.  The upper bits of \c i0 select the segment and the
/// lower bits (masked with segment_length_m1_) select the row inside it.
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
          typename iType4 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
                          std::is_integral<iType1>::value &&
                          std::is_integral<iType2>::value &&
                          std::is_integral<iType3>::value &&
                          std::is_integral<iType4>::value &&
                          traits::rank == 5 )
                       , typename traits::value_type &
                       >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
             const iType4 & i4 ) const
{
  const t_dev & segment = segments_[ i0 >> segment_length_log2 ];
  return segment( i0 & segment_length_m1_ , i1 , i2 , i3 , i4 );
}
/// Rank-6 element access.  The upper bits of \c i0 select the segment and the
/// lower bits (masked with segment_length_m1_) select the row inside it.
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
          typename iType4 , typename iType5 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
                          std::is_integral<iType1>::value &&
                          std::is_integral<iType2>::value &&
                          std::is_integral<iType3>::value &&
                          std::is_integral<iType4>::value &&
                          std::is_integral<iType5>::value &&
                          traits::rank == 6 )
                       , typename traits::value_type &
                       >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
             const iType4 & i4 , const iType5 & i5 ) const
{
  const t_dev & segment = segments_[ i0 >> segment_length_log2 ];
  return segment( i0 & segment_length_m1_ , i1 , i2 , i3 , i4 , i5 );
}
/// Rank-7 element access.  The upper bits of \c i0 select the segment and the
/// lower bits (masked with segment_length_m1_) select the row inside it.
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
          typename iType4 , typename iType5 , typename iType6 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
                          std::is_integral<iType1>::value &&
                          std::is_integral<iType2>::value &&
                          std::is_integral<iType3>::value &&
                          std::is_integral<iType4>::value &&
                          std::is_integral<iType5>::value &&
                          std::is_integral<iType6>::value &&
                          traits::rank == 7 )
                       , typename traits::value_type &
                       >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
             const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
{
  const t_dev & segment = segments_[ i0 >> segment_length_log2 ];
  return segment( i0 & segment_length_m1_ , i1 , i2 , i3 , i4 , i5 , i6 );
}
/// Rank-8 element access.  The upper bits of \c i0 select the segment and the
/// lower bits (masked with segment_length_m1_) select the row inside it.
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
          typename iType4 , typename iType5 , typename iType6 , typename iType7 >
KOKKOS_FORCEINLINE_FUNCTION
typename std::enable_if<( std::is_integral<iType0>::value &&
                          std::is_integral<iType1>::value &&
                          std::is_integral<iType2>::value &&
                          std::is_integral<iType3>::value &&
                          std::is_integral<iType4>::value &&
                          std::is_integral<iType5>::value &&
                          std::is_integral<iType6>::value &&
                          std::is_integral<iType7>::value &&
                          traits::rank == 8 )
                       , typename traits::value_type &
                       >::type
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
             const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const
{
  const t_dev & segment = segments_[ i0 >> segment_length_log2 ];
  return segment( i0 & segment_length_m1_ , i1 , i2 , i3 , i4 , i5 , i6 , i7 );
}
};
namespace Impl {
/// Functor that frees the storage of one segment per work item: item \c i
/// calls delete[] on the data pointer of segment \c i of the stored view.
template< class DataType , class Arg1Type , class Arg2Type , class Arg3Type >
struct delete_segmented_view
{
  typedef SegmentedView< DataType , Arg1Type , Arg2Type , Arg3Type > view_type ;
  typedef typename view_type::execution_space                        execution_space ;

  /// Copy of the segmented view whose segments are deleted.
  view_type view_ ;

  delete_segmented_view( view_type view )
    : view_( view )
  {
  }

  KOKKOS_INLINE_FUNCTION
  void operator() ( int i ) const
  {
    delete [] view_.get_segment( i ).ptr_on_device() ;
  }
};
}
}
}
#endif
#endif

View File

@ -0,0 +1,226 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STATICCRSGRAPH_HPP
#define KOKKOS_STATICCRSGRAPH_HPP
#include <string>
#include <vector>
#include <Kokkos_Core.hpp>
namespace Kokkos {
/// \class StaticCrsGraph
/// \brief Compressed row storage array.
///
/// \tparam DataType The type of stored entries. If a StaticCrsGraph is
/// used as the graph of a sparse matrix, then this is usually an
/// integer type, the type of the column indices in the sparse
/// matrix.
///
/// \tparam Arg1Type The second template parameter, corresponding
/// either to the Device type (if there are no more template
/// parameters) or to the Layout type (if there is at least one more
/// template parameter).
///
/// \tparam Arg2Type The third template parameter, which if provided
/// corresponds to the Device type.
///
/// \tparam SizeType The type of row offsets. Usually the default
/// parameter suffices. However, setting a nondefault value is
/// necessary in some cases, for example, if you want to have
/// sparse matrices with dimensions (and therefore column indices)
/// that fit in \c int, but want to store more than <tt>INT_MAX</tt>
/// entries in the sparse matrix.
///
/// A row has a range of entries:
/// <ul>
/// <li> <tt> row_map[i0] <= entry < row_map[i0+1] </tt> </li>
/// <li> <tt> 0 <= i1 < row_map[i0+1] - row_map[i0] </tt> </li>
/// <li> <tt> entries( entry , i2 , i3 , ... ); </tt> </li>
/// <li> <tt> entries( row_map[i0] + i1 , i2 , i3 , ... ); </tt> </li>
/// </ul>
template< class DataType,
          class Arg1Type,
          class Arg2Type = void,
          typename SizeType = typename ViewTraits<DataType*, Arg1Type, Arg2Type, void >::size_type>
class StaticCrsGraph {
private:
  typedef ViewTraits<DataType*, Arg1Type, Arg2Type, void> traits;

public:
  typedef DataType                         data_type;
  typedef typename traits::array_layout    array_layout;
  typedef typename traits::execution_space execution_space;
  typedef typename traits::device_type     device_type;
  typedef SizeType                         size_type;

  //! This graph type, spelled out with all four template arguments.
  typedef StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > staticcrsgraph_type;
  //! The same graph type placed in the host mirror space of the traits.
  typedef StaticCrsGraph< DataType , array_layout , typename traits::host_mirror_space , SizeType > HostMirror;
  //! View type of the (read-only) row offsets.
  typedef View< const size_type* , array_layout, device_type > row_map_type;
  //! View type of the stored entries.
  typedef View< DataType* , array_layout, device_type > entries_type;

  entries_type entries;
  row_map_type row_map;

  //! Construct an empty graph: both views are default constructed.
  StaticCrsGraph ()
    : entries()
    , row_map()
  {}

  //! Copy constructor (shallow copy of both views).
  StaticCrsGraph (const StaticCrsGraph& rhs)
    : entries (rhs.entries)
    , row_map (rhs.row_map)
  {}

  //! Construct from an existing entries view and row map view.
  template<class EntriesType, class RowMapType>
  StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_)
    : entries (entries_)
    , row_map (row_map_)
  {}

  /** \brief  Assign to a view of the rhs array.
   *          If the old view is the last view
   *          then allocated memory is deallocated.
   */
  StaticCrsGraph& operator= (const StaticCrsGraph& rhs)
  {
    entries = rhs.entries;
    row_map = rhs.row_map;
    return *this;
  }

  /**  \brief  Destroy this view of the array.
   *           If the last view then allocated memory is deallocated.
   */
  ~StaticCrsGraph() {}

  /** \brief  Number of rows: one less than the length of row_map, or zero
   *          when row_map is empty.
   */
  KOKKOS_INLINE_FUNCTION
  size_type numRows() const
  {
    if ( row_map.dimension_0 () == 0 ) {
      return static_cast<size_type> (0);
    }
    return row_map.dimension_0 () - static_cast<size_type> (1);
  }
};
//----------------------------------------------------------------------------
// Declaration only: create a graph of type StaticCrsGraphType with the given
// label from a flat vector.
// NOTE(review): presumably `input` holds one count per row and the definition
// lives in impl/Kokkos_StaticCrsGraph_factory.hpp (included below) -- confirm.
template< class StaticCrsGraphType , class InputSizeType >
typename StaticCrsGraphType::staticcrsgraph_type
create_staticcrsgraph( const std::string & label ,
const std::vector< InputSizeType > & input );
// Declaration only: create a graph of type StaticCrsGraphType with the given
// label from a vector of vectors.
// NOTE(review): presumably each inner vector lists the entries of one row --
// confirm against the definition.
template< class StaticCrsGraphType , class InputSizeType >
typename StaticCrsGraphType::staticcrsgraph_type
create_staticcrsgraph( const std::string & label ,
const std::vector< std::vector< InputSizeType > > & input );
//----------------------------------------------------------------------------
// Declaration only: returns a graph of the HostMirror type for `input`.
// NOTE(review): by analogy with Kokkos::create_mirror_view this presumably
// avoids a copy when `input` is already host accessible -- confirm against
// the definition.
template< class DataType ,
class Arg1Type ,
class Arg2Type ,
typename SizeType >
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror_view( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & input );
// Declaration only: returns a graph of the HostMirror type for `input`.
// NOTE(review): presumably always produces a separate host copy -- confirm
// against the definition.
template< class DataType ,
class Arg1Type ,
class Arg2Type ,
typename SizeType >
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & input );
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#include <impl/Kokkos_StaticCrsGraph_factory.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/// Reduction functor that computes the largest value stored in a graph's
/// entries view.  The reduction starts from 0 (see init()).
template< class GraphType >
struct StaticCrsGraphMaximumEntry {

  typedef typename GraphType::execution_space execution_space ;
  typedef typename GraphType::data_type       value_type ;

  /// Entries view of the graph being reduced.
  const typename GraphType::entries_type entries ;

  StaticCrsGraphMaximumEntry( const GraphType & graph )
    : entries( graph.entries )
  {}

  /// Fold entry \c i into the running maximum \c update.
  KOKKOS_INLINE_FUNCTION
  void operator()( const unsigned i , value_type & update ) const
  {
    const value_type & candidate = entries(i);
    if ( update < candidate ) {
      update = candidate ;
    }
  }

  /// Initial value of every partial result.
  KOKKOS_INLINE_FUNCTION
  void init( value_type & update ) const
  {
    update = 0 ;
  }

  /// Combine two partial results, keeping the larger one in \c update.
  KOKKOS_INLINE_FUNCTION
  void join( volatile value_type & update ,
             volatile const value_type & input ) const
  {
    if ( update < input ) {
      update = input ;
    }
  }
};
}
template< class DataType, class Arg1Type, class Arg2Type, typename SizeType >
DataType maximum_entry( const StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > & graph )
{
typedef StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType> GraphType ;
typedef Impl::StaticCrsGraphMaximumEntry< GraphType > FunctorType ;
DataType result = 0 ;
Kokkos::parallel_reduce( graph.entries.dimension_0(),
FunctorType(graph), result );
return result ;
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_STATICCRSGRAPH_HPP */

View File

@ -0,0 +1,848 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_UnorderedMap.hpp
/// \brief Declaration and definition of Kokkos::UnorderedMap.
///
/// This header file declares and defines Kokkos::UnorderedMap and its
/// related nonmember functions.
#ifndef KOKKOS_UNORDERED_MAP_HPP
#define KOKKOS_UNORDERED_MAP_HPP
#include <Kokkos_Core.hpp>
#include <Kokkos_Functional.hpp>
#include <Kokkos_Bitset.hpp>
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_UnorderedMap_impl.hpp>
#include <iostream>
#include <stdint.h>
#include <stdexcept>
namespace Kokkos {
// Sentinel index with all bits set (~0u).  UnorderedMapInsertResult starts
// out with this index, and its failed() accessor tests against it.
enum { UnorderedMapInvalidIndex = ~0u };
/// \brief First element of the return value of UnorderedMap::insert().
///
/// Inserting an element into an UnorderedMap is not guaranteed to
/// succeed.  There are three possible conditions, each reported by an
/// accessor of this class:
/// <ol>
/// <li> <tt>failed()</tt>: The insert failed.  This usually
///      means that the UnorderedMap ran out of space. </li>
/// <li> <tt>success()</tt>: The insert succeeded, and the key
///      did <i>not</i> exist in the table before. </li>
/// <li> <tt>existing()</tt>: The insert succeeded, and the key
///      <i>did</i> exist in the table before.  The new value was
///      ignored and the old value was left in place. </li>
/// </ol>
class UnorderedMapInsertResult
{
private:
  // The three top bits of m_status are flags; the remaining low bits hold the
  // list position counter.
  enum Status{
     SUCCESS = 1u << 31
   , EXISTING = 1u << 30
   , FREED_EXISTING = 1u << 29
   , LIST_LENGTH_MASK = ~(SUCCESS | EXISTING | FREED_EXISTING)
  };

public:
  /// Did the map successfully insert the key/value pair
  KOKKOS_FORCEINLINE_FUNCTION
  bool success() const
  {
    return (m_status & SUCCESS) != 0;
  }

  /// Was the key already present in the map
  KOKKOS_FORCEINLINE_FUNCTION
  bool existing() const
  {
    return (m_status & EXISTING) != 0;
  }

  /// Did the map fail to insert the key due to insufficient capacity
  KOKKOS_FORCEINLINE_FUNCTION
  bool failed() const
  {
    return m_index == UnorderedMapInvalidIndex;
  }

  /// Did the map lose a race condition to insert a duplicate key/value pair
  /// where an index was claimed that needed to be released
  KOKKOS_FORCEINLINE_FUNCTION
  bool freed_existing() const
  {
    return (m_status & FREED_EXISTING) != 0;
  }

  /// How many iterations through the insert loop did it take before the
  /// map returned
  KOKKOS_FORCEINLINE_FUNCTION
  uint32_t list_position() const
  {
    return (m_status & LIST_LENGTH_MASK);
  }

  /// Index where the key can be found as long as the insert did not fail
  KOKKOS_FORCEINLINE_FUNCTION
  uint32_t index() const
  {
    return m_index;
  }

  /// Start in the "failed" state: invalid index, no flags, list position 0.
  KOKKOS_FORCEINLINE_FUNCTION
  UnorderedMapInsertResult()
    : m_index(UnorderedMapInvalidIndex)
    , m_status(0)
  {}

  /// Advance the list position by one; it saturates at LIST_LENGTH_MASK so it
  /// can never spill into the flag bits.
  KOKKOS_FORCEINLINE_FUNCTION
  void increment_list_position()
  {
    if ( list_position() < LIST_LENGTH_MASK ) {
      m_status += 1u;
    }
  }

  /// Record that the key was found at index \c i.  \c arg_freed_existing
  /// additionally sets the FREED_EXISTING flag.  The list position is kept.
  KOKKOS_FORCEINLINE_FUNCTION
  void set_existing(uint32_t i, bool arg_freed_existing)
  {
    uint32_t new_status = EXISTING | list_position();
    if ( arg_freed_existing ) {
      new_status |= FREED_EXISTING;
    }
    m_index  = i;
    m_status = new_status;
  }

  /// Record that the key was newly inserted at index \c i.  The list position
  /// is kept.
  KOKKOS_FORCEINLINE_FUNCTION
  void set_success(uint32_t i)
  {
    const uint32_t new_status = SUCCESS | list_position();
    m_index  = i;
    m_status = new_status;
  }

private:
  uint32_t m_index;   // index of the key, or UnorderedMapInvalidIndex
  uint32_t m_status;  // flag bits plus list position counter
};
/// \class UnorderedMap
/// \brief Thread-safe, performance-portable lookup table.
///
/// This class provides a lookup table. In terms of functionality,
/// this class compares to std::unordered_map (new in C++11).
/// "Unordered" means that keys are not stored in any particular
/// order, unlike (for example) std::map. "Thread-safe" means that
/// lookups, insertion, and deletion are safe to call by multiple
/// threads in parallel. "Performance-portable" means that parallel
/// performance of these operations is reasonable, on multiple
/// hardware platforms. Platforms on which performance has been
/// tested include conventional Intel x86 multicore processors, Intel
/// Xeon Phi ("MIC"), and NVIDIA GPUs.
///
/// Parallel performance portability entails design decisions that
/// might differ from one's expectation for a sequential interface.
/// This particularly affects insertion of single elements. In an
/// interface intended for sequential use, insertion might reallocate
/// memory if the original allocation did not suffice to hold the new
/// element. In this class, insertion does <i>not</i> reallocate
/// memory. This means that it might fail. insert() returns an enum
/// which indicates whether the insert failed. There are three
/// possible conditions:
/// <ol>
/// <li> <tt>INSERT_FAILED</tt>: The insert failed. This usually
/// means that the UnorderedMap ran out of space. </li>
/// <li> <tt>INSERT_SUCCESS</tt>: The insert succeeded, and the key
/// did <i>not</i> exist in the table before. </li>
/// <li> <tt>INSERT_EXISTING</tt>: The insert succeeded, and the key
/// <i>did</i> exist in the table before. The new value was
/// ignored and the old value was left in place. </li>
/// </ol>
///
/// \tparam Key Type of keys of the lookup table. If \c const, users
/// are not allowed to add or remove keys, though they are allowed
/// to change values. In that case, the implementation may make
/// optimizations specific to the <tt>Device</tt>. For example, if
/// <tt>Device</tt> is \c Cuda, it may use texture fetches to access
/// keys.
///
/// \tparam Value Type of values stored in the lookup table. You may use
/// \c void here, in which case the table will be a set of keys. If
/// \c const, users are not allowed to change entries.
/// In that case, the implementation may make
/// optimizations specific to the \c Device, such as using texture
/// fetches to access values.
///
/// \tparam Device The Kokkos Device type.
///
/// \tparam Hasher Definition of the hash function for instances of
/// <tt>Key</tt>. The default will calculate a bitwise hash.
///
/// \tparam EqualTo Definition of the equality function for instances of
/// <tt>Key</tt>. The default will do a bitwise equality comparison.
///
template < typename Key
, typename Value
, typename Device = Kokkos::DefaultExecutionSpace
, typename Hasher = pod_hash<typename Impl::remove_const<Key>::type>
, typename EqualTo = pod_equal_to<typename Impl::remove_const<Key>::type>
>
class UnorderedMap
{
private:
typedef typename ViewTraits<Key,Device,void,void>::host_mirror_space host_mirror_space ;
public:
//! \name Public types and constants
//@{
//key_types
typedef Key declared_key_type;
typedef typename Impl::remove_const<declared_key_type>::type key_type;
typedef typename Impl::add_const<key_type>::type const_key_type;
//value_types
typedef Value declared_value_type;
typedef typename Impl::remove_const<declared_value_type>::type value_type;
typedef typename Impl::add_const<value_type>::type const_value_type;
typedef Device execution_space;
typedef Hasher hasher_type;
typedef EqualTo equal_to_type;
typedef uint32_t size_type;
//map_types
typedef UnorderedMap<declared_key_type,declared_value_type,execution_space,hasher_type,equal_to_type> declared_map_type;
typedef UnorderedMap<key_type,value_type,execution_space,hasher_type,equal_to_type> insertable_map_type;
typedef UnorderedMap<const_key_type,value_type,execution_space,hasher_type,equal_to_type> modifiable_map_type;
typedef UnorderedMap<const_key_type,const_value_type,execution_space,hasher_type,equal_to_type> const_map_type;
static const bool is_set = Impl::is_same<void,value_type>::value;
static const bool has_const_key = Impl::is_same<const_key_type,declared_key_type>::value;
static const bool has_const_value = is_set || Impl::is_same<const_value_type,declared_value_type>::value;
static const bool is_insertable_map = !has_const_key && (is_set || !has_const_value);
static const bool is_modifiable_map = has_const_key && !has_const_value;
static const bool is_const_map = has_const_key && has_const_value;
typedef UnorderedMapInsertResult insert_result;
typedef UnorderedMap<Key,Value,host_mirror_space,Hasher,EqualTo> HostMirror;
typedef Impl::UnorderedMapHistogram<const_map_type> histogram_type;
//@}
private:
enum { invalid_index = ~static_cast<size_type>(0) };
typedef typename Impl::if_c< is_set, int, declared_value_type>::type impl_value_type;
typedef typename Impl::if_c< is_insertable_map
, View< key_type *, execution_space>
, View< const key_type *, execution_space, MemoryTraits<RandomAccess> >
>::type key_type_view;
typedef typename Impl::if_c< is_insertable_map || is_modifiable_map
, View< impl_value_type *, execution_space>
, View< const impl_value_type *, execution_space, MemoryTraits<RandomAccess> >
>::type value_type_view;
typedef typename Impl::if_c< is_insertable_map
, View< size_type *, execution_space>
, View< const size_type *, execution_space, MemoryTraits<RandomAccess> >
>::type size_type_view;
typedef typename Impl::if_c< is_insertable_map
, Bitset< execution_space >
, ConstBitset< execution_space>
>::type bitset_type;
enum { modified_idx = 0, erasable_idx = 1, failed_insert_idx = 2 };
enum { num_scalars = 3 };
typedef View< int[num_scalars], LayoutLeft, execution_space> scalars_view;
public:
//! \name Public member functions
//@{
/// \brief Default constructor.
///
/// Every member is default-constructed; no storage is requested here.
/// NOTE(review): presumably this yields a map whose capacity() is 0
/// (several methods guard on that) -- confirm against the bitset type.
UnorderedMap()
: m_bounded_insert()
, m_hasher()
, m_equal_to()
, m_size()
, m_available_indexes()
, m_hash_lists()
, m_next_index()
, m_keys()
, m_values()
, m_scalars()
{}
/// \brief Constructor
///
/// \param capacity_hint [in] Initial guess of how many unique keys will be inserted into the map.
///   The actual capacity is calculate_capacity(capacity_hint).
/// \param hasher [in] Hasher function for \c Key instances.  The
///   default value usually suffices.
/// \param equal_to [in] Equality function for \c Key instances.  The
///   default value usually suffices.
///
/// Throws std::runtime_error if this map type is not insertable
/// (i.e. it has a const key type).  Note that the check runs in the
/// constructor body, after the member views have been constructed.
///
/// The initializers below call capacity(), which reads m_available_indexes;
/// this relies on m_available_indexes being declared before the views.
UnorderedMap( size_type capacity_hint, hasher_type hasher = hasher_type(), equal_to_type equal_to = equal_to_type() )
: m_bounded_insert(true)
, m_hasher(hasher)
, m_equal_to(equal_to)
, m_size()
, m_available_indexes(calculate_capacity(capacity_hint))
, m_hash_lists(ViewAllocateWithoutInitializing("UnorderedMap hash list"), Impl::find_hash_size(capacity()))
, m_next_index(ViewAllocateWithoutInitializing("UnorderedMap next index"), capacity()+1) // +1 so that the *_at functions can always return a valid reference
, m_keys("UnorderedMap keys",capacity()+1)
, m_values("UnorderedMap values",(is_set? 1 : capacity()+1))
, m_scalars("UnorderedMap scalars")
{
if (!is_insertable_map) {
throw std::runtime_error("Cannot construct a non-insertable (i.e. const key_type) unordered_map");
}
// The two index views were allocated without initialization; mark every
// hash list head and every "next" link as empty.
Kokkos::deep_copy(m_hash_lists, invalid_index);
Kokkos::deep_copy(m_next_index, invalid_index);
}
/// \brief Clear the "failed insert" flag (the flag reported by failed_insert()).
///
/// Writes the flag through reset_flag(), i.e. with a host-to-device copy.
void reset_failed_insert_flag()
{
reset_flag(failed_insert_idx);
}
/// \brief Create a histogram object (Impl::UnorderedMapHistogram) constructed from this map.
histogram_type get_histogram()
{
return histogram_type(*this);
}
//! Clear all entries in the table.
void clear()
{
m_bounded_insert = true;
if (capacity() == 0) return;
m_available_indexes.clear();
Kokkos::deep_copy(m_hash_lists, invalid_index);
Kokkos::deep_copy(m_next_index, invalid_index);
{
const key_type tmp = key_type();
Kokkos::deep_copy(m_keys,tmp);
}
if (is_set){
const impl_value_type tmp = impl_value_type();
Kokkos::deep_copy(m_values,tmp);
}
{
Kokkos::deep_copy(m_scalars, 0);
}
}
/// \brief Change the capacity of the map
///
/// If there are no failed inserts the current size of the map will
/// be used as a lower bound for the input capacity.
/// If the map is not empty and does not have failed inserts
/// and the capacity changes then the current data is copied
/// into the resized / rehashed map.
///
/// Bounded insert is enabled on the rehashed map only when the map
/// currently has zero capacity or holds no entries.
///
/// This is <i>not</i> a device function; it may <i>not</i> be
/// called in a parallel kernel.
bool rehash(size_type requested_capacity = 0)
{
  return rehash( requested_capacity, (capacity() == 0) || (size() == 0u) );
}
/// \brief Rebuild the map with (at least) the requested capacity.
///
/// \param requested_capacity [in] Capacity hint for the new map; raised to
///   the current size() if it is smaller.
/// \param bounded_insert [in] Value of the bounded-insert flag on the
///   rehashed map.
///
/// \return false (and nothing happens) if this map type is not insertable,
///   true otherwise.
///
/// Existing entries are transferred with Impl::UnorderedMapRehash, with
/// bounded insert switched off on the new map for the duration of the
/// transfer.
bool rehash(size_type requested_capacity, bool bounded_insert)
{
  if (!is_insertable_map) {
    return false;
  }

  const size_type live_entries = size();
  if (requested_capacity < live_entries) {
    requested_capacity = live_entries;
  }

  insertable_map_type rehashed(requested_capacity, m_hasher, m_equal_to);

  if (live_entries) {
    rehashed.m_bounded_insert = false;
    Impl::UnorderedMapRehash<insertable_map_type> transfer(rehashed, *this);
    transfer.apply();
  }
  rehashed.m_bounded_insert = bounded_insert;

  *this = rehashed;
  return true;
}
/// \brief The number of entries in the table.
///
/// This method has undefined behavior when erasable() is true.
///
/// Note that this is not a device function; it cannot be called in
/// a parallel kernel.  The count is cached in m_size and is only
/// recomputed (from the availability bitset) when the "modified" flag
/// is set; recomputing also clears that flag.
size_type size() const
{
  if ( capacity() != 0u ) {
    if ( modified() ) {
      m_size = m_available_indexes.count();
      reset_flag(modified_idx);
    }
    return m_size;
  }
  return 0u;
}
/// \brief Whether the "failed insert" flag is currently set.
///
/// The result is a boolean flag, not a count.  insert() raises the flag
/// when it gives up; reset_failed_insert_flag() clears it.
///
/// This is <i>not</i> a device function; it may <i>not</i> be
/// called in a parallel kernel.  The value is read back through
/// get_flag() on every call.
bool failed_insert() const
{
return get_flag(failed_insert_idx);
}
/// \brief Whether the map is currently in its erase phase.
///
/// Always false for map types that are not insertable; otherwise the
/// value of the "erasable" flag set by begin_erase() and cleared by
/// end_erase().
bool erasable() const
{
  return is_insertable_map && get_flag(erasable_idx);
}
/// \brief Enter the erase phase.
///
/// \return true if the map was not already erasable, false otherwise.
///
/// For insertable maps that were not yet erasable, the "erasable" flag is
/// set between two execution-space fences.
bool begin_erase()
{
  const bool entered = !erasable();

  if ( !is_insertable_map || !entered ) {
    return entered;
  }

  execution_space::fence();
  set_flag(erasable_idx);
  execution_space::fence();

  return entered;
}
bool end_erase()
{
bool result = erasable();
if (is_insertable_map && result) {
execution_space::fence();
Impl::UnorderedMapErase<declared_map_type> f(*this);
f.apply();
execution_space::fence();
reset_flag(erasable_idx);
}
return result;
}
/// \brief The maximum number of entries that the table can hold.
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
///
/// \return The size of the availability bitset (m_available_indexes).
KOKKOS_FORCEINLINE_FUNCTION
size_type capacity() const
{ return m_available_indexes.size(); }
/// \brief The number of hash table "buckets."
///
/// This is different than the number of entries that the table can
/// hold.  Each key hashes to an index in [0, hash_capacity() - 1].
/// That index can hold zero or more entries.  This class decides
/// what hash_capacity() should be, given the user's upper bound on
/// the number of entries the table must be able to hold.
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
///
/// \return The extent of the hash-list view (m_hash_lists).
KOKKOS_INLINE_FUNCTION
size_type hash_capacity() const
{ return m_hash_lists.dimension_0(); }
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
/// \brief Attempt to insert the key \c k (and value \c v) into the map.
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.  As discussed in the class documentation, it need not
/// succeed.  The return value tells you if it did.
///
/// \param k [in] The key to attempt to insert.
/// \param v [in] The corresponding value to attempt to insert.  If
///   using this class as a set (with Value = void), then you need not
///   provide this value.
///
/// \return An insert_result.  It is left in its default-constructed state
///   when the map is not insertable, has zero capacity, is in its erase
///   phase, or when the insert gives up; otherwise set_existing() or
///   set_success() has been called on it.
KOKKOS_INLINE_FUNCTION
insert_result insert(key_type const& k, impl_value_type const&v = impl_value_type()) const
{
insert_result result;
// Inserting is refused outright for non-insertable maps, empty maps, and
// while the erase phase is active.
if ( !is_insertable_map || capacity() == 0u || m_scalars((int)erasable_idx) ) {
return result;
}
// Mark the map as modified so that size() recounts; the read-before-write
// avoids storing to the flag when it is already set.
if ( !m_scalars((int)modified_idx) ) {
m_scalars((int)modified_idx) = true;
}
int volatile & failed_insert_ref = m_scalars((int)failed_insert_idx) ;
const size_type hash_value = m_hasher(k);
const size_type hash_list = hash_value % m_hash_lists.dimension_0();
// curr_ptr always points at the link that would have to be updated to
// append to the list: first the list head, later a "next" slot.
size_type * curr_ptr = & m_hash_lists[ hash_list ];
size_type new_index = invalid_index ;
// Scale the hash-list index into the slot range [0, capacity()).  The
// product is computed in double so it cannot overflow size_type.
size_type index_hint = static_cast<size_type>( (static_cast<double>(hash_list) * capacity()) / m_hash_lists.dimension_0());
size_type find_attempts = 0;
enum { bounded_find_attempts = 32u };
// With bounded insert the free-slot search is capped at 32 attempts
// (or at max_hint() if that is smaller); otherwise at max_hint().
const size_type max_attempts = (m_bounded_insert && (bounded_find_attempts < m_available_indexes.max_hint()) ) ?
bounded_find_attempts :
m_available_indexes.max_hint();
bool not_done = true ;
#if defined( __MIC__ )
#pragma noprefetch
#endif
while ( not_done ) {
// Continue searching the unordered list for this key,
// list will only be appended during insert phase.
// Need volatile_load as other threads may be appending.
size_type curr = volatile_load(curr_ptr);
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
#if defined( __MIC__ )
#pragma noprefetch
#endif
while ( curr != invalid_index && ! m_equal_to( volatile_load(&m_keys[curr]), k) ) {
result.increment_list_position();
index_hint = curr;
curr_ptr = &m_next_index[curr];
curr = volatile_load(curr_ptr);
KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
}
//------------------------------------------------------------
// If key already present then return that index.
if ( curr != invalid_index ) {
const bool free_existing = new_index != invalid_index;
if ( free_existing ) {
// Previously claimed an unused entry that was not inserted.
// Release this unused entry immediately.
if (!m_available_indexes.reset(new_index) ) {
printf("Unable to free existing\n");
}
}
result.set_existing(curr, free_existing);
not_done = false ;
}
//------------------------------------------------------------
// Key is not currently in the map.
// If the thread has claimed an entry try to insert now.
else {
//------------------------------------------------------------
// If have not already claimed an unused entry then do so now.
if (new_index == invalid_index) {
bool found = false;
// use the hash_list as the flag for the search direction
Kokkos::tie(found, index_hint) = m_available_indexes.find_any_unset_near( index_hint, hash_list );
// Give up, raising the failed-insert flag, once the search has come up
// empty max_attempts times; otherwise try to claim the hinted slot.
if ( !found && ++find_attempts >= max_attempts ) {
failed_insert_ref = true;
not_done = false ;
}
else if (m_available_indexes.set(index_hint) ) {
// This thread set the bit, so it now owns slot index_hint.
new_index = index_hint;
// Set key and value
KOKKOS_NONTEMPORAL_PREFETCH_STORE(&m_keys[new_index]);
m_keys[new_index] = k ;
if (!is_set) {
KOKKOS_NONTEMPORAL_PREFETCH_STORE(&m_values[new_index]);
m_values[new_index] = v ;
}
// Do not proceed until key and value are updated in global memory
memory_fence();
}
}
// A slot is already claimed but the failed-insert flag has been raised:
// make this the last pass through the loop.
// NOTE(review): if the append below then fails, the claimed slot is not
// released on this path -- confirm whether that is intended.
else if (failed_insert_ref) {
not_done = false;
}
// Attempt to append claimed entry into the list.
// Another thread may also be trying to append the same list so protect with atomic.
if ( new_index != invalid_index &&
curr == atomic_compare_exchange(curr_ptr, static_cast<size_type>(invalid_index), new_index) ) {
// Succeeded in appending
result.set_success(new_index);
not_done = false ;
}
}
} // while ( not_done )
return result ;
}
/// \brief Erase the key \c k, if it exists in the table.
///
/// Only has an effect on insertable maps with non-zero capacity while the
/// erase phase is active (see begin_erase() / end_erase()).  The map is
/// marked as modified, and the entry's slot is released in the
/// availability bitset.
///
/// \param k [in] The key to erase.
/// \return true if the key was found at a valid index, false otherwise.
KOKKOS_INLINE_FUNCTION
bool erase(key_type const& k) const
{
  if ( !is_insertable_map || !(0u < capacity()) || !m_scalars((int)erasable_idx) ) {
    return false;
  }

  if ( ! m_scalars((int)modified_idx) ) {
    m_scalars((int)modified_idx) = true;
  }

  const size_type slot = find(k);
  if ( !valid_at(slot) ) {
    return false;
  }

  m_available_indexes.reset(slot);
  return true;
}
/// \brief Find the given key \c k, if it exists in the table.
///
/// \param k [in] The key to look up.
///
/// \return If the key exists in the table, the index of the
///   value corresponding to that key; otherwise, an invalid index.
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
KOKKOS_INLINE_FUNCTION
size_type find( const key_type & k) const
{
  // Start at the head of the hash list this key maps to (or nowhere, for
  // a map with zero capacity).
  size_type curr = 0u < capacity() ? m_hash_lists( m_hasher(k) % m_hash_lists.dimension_0() ) : invalid_index ;
  KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
  while (curr != invalid_index && !m_equal_to( m_keys[curr], k) ) {
    // Advance first, then prefetch the key that will be compared next.
    // Prefetching before advancing would only touch the key that was
    // just read; this order matches the list walk in insert().
    curr = m_next_index[curr];
    KOKKOS_NONTEMPORAL_PREFETCH_LOAD(&m_keys[curr != invalid_index ? curr : 0]);
  }
  return curr;
}
/// \brief Does the key exist in the map
///
/// Looks the key up with find() and checks the resulting index with
/// valid_at().
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
KOKKOS_INLINE_FUNCTION
bool exists( const key_type & k) const
{
return valid_at(find(k));
}
/// \brief Get the value with \c i as its direct index.
///
/// \param i [in] Index directly into the array of entries.  Indices at or
///   beyond capacity() are redirected to the extra slot at capacity();
///   when the map is a set, slot 0 is always used.
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
///
/// 'const value_type' via Cuda texture fetch must return by value.
KOKKOS_FORCEINLINE_FUNCTION
typename Impl::if_c< (is_set || has_const_value), impl_value_type, impl_value_type &>::type
value_at(size_type i) const
{
  const size_type slot = is_set ? 0 : (i < capacity() ? i : capacity());
  return m_values[ slot ];
}
/// \brief Get the key with \c i as its direct index.
///
/// \param i [in] Index directly into the array of entries.  Indices at or
///   beyond capacity() are redirected to the extra slot at capacity().
///
/// This <i>is</i> a device function; it may be called in a parallel
/// kernel.
KOKKOS_FORCEINLINE_FUNCTION
key_type key_at(size_type i) const
{
  const size_type slot = (i < capacity()) ? i : capacity();
  return m_keys[ slot ];
}
/// \brief Whether slot \c i currently holds an entry.
///
/// \param i [in] Index directly into the array of entries.
/// \return The state of bit \c i in the availability bitset.
KOKKOS_FORCEINLINE_FUNCTION
bool valid_at(size_type i) const
{
return m_available_indexes.test(i);
}
/// \brief Converting copy constructor from a map on the same Device.
///
/// Only participates in overload resolution when
/// Impl::UnorderedMapCanAssign allows assigning a <SKey,SValue> map to
/// this map's declared key/value types.  Each member is initialized from
/// the corresponding member of \c src.
/// NOTE(review): the view members are presumably shallow handles, so the
/// new map would share storage with \c src -- confirm against View's
/// copy semantics.
template <typename SKey, typename SValue>
UnorderedMap( UnorderedMap<SKey,SValue,Device,Hasher,EqualTo> const& src,
typename Impl::enable_if< Impl::UnorderedMapCanAssign<declared_key_type,declared_value_type,SKey,SValue>::value,int>::type = 0
)
: m_bounded_insert(src.m_bounded_insert)
, m_hasher(src.m_hasher)
, m_equal_to(src.m_equal_to)
, m_size(src.m_size)
, m_available_indexes(src.m_available_indexes)
, m_hash_lists(src.m_hash_lists)
, m_next_index(src.m_next_index)
, m_keys(src.m_keys)
, m_values(src.m_values)
, m_scalars(src.m_scalars)
{}
/// \brief Converting assignment from a map on the same Device.
///
/// Enabled under the same Impl::UnorderedMapCanAssign condition as the
/// converting constructor.  Assigns every member from \c src and returns
/// a reference to this map.
template <typename SKey, typename SValue>
typename Impl::enable_if< Impl::UnorderedMapCanAssign<declared_key_type,declared_value_type,SKey,SValue>::value
,declared_map_type & >::type
operator=( UnorderedMap<SKey,SValue,Device,Hasher,EqualTo> const& src)
{
m_bounded_insert = src.m_bounded_insert;
m_hasher = src.m_hasher;
m_equal_to = src.m_equal_to;
m_size = src.m_size;
m_available_indexes = src.m_available_indexes;
m_hash_lists = src.m_hash_lists;
m_next_index = src.m_next_index;
m_keys = src.m_keys;
m_values = src.m_values;
m_scalars = src.m_scalars;
return *this;
}
/// \brief Make this map a copy of \c src, which may live on another device.
///
/// Enabled when the key and value types of \c src match this map's types
/// up to const-ness.  Nothing happens if both maps already refer to the
/// same hash-list storage.  Otherwise a temporary insertable map is built
/// with newly allocated storage of the same extents as \c src, the
/// contents are copied over, and the temporary is assigned to this map.
template <typename SKey, typename SValue, typename SDevice>
typename Impl::enable_if< Impl::is_same< typename Impl::remove_const<SKey>::type, key_type>::value &&
Impl::is_same< typename Impl::remove_const<SValue>::type, value_type>::value
>::type
create_copy_view( UnorderedMap<SKey, SValue, SDevice, Hasher,EqualTo> const& src)
{
if (m_hash_lists.ptr_on_device() != src.m_hash_lists.ptr_on_device()) {
insertable_map_type tmp;
tmp.m_bounded_insert = src.m_bounded_insert;
tmp.m_hasher = src.m_hasher;
tmp.m_equal_to = src.m_equal_to;
// src.size() may recount the entries of src if src is flagged as modified.
tmp.m_size = src.size();
tmp.m_available_indexes = bitset_type( src.capacity() );
// Storage is allocated uninitialized; it is filled by the copies below.
tmp.m_hash_lists = size_type_view( ViewAllocateWithoutInitializing("UnorderedMap hash list"), src.m_hash_lists.dimension_0() );
tmp.m_next_index = size_type_view( ViewAllocateWithoutInitializing("UnorderedMap next index"), src.m_next_index.dimension_0() );
tmp.m_keys = key_type_view( ViewAllocateWithoutInitializing("UnorderedMap keys"), src.m_keys.dimension_0() );
tmp.m_values = value_type_view( ViewAllocateWithoutInitializing("UnorderedMap values"), src.m_values.dimension_0() );
tmp.m_scalars = scalars_view("UnorderedMap scalars");
Kokkos::deep_copy(tmp.m_available_indexes, src.m_available_indexes);
// Byte-wise copies from the source memory space into this map's memory space.
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, typename SDevice::memory_space > raw_deep_copy;
raw_deep_copy(tmp.m_hash_lists.ptr_on_device(), src.m_hash_lists.ptr_on_device(), sizeof(size_type)*src.m_hash_lists.dimension_0());
raw_deep_copy(tmp.m_next_index.ptr_on_device(), src.m_next_index.ptr_on_device(), sizeof(size_type)*src.m_next_index.dimension_0());
raw_deep_copy(tmp.m_keys.ptr_on_device(), src.m_keys.ptr_on_device(), sizeof(key_type)*src.m_keys.dimension_0());
// For a set the values are not copied, so tmp.m_values stays uninitialized.
if (!is_set) {
raw_deep_copy(tmp.m_values.ptr_on_device(), src.m_values.ptr_on_device(), sizeof(impl_value_type)*src.m_values.dimension_0());
}
raw_deep_copy(tmp.m_scalars.ptr_on_device(), src.m_scalars.ptr_on_device(), sizeof(int)*num_scalars );
*this = tmp;
}
}
//@}
private: // private member functions
/// \brief Whether the "modified" flag is set (insert() and erase() set it;
/// size() clears it after recounting).
bool modified() const
{
return get_flag(modified_idx);
}
void set_flag(int flag) const
{
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
const int true_ = true;
raw_deep_copy(m_scalars.ptr_on_device() + flag, &true_, sizeof(int));
}
void reset_flag(int flag) const
{
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
const int false_ = false;
raw_deep_copy(m_scalars.ptr_on_device() + flag, &false_, sizeof(int));
}
bool get_flag(int flag) const
{
typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename execution_space::memory_space > raw_deep_copy;
int result = false;
raw_deep_copy(&result, m_scalars.ptr_on_device() + flag, sizeof(int));
return result;
}
/// \brief Turn a capacity hint into the capacity actually allocated.
///
/// \param capacity_hint [in] Requested number of entries.
/// \return 128 for a hint of 0; otherwise the hint scaled by 7/6 (rounded
///   down) and then rounded up to the next multiple of 128.
///
/// The arithmetic is carried out in 64 bits and the result is clamped to
/// the largest multiple of 128 that fits in 32 bits.  Previously, hints
/// above roughly 3.68e9 wrapped around and produced a capacity far smaller
/// than the hint (possibly 0).  For hints above that clamp value the
/// returned capacity is necessarily smaller than the hint.
static uint32_t calculate_capacity(uint32_t capacity_hint)
{
  if (capacity_hint == 0u) {
    return 128u;
  }

  const uint64_t block   = 128u;
  const uint64_t padded  = (static_cast<uint64_t>(7u) * capacity_hint) / 6u;
  const uint64_t rounded = ((padded + (block - 1u)) / block) * block;
  const uint64_t largest = (static_cast<uint64_t>(~static_cast<uint32_t>(0)) / block) * block;

  return static_cast<uint32_t>(rounded < largest ? rounded : largest);
}
private: // private members
bool m_bounded_insert;
hasher_type m_hasher;
equal_to_type m_equal_to;
mutable size_type m_size;
bitset_type m_available_indexes;
size_type_view m_hash_lists;
size_type_view m_next_index;
key_type_view m_keys;
value_type_view m_values;
scalars_view m_scalars;
template <typename KKey, typename VValue, typename DDevice, typename HHash, typename EEqualTo>
friend class UnorderedMap;
template <typename UMap>
friend struct Impl::UnorderedMapErase;
template <typename UMap>
friend struct Impl::UnorderedMapHistogram;
template <typename UMap>
friend struct Impl::UnorderedMapPrint;
};
// Overload of deep_copy for two UnorderedMap objects that share the same
// Hasher and EqualTo types but may differ in key/value const-ness and in
// device.  Forwards to dst.create_copy_view(src).
template < typename DKey, typename DT, typename DDevice
, typename SKey, typename ST, typename SDevice
, typename Hasher, typename EqualTo >
inline void deep_copy( UnorderedMap<DKey, DT, DDevice, Hasher, EqualTo> & dst
, const UnorderedMap<SKey, ST, SDevice, Hasher, EqualTo> & src )
{
dst.create_copy_view(src);
}
} // namespace Kokkos
#endif //KOKKOS_UNORDERED_MAP_HPP

View File

@ -0,0 +1,283 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_VECTOR_HPP
#define KOKKOS_VECTOR_HPP
#include <Kokkos_Core_fwd.hpp>
#include <Kokkos_DualView.hpp>
/* Drop in replacement for std::vector based on Kokkos::DualView
* Most functions only work on the host (it will not compile if called from device kernel)
*
*/
namespace Kokkos {
/// \brief std::vector-like container built on top of Kokkos::DualView.
///
/// The logical size (_size) is tracked separately from the capacity of the
/// underlying DualView; growth over-allocates by the factor _extra_storage.
/// Element access and the search helpers operate on the host view.
///
/// \tparam Scalar   Element type.
/// \tparam Arg1Type Forwarded to DualView as its third template argument.
template< class Scalar, class Arg1Type = void>
class vector : public DualView<Scalar*,LayoutLeft,Arg1Type> {
public:
  // Standard container member types.  These are public (they used to be
  // inaccessible because they preceded the first access specifier) and
  // reference / const_reference are real references, as in std::vector.
  typedef Scalar value_type;
  typedef Scalar* pointer;
  typedef const Scalar* const_pointer;
  typedef Scalar& reference;
  typedef const Scalar& const_reference;
  typedef Scalar* iterator;
  typedef const Scalar* const_iterator;
  typedef size_t size_type;

private:
  size_t _size;          // number of elements logically held
  float _extra_storage;  // over-allocation factor applied when growing
  typedef DualView<Scalar*,LayoutLeft,Arg1Type> DV;

public:
  // Element access goes through the host view.
  // NOTE(review): other parts of this code base spell the UVM macro
  // KOKKOS_USE_CUDA_UVM; confirm that KOKKOS_CUDA_USE_UVM is ever defined.
#ifdef KOKKOS_CUDA_USE_UVM
  KOKKOS_INLINE_FUNCTION Scalar& operator() (int i) const {return DV::h_view(i);}
  KOKKOS_INLINE_FUNCTION Scalar& operator[] (int i) const {return DV::h_view(i);}
#else
  inline Scalar& operator() (int i) const {return DV::h_view(i);}
  inline Scalar& operator[] (int i) const {return DV::h_view(i);}
#endif

  /* Member functions which behave like std::vector functions */

  /// Create an empty vector with the default over-allocation factor of 1.1.
  vector():DV() {
    _size = 0;
    _extra_storage = 1.1;
    DV::modified_host() = 1;
  }

  /// Create a vector of \c n elements, each set to \c val.
  vector(int n, Scalar val=Scalar()):DualView<Scalar*,LayoutLeft,Arg1Type>("Vector",size_t(n*(1.1))) {
    _size = n;
    _extra_storage = 1.1;
    DV::modified_host() = 1;
    assign(n,val);
  }

  /// Set the logical size to \c n, growing the storage if \c n reaches capacity().
  void resize(size_t n) {
    if(n>=capacity())
      DV::resize(size_t (n*_extra_storage));
    _size = n;
  }

  /// Set the logical size to \c n and assign \c val to ALL \c n elements
  /// (unlike std::vector::resize, existing elements are overwritten too).
  void resize(size_t n, const Scalar& val) {
    assign(n,val);
  }

  /// Set the logical size to \c n and fill elements [0, n) with \c val.
  void assign (size_t n, const Scalar& val) {

    /* Resize if necessary (behaviour of std::vector) */

    if(n>capacity())
      DV::resize(size_t (n*_extra_storage));
    _size = n;

    /* Assign value either on host or on device, whichever side was
       modified most recently */

    if( DV::modified_host() >= DV::modified_device() ) {
      set_functor_host f(DV::h_view,val);
      parallel_for(n,f);
      DV::t_host::execution_space::fence();
      DV::modified_host()++;
    } else {
      set_functor f(DV::d_view,val);
      parallel_for(n,f);
      DV::t_dev::execution_space::fence();
      DV::modified_device()++;
    }
  }

  /// Resize the underlying storage to \c n times the over-allocation factor.
  void reserve(size_t n) {
    DV::resize(size_t (n*_extra_storage));
  }

  /// Append \c val on the host side, growing the storage when it is full.
  void push_back(Scalar val) {
    DV::modified_host()++;
    if(_size == capacity()) {
      size_t new_size = _size*_extra_storage;
      // Guarantee growth even when the scaled size rounds down to _size.
      if(new_size == _size) new_size++;
      DV::resize(new_size);
    }

    DV::h_view(_size) = val;
    _size++;
  }

  /// Drop the last element (the storage is left untouched).
  void pop_back() {
    _size--;
  }

  /// Set the logical size to zero (the storage is left untouched).
  void clear() {
    _size = 0;
  }

  size_type size() const {return _size;}
  size_type max_size() const {return 2000000000;}
  size_type capacity() const {return DV::capacity();}
  bool empty() const {return _size==0;}

  iterator begin() const {return &DV::h_view(0);}

  iterator end() const {return &DV::h_view(_size);}


  /* std::algorithms which work originally with iterators, here they are implemented as member functions */

  /// Binary search on the host view between positions \c start and
  /// \c theEnd (clamped to the last element) for \c comp_val.
  /// Returns \c theEnd when the clamped range is empty.
  size_t
  lower_bound (const size_t& start,
               const size_t& theEnd,
               const Scalar& comp_val) const
  {
    int lower = start; // FIXME (mfh 24 Apr 2014) narrowing conversion
    int upper = _size > theEnd? theEnd : _size-1; // FIXME (mfh 24 Apr 2014) narrowing conversion
    if (upper <= lower) {
      return theEnd;
    }

    Scalar lower_val = DV::h_view(lower);
    Scalar upper_val = DV::h_view(upper);
    size_t idx = (upper+lower)/2;
    Scalar val = DV::h_view(idx);
    if(val>upper_val) return upper;
    if(val<lower_val) return start;

    while(upper>lower) {
      if(comp_val>val) {
        lower = ++idx;
      } else {
        upper = idx;
      }
      idx = (upper+lower)/2;
      val = DV::h_view(idx);
    }
    return idx;
  }

  /// True if the host-side elements are in non-decreasing order.
  /// Vectors with fewer than two elements are sorted by definition.
  bool is_sorted() {
    // Compare each element with its predecessor.  Starting at 1 avoids the
    // unsigned underflow of "_size-1" that made the old loop read out of
    // bounds for an empty vector.
    for(size_t i=1;i<_size;i++) {
      if(DV::h_view(i-1)>DV::h_view(i)) return false;
    }
    return true;
  }

  /// Binary search for \c val on the host view, which must be sorted.
  /// Returns a pointer to a matching element, or end() if there is none.
  iterator find(Scalar val) const {
    if(_size == 0) return end();

    int upper,lower,current;
    current = _size/2;
    upper = _size-1;
    lower = 0;

    if((val<DV::h_view(0)) || (val>DV::h_view(_size-1)) ) return end();

    while(upper>lower)
    {
      if(val>DV::h_view(current)) lower = current+1;
      else upper = current;
      current = (upper+lower)/2;
    }

    if(val==DV::h_view(current)) return &DV::h_view(current);
    else return end();
  }

  /* Additional functions for data management */

  /// Copy the device view into the host view.
  void device_to_host(){
    deep_copy(DV::h_view,DV::d_view);
  }
  /// Copy the host view into the device view.
  void host_to_device() const {
    deep_copy(DV::d_view,DV::h_view);
  }

  /// Mark the host side as the most recently modified one.
  void on_host() {
    DV::modified_host() = DV::modified_device() + 1;
  }
  /// Mark the device side as the most recently modified one.
  void on_device() {
    DV::modified_device() = DV::modified_host() + 1;
  }

  /// Set the over-allocation factor to 1 + \c extra.
  void set_overallocation(float extra) {
    _extra_storage = 1.0 + extra;
  }


public:
  /// Functor that assigns one value to every element of the device view.
  struct set_functor {
    typedef typename DV::t_dev::execution_space execution_space;
    typename DV::t_dev _data;
    Scalar _val;

    set_functor(typename DV::t_dev data, Scalar val) :
      _data(data),_val(val) {}

    KOKKOS_INLINE_FUNCTION
    void operator() (const int &i) const {
      _data(i) = _val;
    }
  };

  /// Functor that assigns one value to every element of the host view.
  struct set_functor_host {
    typedef typename DV::t_host::execution_space execution_space;
    typename DV::t_host _data;
    Scalar _val;

    set_functor_host(typename DV::t_host data, Scalar val) :
      _data(data),_val(val) {}

    KOKKOS_INLINE_FUNCTION
    void operator() (const int &i) const {
      _data(i) = _val;
    }
  };

};
}
#endif

View File

@ -0,0 +1,109 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_BITSET_IMPL_HPP
#define KOKKOS_BITSET_IMPL_HPP
#include <Kokkos_Macros.hpp>
#include <impl/Kokkos_BitOps.hpp>
#include <stdint.h>
#include <cstdio>
#include <climits>
#include <iostream>
#include <iomanip>
namespace Kokkos {
namespace Impl {
/// \brief Rotate the bits of \c i to the right by \c r positions.
///
/// \param i [in] Value to rotate.
/// \param r [in] Rotation count.  Any value is accepted: the count is
///   reduced modulo the bit width of \c unsigned, so a count equal to the
///   width is a no-op and a negative count rotates left.
/// \return The rotated value.
///
/// Reducing the count keeps both shifts strictly below the bit width;
/// shifting by the full width (or by a negative amount) is undefined.
/// Results for counts in [0, width) are unchanged.
KOKKOS_FORCEINLINE_FUNCTION
unsigned rotate_right( unsigned i, int r )
{
  enum { size = static_cast<int>( sizeof(unsigned) * CHAR_BIT ) };
  const unsigned shift = static_cast<unsigned>( r ) % static_cast<unsigned>( size );
  return shift ? ( ( i >> shift ) | ( i << ( static_cast<unsigned>( size ) - shift ) ) ) : i ;
}
/// \brief Reduction functor that sums bit_count() over all blocks of a bitset.
///
/// apply() launches a parallel_reduce with one work item per block of
/// m_bitset.m_blocks and returns the accumulated total.
template < typename Bitset >
struct BitsetCount
{
  typedef Bitset                                                 bitset_type;
  typedef typename bitset_type::execution_space::execution_space execution_space;
  typedef typename bitset_type::size_type                        size_type;
  typedef size_type                                              value_type;

  bitset_type m_bitset;

  BitsetCount( bitset_type const& bitset )
    : m_bitset(bitset)
  {}

  /// Run the reduction and return the total.
  size_type apply() const
  {
    size_type total = 0u;
    parallel_reduce( m_bitset.m_blocks.dimension_0(), *this, total );
    return total;
  }

  /// Reduction identity: start every partial sum at zero.
  KOKKOS_INLINE_FUNCTION
  void init( value_type & partial ) const
  {
    partial = 0u;
  }

  /// Combine two partial sums.
  KOKKOS_INLINE_FUNCTION
  void join( volatile value_type & partial, const volatile size_type & other ) const
  {
    partial += other;
  }

  /// Add the bit count of block \c i to the running partial sum.
  KOKKOS_INLINE_FUNCTION
  void operator()( size_type i, value_type & partial ) const
  {
    partial += bit_count( m_bitset.m_blocks[i] );
  }
};
} // namespace Impl
} // namespace Kokkos
#endif // KOKKOS_BITSET_IMPL_HPP

View File

@ -0,0 +1,195 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_FUNCTIONAL_IMPL_HPP
#define KOKKOS_FUNCTIONAL_IMPL_HPP
#include <Kokkos_Macros.hpp>
#include <stdint.h>
namespace Kokkos { namespace Impl {
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
/// Read the i-th 4-byte block starting at \c p as a 32-bit value, with the
/// first byte of the block in the least significant position.
KOKKOS_FORCEINLINE_FUNCTION
uint32_t getblock32 ( const uint8_t * p, int i )
{
  // Assemble the word one byte at a time instead of reading through a
  // uint32_t pointer: this avoids the aliasing error that could otherwise
  // cause problems with forced inlining.
  const uint8_t * const block = p + i*4 ;

  const uint32_t byte0 = block[0] ;
  const uint32_t byte1 = block[1] ;
  const uint32_t byte2 = block[2] ;
  const uint32_t byte3 = block[3] ;

  return byte0 | ( byte1 << 8 ) | ( byte2 << 16 ) | ( byte3 << 24 ) ;
}
/// Rotate the 32-bit value \c x to the left by \c r bit positions.
///
/// The rotation count is reduced modulo 32, so r == 0 (or any multiple of 32)
/// returns \c x unchanged and a negative \c r behaves like the matching
/// non-negative count. For r in 1..31 the result equals
/// (x << r) | (x >> (32 - r)).
KOKKOS_FORCEINLINE_FUNCTION
uint32_t rotl32 ( uint32_t x, int8_t r )
{
  // Mask both shift counts into 0..31: shifting a 32-bit value by 32 (which
  // the unmasked right shift would do for r == 0) is undefined behaviour.
  const unsigned left  = static_cast<unsigned>( r ) & 31u ;
  const unsigned right = ( 32u - left ) & 31u ;
  return ( x << left ) | ( x >> right ) ;
}
/// Final mixing step applied to the 32-bit hash state: three xor-shift
/// stages (by 16, 13 and 16 bits) with a multiplication after each of the
/// first two.
KOKKOS_FORCEINLINE_FUNCTION
uint32_t fmix32 ( uint32_t h )
{
  h = ( h ^ ( h >> 16 ) ) * 0x85ebca6b ;
  h = ( h ^ ( h >> 13 ) ) * 0xc2b2ae35 ;
  return h ^ ( h >> 16 ) ;
}
/// Compute the 32-bit MurmurHash3 of \c len bytes starting at \c key,
/// starting the hash state from \c seed.
///
/// The input is consumed as len/4 whole 4-byte blocks, followed by a tail
/// of len & 3 leftover bytes; the length is then mixed in and the state is
/// passed through fmix32().
KOKKOS_INLINE_FUNCTION
uint32_t MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed )
{
  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  const uint8_t * const bytes = (const uint8_t*)key;
  const int num_blocks = len / 4;

  uint32_t hash = seed;

  //----------
  // body: mix in one whole 4-byte block per iteration

  int block = 0;
  while ( block < num_blocks )
  {
    uint32_t k = getblock32(bytes,block);

    k *= c1;
    k = rotl32(k,15);
    k *= c2;

    hash ^= k;
    hash = rotl32(hash,13);
    hash = hash*5+0xe6546b64;

    ++block;
  }

  //----------
  // tail: fold in the 1 to 3 bytes that remain after the last whole block

  const uint8_t * const tail = (const uint8_t*)(bytes + num_blocks*4);
  const int remaining = len & 3;

  uint32_t k_tail = 0;

  if ( remaining >= 3 ) { k_tail ^= tail[2] << 16; }
  if ( remaining >= 2 ) { k_tail ^= tail[1] << 8; }
  if ( remaining >= 1 ) {
    k_tail ^= tail[0];
    k_tail *= c1;
    k_tail = rotl32(k_tail,15);
    k_tail *= c2;
    hash ^= k_tail;
  }

  //----------
  // finalization

  hash ^= len;

  return fmix32(hash);
}
// KOKKOS_MAY_ALIAS marks the integer typedefs used below with the
// may_alias attribute on GNU-compatible compilers and expands to nothing
// elsewhere. It is local to this section and undefined again at its end.
#if defined( __GNUC__ ) || defined( __GNUG__ ) || defined( __clang__ )
  #define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
#else
  #define KOKKOS_MAY_ALIAS
#endif

/// Compare the sizeof(T) bytes behind \c a_ptr and \c b_ptr for equality.
///
/// The bytes are compared 8 at a time; if sizeof(T) is not a multiple of 8
/// the remainder is compared as at most one 4-byte, one 2-byte and one
/// 1-byte piece, in that order. Returns true only if every piece matches.
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
bool bitwise_equal(T const * const a_ptr, T const * const b_ptr)
{
  typedef uint64_t KOKKOS_MAY_ALIAS word64_type;
  typedef uint32_t KOKKOS_MAY_ALIAS word32_type;
  typedef uint16_t KOKKOS_MAY_ALIAS word16_type;
  typedef uint8_t  KOKKOS_MAY_ALIAS word8_type;

  // Number of complete 1-, 2-, 4- and 8-byte units contained in a T.
  enum {
    COUNT_8  = sizeof(T),
    COUNT_16 = COUNT_8 / 2,
    COUNT_32 = COUNT_8 / 4,
    COUNT_64 = COUNT_8 / 8
  };

  // The same address viewed at each of the unit widths.
  union {
    T           const * const typed;
    word64_type const * const as64;
    word32_type const * const as32;
    word16_type const * const as16;
    word8_type  const * const as8;
  } lhs = {a_ptr}, rhs = {b_ptr};

  // Whole 8-byte units first; stop reading at the first mismatch.
  for (int unit = 0; unit < COUNT_64; ++unit) {
    if ( !( lhs.as64[unit] == rhs.as64[unit] ) ) {
      return false;
    }
  }

  // A leftover 4-byte unit sits right after the 8-byte units.
  if ( COUNT_64*2 < COUNT_32 ) {
    if ( !( lhs.as32[COUNT_64*2] == rhs.as32[COUNT_64*2] ) ) {
      return false;
    }
  }

  // A leftover 2-byte unit sits right after the 4-byte units.
  if ( COUNT_32*2 < COUNT_16 ) {
    if ( !( lhs.as16[COUNT_32*2] == rhs.as16[COUNT_32*2] ) ) {
      return false;
    }
  }

  // A final odd byte sits right after the 2-byte units.
  if ( COUNT_16*2 < COUNT_8 ) {
    if ( !( lhs.as8[COUNT_16*2] == rhs.as8[COUNT_16*2] ) ) {
      return false;
    }
  }

  return true;
}

#undef KOKKOS_MAY_ALIAS
}} // namespace Kokkos::Impl
#endif //KOKKOS_FUNCTIONAL_IMPL_HPP

View File

@ -0,0 +1,208 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP
#define KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
/// Overload selected when ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace
/// is true: the graph passed in is returned directly, converted to its
/// HostMirror type. Nothing is allocated or copied here.
template< typename DataType , typename Arg1Type , typename Arg2Type , typename SizeType >
inline
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror_view(
  const StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > & view ,
  typename Impl::enable_if<
    ViewTraits< DataType , Arg1Type , Arg2Type , void >::is_hostspace
  >::type * = 0 )
{
  return view ;
}
/// Build a HostMirror of \c view and copy the graph's contents into it.
///
/// Mirrors are created for both row_map and entries, and both are then
/// filled with deep_copy, so the result is always a copy.
template< typename DataType , typename Arg1Type , typename Arg2Type , typename SizeType >
inline
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror( const StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > & view )
{
  typedef StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > graph_type ;
  typedef typename graph_type::HostMirror                 host_graph_type ;
  typedef typename graph_type::row_map_type::HostMirror   host_row_map_type ;

  host_graph_type mirror ;

  // The row map is mirrored through a separate non-const handle: the
  // graph's own row_map member is the 'const' flavour, so the copy below
  // has to write through this handle instead.
  host_row_map_type row_map_mirror = create_mirror( view.row_map );

  // Allocation to match the source graph.
  mirror.row_map = row_map_mirror ; // assignment of 'const' from 'non-const'
  mirror.entries = create_mirror( view.entries );

  // Copy the contents across.
  deep_copy( row_map_mirror , view.row_map );
  deep_copy( mirror.entries , view.entries );

  return mirror ;
}
/// Overload selected when ViewTraits<DataType,Arg1Type,Arg2Type,void>::is_hostspace
/// is false: forwards to create_mirror(), which allocates a HostMirror and
/// copies the graph into it.
template< typename DataType , typename Arg1Type , typename Arg2Type , typename SizeType >
inline
typename StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType >::HostMirror
create_mirror_view(
  const StaticCrsGraph< DataType , Arg1Type , Arg2Type , SizeType > & view ,
  typename Impl::enable_if<
    ! ViewTraits< DataType , Arg1Type , Arg2Type , void >::is_hostspace
  >::type * = 0 )
{
  return create_mirror( view );
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
template< class StaticCrsGraphType , class InputSizeType >
inline
typename StaticCrsGraphType::staticcrsgraph_type
create_staticcrsgraph( const std::string & label ,
const std::vector< InputSizeType > & input )
{
typedef StaticCrsGraphType output_type ;
//typedef std::vector< InputSizeType > input_type ; // unused
typedef typename output_type::entries_type entries_type ;
typedef View< typename output_type::size_type [] ,
typename output_type::array_layout ,
typename output_type::execution_space > work_type ;
output_type output ;
// Create the row map:
const size_t length = input.size();
{
work_type row_work( "tmp" , length + 1 );
typename work_type::HostMirror row_work_host =
create_mirror_view( row_work );
size_t sum = 0 ;
row_work_host[0] = 0 ;
for ( size_t i = 0 ; i < length ; ++i ) {
row_work_host[i+1] = sum += input[i];
}
deep_copy( row_work , row_work_host );
output.entries = entries_type( label , sum );
output.row_map = row_work ;
}
return output ;
}
//----------------------------------------------------------------------------
template< class StaticCrsGraphType , class InputSizeType >
inline
typename StaticCrsGraphType::staticcrsgraph_type
create_staticcrsgraph( const std::string & label ,
const std::vector< std::vector< InputSizeType > > & input )
{
typedef StaticCrsGraphType output_type ;
typedef typename output_type::entries_type entries_type ;
static_assert( entries_type::rank == 1
, "Graph entries view must be rank one" );
typedef View< typename output_type::size_type [] ,
typename output_type::array_layout ,
typename output_type::execution_space > work_type ;
output_type output ;
// Create the row map:
const size_t length = input.size();
{
work_type row_work( "tmp" , length + 1 );
typename work_type::HostMirror row_work_host =
create_mirror_view( row_work );
size_t sum = 0 ;
row_work_host[0] = 0 ;
for ( size_t i = 0 ; i < length ; ++i ) {
row_work_host[i+1] = sum += input[i].size();
}
deep_copy( row_work , row_work_host );
output.entries = entries_type( label , sum );
output.row_map = row_work ;
}
// Fill in the entries:
{
typename entries_type::HostMirror host_entries =
create_mirror_view( output.entries );
size_t sum = 0 ;
for ( size_t i = 0 ; i < length ; ++i ) {
for ( size_t j = 0 ; j < input[i].size() ; ++j , ++sum ) {
host_entries( sum ) = input[i][j] ;
}
}
deep_copy( output.entries , host_entries );
}
return output ;
}
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_IMPL_STATICCRSGRAPH_FACTORY_HPP */

View File

@ -0,0 +1,101 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_UnorderedMap.hpp>
namespace Kokkos { namespace Impl {
/// Pick a hash-table size for a requested \c size.
///
/// Returns 0 for a request of 0. Otherwise returns the smallest entry of the
/// prime table below that is >= \c size, or the largest table entry
/// (268435399) when the request exceeds every entry.
uint32_t find_hash_size(uint32_t size)
{
  // these primes try to preserve randomness of hash
  static const uint32_t prime_table [] = {
            3,         7,        13,        23,        53,        97,       193,       389,       769,      1543,
         2237,      2423,      2617,      2797,      2999,      3167,      3359,      3539,
         3727,      3911,      4441,      4787,      5119,      5471,      5801,      6143,      6521,      6827,
         7177,      7517,      7853,      8887,      9587,     10243,     10937,     11617,     12289,
        12967,     13649,     14341,     15013,     15727,
        17749,     19121,     20479,     21859,     23209,     24593,     25939,     27329,
        28669,     30047,     31469,     35507,     38231,     40961,     43711,     46439,
        49157,     51893,     54617,     57347,     60077,     62801,     70583,     75619,
        80669,     85703,     90749,     95783,    100823,    105871,    110909,    115963,
       120997,    126031,    141157,    151237,    161323,    171401,    181499,    191579,
       201653,    211741,    221813,    231893,    241979,    252079,
       282311,    302483,    322649,    342803,    362969,    383143,    403301,    423457,
       443629,    463787,    483953,    504121,    564617,    604949,    645313,    685609,
       725939,    766273,    806609,    846931,    887261,    927587,    967919,   1008239,
      1123477,   1198397,   1273289,   1348177,   1423067,   1497983,   1572869,
      1647761,   1722667,   1797581,   1872461,   1947359,   2022253,
      2246953,   2396759,   2546543,   2696363,   2846161,   2995973,   3145739,
      3295541,   3445357,   3595117,   3744941,   3894707,   4044503,
      4493921,   4793501,   5093089,   5392679,   5692279,   5991883,   6291469,
      6591059,   6890641,   7190243,   7489829,   7789447,   8089033,
      8987807,   9586981,  10186177,  10785371,  11384539,  11983729,
     12582917,  13182109,  13781291,  14380469,  14979667,  15578861,
     16178053,  17895707,  19014187,  20132683,  21251141,  22369661,
     23488103,  24606583,  25725083,  26843549,  27962027,  29080529,
     30198989,  31317469,  32435981,  35791397,  38028379,  40265327,
     42502283,  44739259,  46976221,  49213237,  51450131,  53687099,
     55924061,  58161041,  60397993,  62634959,  64871921,
     71582857,  76056727,  80530643,  85004567,  89478503,  93952427,
     98426347, 102900263, 107374217, 111848111, 116322053, 120795971,
    125269877, 129743807, 143165587, 152113427, 161061283, 170009141,
    178956983, 187904819, 196852693, 205800547, 214748383, 223696237,
    232644089, 241591943, 250539763, 259487603, 268435399
  };

  const uint32_t table_length = sizeof(prime_table)/sizeof(prime_table[0]);

  if (size == 0u) {
    return 0u;
  }

  // Walk forward past every prime that is too small for the request.
  uint32_t pos = 0;
  while (pos < table_length && prime_table[pos] < size) {
    ++pos;
  }

  // Requests beyond the table are clamped to its last (largest) entry.
  if (pos == table_length) {
    pos = table_length - 1;
  }

  return prime_table[pos];
}
}} // namespace Kokkos::Impl

View File

@ -0,0 +1,297 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_UNORDERED_MAP_IMPL_HPP
#define KOKKOS_UNORDERED_MAP_IMPL_HPP
#include <Kokkos_Core_fwd.hpp>
#include <stdint.h>
#include <cstdio>
#include <climits>
#include <iostream>
#include <iomanip>
namespace Kokkos { namespace Impl {
// Declaration only; the definition is not in this header.
// NOTE(review): presumably maps a requested element count to the hash-table
// size to use — confirm against the definition.
uint32_t find_hash_size( uint32_t size );
/// Functor that re-inserts every valid entry of a source map into a
/// destination map.
///
/// apply() launches one work item per slot of the source map's capacity.
template <typename Map>
struct UnorderedMapRehash
{
  typedef Map                                  map_type;
  typedef typename map_type::const_map_type    const_map_type;
  typedef typename map_type::execution_space   execution_space;
  typedef typename map_type::size_type         size_type;

  map_type       m_dst;
  const_map_type m_src;

  UnorderedMapRehash( map_type const& dst, const_map_type const& src)
    : m_dst(dst)
    , m_src(src)
  {}

  /// Visit every slot of the source map in parallel.
  void apply() const
  {
    parallel_for(m_src.capacity(), *this);
  }

  /// Copy slot \c slot into the destination; slots that are not valid are skipped.
  KOKKOS_INLINE_FUNCTION
  void operator()(size_type slot) const
  {
    if ( !m_src.valid_at(slot) ) {
      return;
    }
    m_dst.insert(m_src.key_at(slot), m_src.value_at(slot));
  }
};
/// Functor that unlinks entries which are no longer valid from the map's
/// per-bucket linked lists.
///
/// apply() launches one work item per bucket (entry of m_hash_lists). Each
/// work item walks its bucket's list, removes every node for which
/// valid_at() is false, and resets that node's link and key.
template <typename UMap>
struct UnorderedMapErase
{
  typedef UMap                                  map_type;
  typedef typename map_type::execution_space    execution_space;
  typedef typename map_type::size_type          size_type;
  typedef typename map_type::key_type           key_type;
  typedef typename map_type::impl_value_type    value_type;

  map_type m_map;

  UnorderedMapErase( map_type const& map)
    : m_map(map)
  {}

  /// Process every bucket in parallel.
  void apply() const
  {
    parallel_for(m_map.m_hash_lists.dimension_0(), *this);
  }

  /// Clean the list that hangs off bucket \c i.
  KOKKOS_INLINE_FUNCTION
  void operator()( size_type i ) const
  {
    const size_type invalid_index = map_type::invalid_index;

    size_type node = m_map.m_hash_lists(i);

    // Phase 1: pop invalid nodes off the front, moving the bucket head
    // forward each time.
    while ( node != invalid_index && !m_map.valid_at(node) ) {
      const size_type successor = m_map.m_next_index[node];

      m_map.m_next_index[node] = invalid_index;
      m_map.m_keys[node] = key_type();
      // NOTE(review): the stored value is reset only when is_set is true;
      // confirm against the map definition that this polarity is intended.
      if (m_map.is_set) m_map.m_values[node] = value_type();

      m_map.m_hash_lists(i) = successor;
      node = successor;
    }

    // Nothing left, or no valid head to anchor the rest of the walk.
    if ( node == invalid_index || !m_map.valid_at(node) ) {
      return;
    }

    // Phase 2: the head is valid. Walk the remainder, splicing every invalid
    // node out from behind the most recent valid one.
    size_type last_valid = node;
    node = m_map.m_next_index[last_valid];

    while ( node != invalid_index ) {
      const size_type successor = m_map.m_next_index[node];

      if ( m_map.valid_at(node) ) {
        last_valid = node;
      }
      else {
        // unlink 'node' and reset its slot
        m_map.m_next_index[last_valid] = successor;
        m_map.m_next_index[node] = invalid_index;
        m_map.m_keys[node] = key_type();
        if (map_type::is_set) m_map.m_values[node] = value_type();
      }

      node = successor;
    }
  }
};
/// Collects three 100-bin histograms over the buckets of an unordered map:
/// list length, index span (max index - min index) and 32-entry block span
/// of the nodes in each bucket's list. Values of 100 or more are counted in
/// the last bin (99). Empty buckets are not counted.
template <typename UMap>
struct UnorderedMapHistogram
{
  typedef UMap                                  map_type;
  typedef typename map_type::execution_space    execution_space;
  typedef typename map_type::size_type          size_type;

  typedef View<int[100], execution_space>       histogram_view;
  typedef typename histogram_view::HostMirror   host_histogram_view;

  map_type       m_map;
  histogram_view m_length;
  histogram_view m_distance;
  histogram_view m_block_distance;

  UnorderedMapHistogram( map_type const& map)
    : m_map(map)
    , m_length("UnorderedMap Histogram")
    , m_distance("UnorderedMap Histogram")
    , m_block_distance("UnorderedMap Histogram")
  {}

  /// Fill the histograms: one work item per bucket.
  void calculate()
  {
    parallel_for(m_map.m_hash_lists.dimension_0(), *this);
  }

  /// Reset all three histograms to zero.
  void clear()
  {
    Kokkos::deep_copy(m_length, 0);
    Kokkos::deep_copy(m_distance, 0);
    Kokkos::deep_copy(m_block_distance, 0);
  }

  /// Write one histogram to \c out as " , "-separated bin counts; the
  /// trailing separator is overwritten with backspaces before the newline.
  static void print_histogram(std::ostream &out, histogram_view const& histogram)
  {
    host_histogram_view host_copy = create_mirror_view(histogram);
    Kokkos::deep_copy(host_copy, histogram);

    const int num_bins = host_copy.dimension_0();
    for (int bin = 0; bin < num_bins; ++bin)
    {
      out << host_copy[bin] << " , ";
    }
    out << "\b\b\b   " << std::endl;
  }

  /// Print the list-length histogram.
  void print_length(std::ostream &out)
  {
    print_histogram(out, m_length);
  }

  /// Print the index-span histogram.
  void print_distance(std::ostream &out)
  {
    print_histogram(out, m_distance);
  }

  /// Print the block-span histogram.
  void print_block_distance(std::ostream &out)
  {
    print_histogram(out, m_block_distance);
  }

  /// Measure the list of bucket \c i and bump the matching bins.
  KOKKOS_INLINE_FUNCTION
  void operator()( size_type i ) const
  {
    const size_type invalid_index = map_type::invalid_index;

    uint32_t  length  = 0;
    size_type lowest  = ~0u;
    size_type highest = 0;

    size_type node = m_map.m_hash_lists(i);
    while ( node != invalid_index ) {
      ++length;
      if ( node < lowest )  { lowest  = node; }
      if ( highest < node ) { highest = node; }
      node = m_map.m_next_index[node];
    }

    // Empty buckets contribute nothing to any histogram.
    if ( !( 0u < length ) ) {
      return;
    }

    size_type distance = highest - lowest;
    size_type blocks   = highest/32u - lowest/32u;

    // normalize data: everything past the last bin is counted in bin 99
    if ( !( length   < 100u ) ) { length   = 99u; }
    if ( !( distance < 100u ) ) { distance = 99u; }
    if ( !( blocks   < 100u ) ) { blocks   = 99u; }

    atomic_fetch_add( &m_length(length), 1);
    atomic_fetch_add( &m_distance(distance), 1);
    atomic_fetch_add( &m_block_distance(blocks), 1);
  }
};
/// Debug functor that prints every key/value pair of a map, bucket by bucket.
///
/// Each node is printed as "<bucket head>[<position in list>]: <key>-><value>".
// NOTE(review): the format string uses %d for every field; confirm that this
// matches the map's index, key and value types before relying on the output.
template <typename UMap>
struct UnorderedMapPrint
{
  typedef UMap                                  map_type;
  typedef typename map_type::execution_space    execution_space;
  typedef typename map_type::size_type          size_type;

  map_type m_map;

  UnorderedMapPrint( map_type const& map)
    : m_map(map)
  {}

  /// Print all buckets: one work item per bucket.
  void apply()
  {
    parallel_for(m_map.m_hash_lists.dimension_0(), *this);
  }

  /// Print the nodes of bucket \c i in list order.
  KOKKOS_INLINE_FUNCTION
  void operator()( size_type i ) const
  {
    const size_type invalid_index = map_type::invalid_index;

    uint32_t list = m_map.m_hash_lists(i);

    size_type curr = list;
    size_type ii   = 0;
    while ( curr != invalid_index ) {
      printf("%d[%d]: %d->%d\n", list, ii, m_map.key_at(curr), m_map.value_at(curr));
      curr = m_map.m_next_index[curr];
      ++ii;
    }
  }
};
/// Compile-time predicate: can a map with destination types <DKey, DValue>
/// be assigned from a map with source types <SKey, SValue>?
///
/// The primary template answers no. The specializations below answer yes
/// when the types are identical, or when the destination only adds const.
template <typename DKey, typename DValue, typename SKey, typename SValue>
struct UnorderedMapCanAssign
  : public false_
{};

/// Identical key and value types.
template <typename Key, typename Value>
struct UnorderedMapCanAssign< Key, Value, Key, Value >
  : public true_
{};

/// Destination adds const to the key.
template <typename Key, typename Value>
struct UnorderedMapCanAssign< const Key, Value, Key, Value >
  : public true_
{};

/// Destination adds const to both key and value.
template <typename Key, typename Value>
struct UnorderedMapCanAssign< const Key, const Value, Key, Value >
  : public true_
{};

/// Source key is already const; destination adds const to the value.
template <typename Key, typename Value>
struct UnorderedMapCanAssign< const Key, const Value, const Key, Value >
  : public true_
{};
}} //Kokkos::Impl
#endif // KOKKOS_UNORDERED_MAP_IMPL_HPP

View File

@ -0,0 +1,40 @@
# Unit tests for the Kokkos containers: a single executable, UnitTest, built
# from a common driver plus one source file per enabled backend.

# Header search paths: the build directory, this directory, and ../src.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )

SET(LIBRARIES kokkoscore)

# Sources that are always compiled; backend-specific tests are appended below.
SET(SOURCES UnitTestMain.cpp TestCuda.cpp)

IF(Kokkos_ENABLE_Pthread)
  LIST(APPEND SOURCES TestThreads.cpp)
ENDIF()

IF(Kokkos_ENABLE_Serial)
  LIST(APPEND SOURCES TestSerial.cpp)
ENDIF()

IF(Kokkos_ENABLE_OpenMP)
  LIST(APPEND SOURCES TestOpenMP.cpp)
ENDIF()

# Build the executable and register it as a test; the test is treated as
# failed when its output matches the FAIL_REGULAR_EXPRESSION pattern.
TRIBITS_ADD_EXECUTABLE_AND_TEST(
  UnitTest
  SOURCES ${SOURCES}
  COMM serial mpi
  NUM_MPI_PROCS 1
  FAIL_REGULAR_EXPRESSION " FAILED "
  TESTONLYLIBS kokkos_gtest
)

View File

@ -0,0 +1,92 @@
# Builds and runs the Kokkos containers unit tests: one executable per
# backend enabled in Makefile.kokkos (Cuda, Threads, OpenMP, Serial).

KOKKOS_PATH = ../..

GTEST_PATH = ../../TPL/gtest

# Search the unit-test source directory for .cpp prerequisites.
vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests

# NOTE(review): this rule is kept ahead of the include below, presumably so
# that 'default' stays the default goal whatever Makefile.kokkos defines.
default: build_all
	echo "End Build"

# These targets are commands, not files: declare them phony so that a stray
# file or directory with the same name (e.g. "test" or "clean") cannot make
# them appear up to date.
.PHONY: default build_all test test-cuda test-threads test-openmp test-serial clean

include $(KOKKOS_PATH)/Makefile.kokkos

# Compiler and linker selection. With Cuda enabled the nvcc wrapper is forced;
# otherwise values already set by the user or environment are respected.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  CXX = $(NVCC_WRAPPER)
  CXXFLAGS ?= -O3
  LINK = $(CXX)
  LDFLAGS ?= -lpthread
else
  CXX ?= g++
  CXXFLAGS ?= -O3
  LINK ?= $(CXX)
  LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/unit_tests

# Executables to build and test targets to run; filled in per enabled backend.
TEST_TARGETS =
TARGETS =

ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o
  TARGETS += KokkosContainers_UnitTest_Cuda
  TEST_TARGETS += test-cuda
endif

ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
  OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o
  TARGETS += KokkosContainers_UnitTest_Threads
  TEST_TARGETS += test-threads
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
  OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o
  TARGETS += KokkosContainers_UnitTest_OpenMP
  TEST_TARGETS += test-openmp
endif

ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
  OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o
  TARGETS += KokkosContainers_UnitTest_Serial
  TEST_TARGETS += test-serial
endif

# Link rules, one per backend executable.
KokkosContainers_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Cuda

KokkosContainers_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Threads

KokkosContainers_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_OpenMP

KokkosContainers_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Serial

# Run rules: each builds its executable if needed and then executes it.
test-cuda: KokkosContainers_UnitTest_Cuda
	./KokkosContainers_UnitTest_Cuda

test-threads: KokkosContainers_UnitTest_Threads
	./KokkosContainers_UnitTest_Threads

test-openmp: KokkosContainers_UnitTest_OpenMP
	./KokkosContainers_UnitTest_OpenMP

test-serial: KokkosContainers_UnitTest_Serial
	./KokkosContainers_UnitTest_Serial

build_all: $(TARGETS)

test: $(TEST_TARGETS)

clean: kokkos-clean
	rm -f *.o $(TARGETS)

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc

View File

@ -0,0 +1,285 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_BITSET_HPP
#define KOKKOS_TEST_BITSET_HPP
#include <gtest/gtest.h>
#include <iostream>
namespace Test {
namespace Impl {
/// Reduction functor that sets (Set == true) or resets (Set == false) bits
/// and counts how many of those operations reported success and were then
/// confirmed by test().
///
/// testit(collisions) runs size()*collisions work items; each index is
/// wrapped modulo size(), so every bit is targeted \c collisions times.
template <typename Bitset, bool Set>
struct TestBitset
{
  typedef Bitset                                    bitset_type;
  typedef typename bitset_type::execution_space     execution_space;
  typedef uint32_t                                  value_type;

  bitset_type m_bitset;

  TestBitset( bitset_type const& bitset)
    : m_bitset(bitset)
  {}

  /// Run the reduction and return the number of confirmed operations.
  unsigned testit(unsigned collisions)
  {
    execution_space::fence();

    unsigned confirmed = 0;
    Kokkos::parallel_reduce( m_bitset.size()*collisions, *this, confirmed);
    return confirmed;
  }

  KOKKOS_INLINE_FUNCTION
  void init( value_type & partial ) const
  {
    partial = 0;
  }

  KOKKOS_INLINE_FUNCTION
  void join( volatile value_type & dst, const volatile value_type & src ) const
  {
    dst += src;
  }

  KOKKOS_INLINE_FUNCTION
  void operator()(uint32_t i, value_type & v) const
  {
    // Wrap the work index onto a valid bit position.
    i = i % m_bitset.size();

    if (Set) {
      // Count only when set() succeeded and the bit now reads as set.
      if (m_bitset.set(i) && m_bitset.test(i)) {
        ++v;
      }
    }
    else {
      // Count only when reset() succeeded and the bit now reads as clear.
      if (m_bitset.reset(i) && !m_bitset.test(i)) {
        ++v;
      }
    }
  }
};
/// Reduction functor that counts how many bits of a bitset read as set,
/// using one test() call per bit.
template <typename Bitset>
struct TestBitsetTest
{
  typedef Bitset                                    bitset_type;
  typedef typename bitset_type::execution_space     execution_space;
  typedef uint32_t                                  value_type;

  bitset_type m_bitset;

  TestBitsetTest( bitset_type const& bitset)
    : m_bitset(bitset)
  {}

  /// Run the reduction over all size() bits and return the number set.
  unsigned testit()
  {
    execution_space::fence();

    unsigned num_set = 0;
    Kokkos::parallel_reduce( m_bitset.size(), *this, num_set);
    return num_set;
  }

  KOKKOS_INLINE_FUNCTION
  void init( value_type & partial ) const
  {
    partial = 0;
  }

  KOKKOS_INLINE_FUNCTION
  void join( volatile value_type & dst, const volatile value_type & src ) const
  {
    dst += src;
  }

  KOKKOS_INLINE_FUNCTION
  void operator()(uint32_t i, value_type & v) const
  {
    if (!m_bitset.test( i )) {
      return;
    }
    ++v;
  }
};
/// Reduction functor exercising the "find any ... near" searches.
///
/// With Set == true each work item looks for an unset bit near a hint and
/// tries to set it; with Set == false it looks for a set bit and tries to
/// reset it. The reduction counts the work items that managed to change a bit.
template <typename Bitset, bool Set>
struct TestBitsetAny
{
  typedef Bitset                                    bitset_type;
  typedef typename bitset_type::execution_space     execution_space;
  typedef uint32_t                                  value_type;

  bitset_type m_bitset;

  TestBitsetAny( bitset_type const& bitset)
    : m_bitset(bitset)
  {}

  /// Run the reduction over size() work items and return the success count.
  unsigned testit()
  {
    execution_space::fence();

    unsigned changed_bits = 0;
    Kokkos::parallel_reduce( m_bitset.size(), *this, changed_bits);
    return changed_bits;
  }

  KOKKOS_INLINE_FUNCTION
  void init( value_type & partial ) const
  {
    partial = 0;
  }

  KOKKOS_INLINE_FUNCTION
  void join( volatile value_type & dst, const volatile value_type & src ) const
  {
    dst += src;
  }

  KOKKOS_INLINE_FUNCTION
  void operator()(uint32_t i, value_type & v) const
  {
    bool     found    = false;
    unsigned attempts = 0;
    uint32_t hint     = (i >> 4) << 4;   // start from i rounded down to a multiple of 16

    while (attempts < m_bitset.max_hint()) {
      bool changed = false;

      if (Set) {
        Kokkos::tie(found, hint) = m_bitset.find_any_unset_near(hint, i);
        changed = found && m_bitset.set(hint);
      }
      else {
        Kokkos::tie(found, hint) = m_bitset.find_any_set_near(hint, i);
        changed = found && m_bitset.reset(hint);
      }

      if (changed) {
        ++v;
        break;
      }

      // A failed search uses up an attempt. A search that found a bit which
      // could not then be changed is retried without using one up.
      if (!found) {
        ++attempts;
      }
    }
  }
};
} // namespace Impl
/// Exercise Kokkos::Bitset on \c Device for several sizes.
///
/// For each size the test checks, in order: a fresh bitset is empty; set()
/// sets every bit; reset() clears every bit; per-bit set(i) and reset(i)
/// each succeed exactly size() times under repeated attempts; and the
/// find_any_unset_near / find_any_set_near searches can fill and then empty
/// the whole bitset.
template <typename Device>
void test_bitset()
{
  typedef Kokkos::Bitset< Device >       bitset_type;
  typedef Kokkos::ConstBitset< Device >  const_bitset_type;

  // A size of 0 is currently not part of the tested sizes.
  unsigned test_sizes[] = { 1000u, 1u<<14, 1u<<16, 10000001 };
  const int num_sizes = sizeof(test_sizes)/sizeof(unsigned);

  for (int s = 0; s < num_sizes; ++s) {

    bitset_type bitset(test_sizes[s]);

    // Initial count: nothing should be set.
    {
      Impl::TestBitsetTest< bitset_type > f(bitset);
      uint32_t count = f.testit();
      EXPECT_EQ(0u, count);
      EXPECT_EQ(count, bitset.count());
    }

    // set(): everything should be set.
    bitset.set();
    {
      Impl::TestBitsetTest< const_bitset_type > f(bitset);
      uint32_t count = f.testit();
      EXPECT_EQ(bitset.size(), count);
      EXPECT_EQ(count, bitset.count());
    }

    // reset(): everything should be clear again.
    bitset.reset();
    EXPECT_EQ(0u, bitset.count());

    // set(i): each bit is targeted 10 times and must be set exactly once.
    {
      Impl::TestBitset< bitset_type, true > f(bitset);
      uint32_t count = f.testit(10u);
      EXPECT_EQ( bitset.size(), bitset.count());
      EXPECT_EQ( bitset.size(), count );
    }

    // reset(i): each bit is targeted 10 times and must be reset exactly once.
    {
      Impl::TestBitset< bitset_type, false > f(bitset);
      uint32_t count = f.testit(10u);
      EXPECT_EQ( bitset.size(), count);
      EXPECT_EQ( 0u, bitset.count() );
    }

    // find_any_unset_near + set: fill the bitset one found bit at a time.
    {
      Impl::TestBitsetAny< bitset_type, true > f(bitset);
      uint32_t count = f.testit();
      EXPECT_EQ( bitset.size(), bitset.count());
      EXPECT_EQ( bitset.size(), count );
    }

    // find_any_set_near + reset: empty the bitset one found bit at a time.
    {
      Impl::TestBitsetAny< bitset_type, false > f(bitset);
      uint32_t count = f.testit();
      EXPECT_EQ( bitset.size(), count);
      EXPECT_EQ( 0u, bitset.count() );
    }
  }
}
} // namespace Test
#endif //KOKKOS_TEST_BITSET_HPP

View File

@ -0,0 +1,263 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_TEST_COMPLEX_HPP
#define KOKKOS_TEST_COMPLEX_HPP
#include <Kokkos_Complex.hpp>
#include <gtest/gtest.h>
#include <iostream>
namespace Test {
namespace Impl {
// Checks construction, (in)equality comparison and assignment of
// Kokkos::complex<RealType> on the host.
template <typename RealType>
void testComplexConstructors () {
typedef Kokkos::complex<RealType> complex_type;
// z1 is default-constructed; the first assertion below expects it to
// compare equal to (0, 0).
complex_type z1;
complex_type z2 (0.0, 0.0);
complex_type z3 (1.0, 0.0);
complex_type z4 (0.0, 1.0);
complex_type z5 (-1.0, -2.0);
ASSERT_TRUE( z1 == z2 );
ASSERT_TRUE( z1 != z3 );
ASSERT_TRUE( z1 != z4 );
ASSERT_TRUE( z1 != z5 );
ASSERT_TRUE( z2 != z3 );
ASSERT_TRUE( z2 != z4 );
ASSERT_TRUE( z2 != z5 );
ASSERT_TRUE( z3 != z4 );
ASSERT_TRUE( z3 != z5 );
// Two objects built from the same parts must compare equal.
complex_type z6 (-1.0, -2.0);
ASSERT_TRUE( z5 == z6 );
// Make sure that complex has value semantics, in particular, that
// equality tests use values and not pointers, so that
// reassignment actually changes the value.
z1 = complex_type (-3.0, -4.0);
ASSERT_TRUE( z1.real () == -3.0 );
ASSERT_TRUE( z1.imag () == -4.0 );
ASSERT_TRUE( z1 != z2 );
// Single-argument constructor: the assertions expect a zero imaginary
// part and working comparison against a plain real value.
complex_type z7 (1.0);
ASSERT_TRUE( z3 == z7 );
ASSERT_TRUE( z7 == 1.0 );
ASSERT_TRUE( z7 != -1.0 );
z7 = complex_type (5.0);
ASSERT_TRUE( z7.real () == 5.0 );
ASSERT_TRUE( z7.imag () == 0.0 );
}
// Checks binary plus: (1 - i) + (-1 + i) must equal 0.
template <typename RealType>
void testPlus () {
typedef Kokkos::complex<RealType> complex_type;
complex_type z1 (1.0, -1.0);
complex_type z2 (-1.0, 1.0);
complex_type z3 = z1 + z2;
ASSERT_TRUE( z3 == complex_type (0.0, 0.0) );
}
template <typename RealType>
void testMinus () {
typedef Kokkos::complex<RealType> complex_type;
// Test binary minus.
complex_type z1 (1.0, -1.0);
complex_type z2 (-1.0, 1.0);
complex_type z3 = z1 - z2;
ASSERT_TRUE( z3 == complex_type (2.0, -2.0) );
// Test unary minus.
complex_type z4 (3.0, -4.0);
ASSERT_TRUE( -z1 == complex_type (-3.0, 4.0) );
}
// Checks multiplication of two Kokkos::complex values and of a
// std::complex by a Kokkos::complex.
template <typename RealType>
void testTimes () {
typedef Kokkos::complex<RealType> complex_type;
// (1 - i)(-1 + i) == 2i.
complex_type z1 (1.0, -1.0);
complex_type z2 (-1.0, 1.0);
complex_type z3 = z1 * z2;
ASSERT_TRUE( z3 == complex_type (0.0, 2.0) );
// Make sure that std::complex * Kokkos::complex works too.
std::complex<RealType> z4 (-1.0, 1.0);
complex_type z5 = z4 * z1;
ASSERT_TRUE( z5 == complex_type (0.0, 2.0) );
}
// Checks complex / real and complex / complex division, using
// Kokkos::conj, Kokkos::real and Kokkos::imag to build the expected value.
template <typename RealType>
void testDivide () {
typedef Kokkos::complex<RealType> complex_type;
// Test division of a complex number by a real number.
complex_type z1 (1.0, -1.0);
complex_type z2 (1.0 / 2.0, -1.0 / 2.0);
ASSERT_TRUE( z1 / 2.0 == z2 );
// (-1+2i)/(1-i) == ((-1+2i)(1+i)) / ((1-i)(1+i))
// (-1+2i)(1+i) == -3 + i
complex_type z3 (-1.0, 2.0);
complex_type z4 (1.0, -1.0);
complex_type z5 (-3.0, 1.0);
ASSERT_TRUE(z3 * Kokkos::conj (z4) == z5 );
// Test division of a complex number by a complex number.
// This assumes that RealType is a floating-point type.
// The denominator (1-i)(1+i) is 2, so the expected quotient is z5 / 2.
complex_type z6 (Kokkos::real (z5) / 2.0,
Kokkos::imag (z5) / 2.0);
complex_type z7 = z3 / z4;
ASSERT_TRUE( z7 == z6 );
}
// Runs the complex-number checks for RealType that are plain function calls
// (constructors, plus, times, divide).
// NOTE(review): testMinus<RealType> is defined in this header but is not
// invoked here — confirm whether that omission is intentional.
template <typename RealType>
void testOutsideKernel () {
testComplexConstructors<RealType> ();
testPlus<RealType> ();
testTimes<RealType> ();
testDivide<RealType> ();
}
// Checks that a one-dimensional View of Kokkos::complex<RealType> can be
// created on Device and assigned to both a non-const and a const-valued View.
template<typename RealType, typename Device>
void testCreateView () {
typedef Kokkos::complex<RealType> complex_type;
Kokkos::View<complex_type*, Device> x ("x", 10);
ASSERT_TRUE( x.dimension_0 () == 10 );
// Test that View assignment works.
// These two declarations only need to compile; nothing is asserted on them.
Kokkos::View<complex_type*, Device> x_nonconst = x;
Kokkos::View<const complex_type*, Device> x_const = x;
}
// Functor that writes one fixed complex value into every visited entry of a
// one-dimensional View; used with Kokkos::parallel_for.
template<typename RealType, typename Device>
class Fill {
public:
  typedef typename Device::execution_space execution_space;
  typedef Kokkos::View<Kokkos::complex<RealType>*, Device> view_type;
  typedef typename view_type::size_type size_type;

  // Captures the destination view and the value to store.
  Fill (const view_type& x, const Kokkos::complex<RealType>& val)
    : dest_ (x)
    , fill_value_ (val)
  {}

  // Stores the captured value into entry i.
  KOKKOS_INLINE_FUNCTION
  void operator () (const size_type i) const {
    dest_(i) = fill_value_;
  }

private:
  view_type dest_;
  const Kokkos::complex<RealType> fill_value_;
};
template<typename RealType, typename Device>
class Sum {
public:
typedef typename Device::execution_space execution_space;
typedef Kokkos::View<const Kokkos::complex<RealType>*, Device> view_type;
typedef typename view_type::size_type size_type;
typedef Kokkos::complex<RealType> value_type;
KOKKOS_INLINE_FUNCTION
void operator () (const size_type i, Kokkos::complex<RealType>& sum) const {
sum += x_(i);
}
Sum (const view_type& x) : x_ (x) {}
private:
view_type x_;
};
// Uses complex values inside kernels: fills a View of N entries with
// (1, -1) via parallel_for, sums it via parallel_reduce, and checks the
// total.
template<typename RealType, typename Device>
void testInsideKernel () {
typedef Kokkos::complex<RealType> complex_type;
typedef Kokkos::View<complex_type*, Device> view_type;
typedef typename view_type::size_type size_type;
const size_type N = 1000;
view_type x ("x", N);
ASSERT_TRUE( x.dimension_0 () == N );
// Kokkos::parallel_reduce (N, [=] (const size_type i, complex_type& result) {
// result += x[i];
// });
Kokkos::parallel_for (N, Fill<RealType, Device> (x, complex_type (1.0, -1.0)));
complex_type sum;
Kokkos::parallel_reduce (N, Sum<RealType, Device> (x), sum);
// The expected total is N * (1, -1); the literals below hard-code N == 1000.
ASSERT_TRUE( sum.real () == 1000.0 && sum.imag () == -1000.0 );
}
} // namespace Impl
// Entry point for the complex-number tests: runs the host-only checks, the
// View-creation checks and the in-kernel checks for both float and double.
template <typename Device>
void testComplex ()
{
Impl::testOutsideKernel<float> ();
Impl::testOutsideKernel<double> ();
Impl::testCreateView<float, Device> ();
Impl::testCreateView<double, Device> ();
Impl::testInsideKernel<float, Device> ();
Impl::testInsideKernel<double, Device> ();
}
} // namespace Test
#endif // KOKKOS_TEST_COMPLEX_HPP

View File

@ -0,0 +1,227 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <iostream>
#include <iomanip>
#include <stdint.h>
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <Kokkos_Bitset.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <Kokkos_Vector.hpp>
#include <TestBitset.hpp>
#include <TestUnorderedMap.hpp>
#include <TestStaticCrsGraph.hpp>
#include <TestVector.hpp>
#include <TestDualView.hpp>
#include <TestDynamicView.hpp>
#include <TestSegmentedView.hpp>
#include <Kokkos_DynRankView.hpp>
#include <TestDynViewAPI.hpp>
//----------------------------------------------------------------------------
#ifdef KOKKOS_HAVE_CUDA
namespace Test {
// gtest fixture used by every "cuda" test in this file.
class cuda : public ::testing::Test {
protected:
  // Configures std::cout number formatting, then initializes the host
  // execution space followed by Kokkos::Cuda (SelectDevice(0)).
  static void SetUpTestCase()
  {
    std::cout << std::setprecision(5);
    std::cout << std::scientific;
    Kokkos::HostSpace::execution_space::initialize();
    Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
  }

  // Finalizes in the reverse order of initialization: Cuda first, then the
  // host execution space.
  static void TearDownTestCase()
  {
    Kokkos::Cuda::finalize();
    Kokkos::HostSpace::execution_space::finalize();
  }
};
// Runs TestDynViewAPI with double values on Kokkos::Cuda.
TEST_F( cuda , dyn_view_api) {
TestDynViewAPI< double , Kokkos::Cuda >();
}
// Runs both StaticCrsGraph test routines on Kokkos::Cuda.
TEST_F( cuda , staticcrsgraph )
{
TestStaticCrsGraph::run_test_graph< Kokkos::Cuda >();
TestStaticCrsGraph::run_test_graph2< Kokkos::Cuda >();
}
// The functions below are thin forwarding wrappers that pin the templated
// test routines to Kokkos::Cuda so the test macros further down can call
// them by plain name.

// UnorderedMap insert test; forwards with the final argument set to true.
void cuda_test_insert_close( uint32_t num_nodes
, uint32_t num_inserts
, uint32_t num_duplicates
)
{
test_insert< Kokkos::Cuda >( num_nodes, num_inserts, num_duplicates, true);
}
// UnorderedMap insert test; forwards with the final argument set to false.
void cuda_test_insert_far( uint32_t num_nodes
, uint32_t num_inserts
, uint32_t num_duplicates
)
{
test_insert< Kokkos::Cuda >( num_nodes, num_inserts, num_duplicates, false);
}
// UnorderedMap failed-insert test.
void cuda_test_failed_insert( uint32_t num_nodes )
{
test_failed_insert< Kokkos::Cuda >( num_nodes );
}
// UnorderedMap deep-copy test.
void cuda_test_deep_copy( uint32_t num_nodes )
{
test_deep_copy< Kokkos::Cuda >( num_nodes );
}
// Kokkos::vector combination test with int elements.
void cuda_test_vector_combinations(unsigned int size)
{
test_vector_combinations<int,Kokkos::Cuda>(size);
}
// DualView combination test with int elements.
void cuda_test_dualview_combinations(unsigned int size)
{
test_dualview_combinations<int,Kokkos::Cuda>(size);
}
// SegmentedView test with double elements.
void cuda_test_segmented_view(unsigned int size)
{
test_segmented_view<double,Kokkos::Cuda>(size);
}
// Bitset test.  The TEST_F that would call this wrapper is commented out
// directly below.
void cuda_test_bitset()
{
test_bitset<Kokkos::Cuda>();
}
/*TEST_F( cuda, bitset )
{
cuda_test_bitset();
}*/
// Test-generating macros.  Each one expands to a TEST_F on the cuda fixture
// whose name encodes the macro arguments, and whose body calls the matching
// cuda_test_* wrapper (in a loop of `repeat` iterations where that
// parameter exists).

// Generates an UnorderedMap insert test; `name` selects
// cuda_test_insert_<name>.
#define CUDA_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat ) \
TEST_F( cuda, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
cuda_test_insert_##name(num_nodes,num_inserts,num_duplicates); \
}
// Generates an UnorderedMap failed-insert test.
#define CUDA_FAILED_INSERT_TEST( num_nodes, repeat ) \
TEST_F( cuda, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
cuda_test_failed_insert(num_nodes); \
}
// NOTE(review): this macro is never instantiated below, and the function it
// would call, cuda_test_assignment_operators, is not defined in the visible
// part of this file — confirm before using it.
#define CUDA_ASSIGNEMENT_TEST( num_nodes, repeat ) \
TEST_F( cuda, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
cuda_test_assignment_operators(num_nodes); \
}
// Generates an UnorderedMap deep-copy test.
#define CUDA_DEEP_COPY( num_nodes, repeat ) \
TEST_F( cuda, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
cuda_test_deep_copy(num_nodes); \
}
// Generates a Kokkos::vector combination test.
#define CUDA_VECTOR_COMBINE_TEST( size ) \
TEST_F( cuda, vector_combination##size##x) { \
cuda_test_vector_combinations(size); \
}
// Generates a DualView combination test.
#define CUDA_DUALVIEW_COMBINE_TEST( size ) \
TEST_F( cuda, dualview_combination##size##x) { \
cuda_test_dualview_combinations(size); \
}
// Generates a SegmentedView test.
#define CUDA_SEGMENTEDVIEW_TEST( size ) \
TEST_F( cuda, segmentedview_##size##x) { \
cuda_test_segmented_view(size); \
}
// Instantiations: these lines define the actual tests.
CUDA_DUALVIEW_COMBINE_TEST( 10 )
CUDA_VECTOR_COMBINE_TEST( 10 )
CUDA_VECTOR_COMBINE_TEST( 3057 )
CUDA_INSERT_TEST(close, 100000, 90000, 100, 500)
CUDA_INSERT_TEST(far, 100000, 90000, 100, 500)
CUDA_DEEP_COPY( 10000, 1 )
CUDA_FAILED_INSERT_TEST( 10000, 1000 )
CUDA_SEGMENTEDVIEW_TEST( 200 )
// The helper macros are local to this file; undefine them after use.
#undef CUDA_INSERT_TEST
#undef CUDA_FAILED_INSERT_TEST
#undef CUDA_ASSIGNEMENT_TEST
#undef CUDA_DEEP_COPY
#undef CUDA_VECTOR_COMBINE_TEST
#undef CUDA_DUALVIEW_COMBINE_TEST
#undef CUDA_SEGMENTEDVIEW_TEST
// Runs the DynamicView test in Kokkos::CudaUVMSpace for ten sizes:
// 100000, 100100, ..., 100900.
TEST_F( cuda , dynamic_view )
{
typedef TestDynamicView< double , Kokkos::CudaUVMSpace >
TestDynView ;
for ( int i = 0 ; i < 10 ; ++i ) {
TestDynView::run( 100000 + 100 * i );
}
}
}
#endif /* #ifdef KOKKOS_HAVE_CUDA */

View File

@ -0,0 +1,121 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_TEST_DUALVIEW_HPP
#define KOKKOS_TEST_DUALVIEW_HPP
#include <gtest/gtest.h>
#include <iostream>
#include <cstdlib>
#include <cstdio>
#include <impl/Kokkos_Timer.hpp>
namespace Test {
namespace Impl {
// Exercises the DualView modify/sync protocol together with a subview.
// The constructor runs the scenario once and stores the outcome in `result`;
// the free function Test::test_dualview_combinations expects it to be 0.
template <typename Scalar, class Device>
struct test_dualview_combinations
{
typedef test_dualview_combinations<Scalar,Device> self_type;
typedef Scalar scalar_type;
typedef Device execution_space;
// `reference` is not written or read anywhere in this struct.
Scalar reference;
Scalar result;
// Runs the scenario on an n-by-m DualView of type ViewType and returns the
// host-side sum of all entries minus the expected total, so 0 means success.
template <typename ViewType>
Scalar run_me(unsigned int n,unsigned int m){
// Enforce minimum extents so the hard-coded indices used below
// (rows 5..8, columns 0..2) are in range.
if(n<10) n = 10;
if(m<3) m = 3;
ViewType a("A",n,m);
// Fill the device copy with ones, mark it modified, and bring the host
// copy up to date.
Kokkos::deep_copy( a.d_view , 1 );
a.template modify<typename ViewType::execution_space>();
a.template sync<typename ViewType::host_mirror_space>();
// Change three entries on the host and mark the host copy modified.
a.h_view(5,1) = 3;
a.h_view(6,1) = 4;
a.h_view(7,2) = 5;
a.template modify<typename ViewType::host_mirror_space>();
// Take a subview (row range pair (6,9), column range pair (0,1)), push the
// host changes to the device, then overwrite the subview's device data
// with 2 and mark it modified on the device.
ViewType b = Kokkos::subview(a,std::pair<unsigned int, unsigned int>(6,9),std::pair<unsigned int, unsigned int>(0,1));
a.template sync<typename ViewType::execution_space>();
b.template modify<typename ViewType::execution_space>();
Kokkos::deep_copy( b.d_view , 2 );
// Sync the parent back to the host.  NOTE(review): this presumably relies
// on the subview sharing the parent's modified state — confirm against
// DualView.
a.template sync<typename ViewType::host_mirror_space>();
Scalar count = 0;
for(unsigned int i = 0; i<a.d_view.dimension_0(); i++)
for(unsigned int j = 0; j<a.d_view.dimension_1(); j++)
count += a.h_view(i,j);
// Subtract the all-ones baseline (dimension_0 * dimension_1) and the
// constant 2 + 4 + 3*2 = 12 expected from the modifications above.
return count - a.d_view.dimension_0()*a.d_view.dimension_1()-2-4-3*2;
}
// Runs the scenario on a LayoutLeft DualView with `size` rows and 3 columns.
test_dualview_combinations(unsigned int size)
{
result = run_me< Kokkos::DualView<Scalar**,Kokkos::LayoutLeft,Device> >(size,3);
}
};
} // namespace Impl
// Test entry point: runs the DualView scenario for the given size and
// asserts that its result (actual minus expected sum) is zero.
template <typename Scalar, typename Device>
void test_dualview_combinations(unsigned int size)
{
Impl::test_dualview_combinations<Scalar,Device> test(size);
ASSERT_EQ( test.result,0);
}
} // namespace Test
#endif //KOKKOS_TEST_DUALVIEW_HPP

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,168 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_TEST_DYNAMICVIEW_HPP
#define KOKKOS_TEST_DYNAMICVIEW_HPP
#include <gtest/gtest.h>
#include <iostream>
#include <cstdlib>
#include <cstdio>
#include <Kokkos_Core.hpp>
#include <Kokkos_DynamicView.hpp>
#include <impl/Kokkos_Timer.hpp>
namespace Test {
template< typename Scalar , class Space >
struct TestDynamicView
{
typedef typename Space::execution_space execution_space ;
typedef typename Space::memory_space memory_space ;
typedef Kokkos::Experimental::MemoryPool<typename Space::device_type> memory_pool_type;
typedef Kokkos::Experimental::DynamicView<Scalar*,Space> view_type;
typedef typename Kokkos::TeamPolicy<execution_space>::member_type member_type ;
typedef double value_type;
struct TEST {};
struct VERIFY {};
view_type a;
const unsigned total_size ;
TestDynamicView( const view_type & arg_a , const unsigned arg_total )
: a(arg_a), total_size( arg_total ) {}
KOKKOS_INLINE_FUNCTION
void operator() ( const TEST , member_type team_member, double& value) const
{
const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
if ( team_member.team_rank() == 0 ) {
unsigned n = team_idx + team_member.team_size();
if ( total_size < n ) n = total_size ;
a.resize_parallel( n );
if ( a.extent(0) < n ) {
Kokkos::abort("GrowTest TEST failed resize_parallel");
}
}
// Make sure resize is done for all team members:
team_member.team_barrier();
const unsigned int val = team_idx + team_member.team_rank();
if ( val < total_size ) {
value += val ;
a( val ) = val ;
}
}
KOKKOS_INLINE_FUNCTION
void operator() ( const VERIFY , member_type team_member, double& value) const
{
const unsigned int val =
team_member.team_rank() +
team_member.league_rank() * team_member.team_size();
if ( val < total_size ) {
if ( val != a(val) ) {
Kokkos::abort("GrowTest VERIFY failed resize_parallel");
}
value += a(val);
}
}
static void run( unsigned arg_total_size )
{
typedef Kokkos::TeamPolicy<execution_space,TEST> TestPolicy ;
typedef Kokkos::TeamPolicy<execution_space,VERIFY> VerifyPolicy ;
// printf("TestDynamicView::run(%d) construct memory pool\n",arg_total_size);
memory_pool_type pool( memory_space() , arg_total_size * sizeof(Scalar) * 1.2 );
// printf("TestDynamicView::run(%d) construct dynamic view\n",arg_total_size);
view_type da("A",pool,arg_total_size);
// printf("TestDynamicView::run(%d) construct test functor\n",arg_total_size);
TestDynamicView functor(da,arg_total_size);
const unsigned team_size = TestPolicy::team_size_recommended(functor);
const unsigned league_size = ( arg_total_size + team_size - 1 ) / team_size ;
double reference = 0;
double result = 0;
// printf("TestDynamicView::run(%d) run functor test\n",arg_total_size);
Kokkos::parallel_reduce( TestPolicy(league_size,team_size) , functor , reference);
execution_space::fence();
// printf("TestDynamicView::run(%d) run functor verify\n",arg_total_size);
Kokkos::parallel_reduce( VerifyPolicy(league_size,team_size) , functor , result );
execution_space::fence();
// printf("TestDynamicView::run(%d) done\n",arg_total_size);
}
};
} // namespace Test
#endif /* #ifndef KOKKOS_TEST_DYNAMICVIEW_HPP */

View File

@ -0,0 +1,182 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <Kokkos_Bitset.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <Kokkos_Vector.hpp>
//----------------------------------------------------------------------------
#include <TestBitset.hpp>
#include <TestUnorderedMap.hpp>
#include <TestStaticCrsGraph.hpp>
#include <TestVector.hpp>
#include <TestDualView.hpp>
#include <TestDynamicView.hpp>
#include <TestSegmentedView.hpp>
#include <TestComplex.hpp>
#include <Kokkos_DynRankView.hpp>
#include <TestDynViewAPI.hpp>
#include <iomanip>
namespace Test {
#ifdef KOKKOS_HAVE_OPENMP
// gtest fixture used by every "openmp" test in this file.
class openmp : public ::testing::Test {
protected:
  // Configures std::cout number formatting and initializes Kokkos::OpenMP.
  // The thread count is (NUMA count * cores per NUMA) when hwloc is
  // available, and 4 otherwise.
  static void SetUpTestCase()
  {
    std::cout << std::setprecision(5);
    std::cout << std::scientific;

    const unsigned threads_count =
      Kokkos::hwloc::available()
        ? Kokkos::hwloc::get_available_numa_count() *
          Kokkos::hwloc::get_available_cores_per_numa()
        : 4 ;

    Kokkos::OpenMP::initialize( threads_count );
  }

  // Shuts Kokkos::OpenMP down after the last test of the fixture.
  static void TearDownTestCase()
  {
    Kokkos::OpenMP::finalize();
  }
};
// Runs the Kokkos::complex tests on Kokkos::OpenMP.
TEST_F( openmp, complex )
{
testComplex<Kokkos::OpenMP> ();
}
// Runs TestDynViewAPI with double values on Kokkos::OpenMP.
TEST_F( openmp, dyn_view_api) {
TestDynViewAPI< double , Kokkos::OpenMP >();
}
// Runs the Bitset tests on Kokkos::OpenMP.
TEST_F( openmp, bitset )
{
test_bitset<Kokkos::OpenMP>();
}
// Runs both StaticCrsGraph test routines on Kokkos::OpenMP.
TEST_F( openmp , staticcrsgraph )
{
TestStaticCrsGraph::run_test_graph< Kokkos::OpenMP >();
TestStaticCrsGraph::run_test_graph2< Kokkos::OpenMP >();
}
// Test-generating macros.  Each one expands to a TEST_F on the openmp
// fixture whose name encodes the macro arguments, and whose body calls the
// matching templated test routine for Kokkos::OpenMP (in a loop of `repeat`
// iterations where that parameter exists).

// Generates an UnorderedMap insert test; `near` is forwarded as the last
// argument of test_insert.
#define OPENMP_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \
TEST_F( openmp, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_insert<Kokkos::OpenMP>(num_nodes,num_inserts,num_duplicates, near); \
}
// Generates an UnorderedMap failed-insert test.
#define OPENMP_FAILED_INSERT_TEST( num_nodes, repeat ) \
TEST_F( openmp, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_failed_insert<Kokkos::OpenMP>(num_nodes); \
}
// NOTE(review): this macro is never instantiated below, and
// test_assignement_operators is not defined in the visible part of this
// file — confirm before using it.
#define OPENMP_ASSIGNEMENT_TEST( num_nodes, repeat ) \
TEST_F( openmp, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_assignement_operators<Kokkos::OpenMP>(num_nodes); \
}
// Generates an UnorderedMap deep-copy test.
#define OPENMP_DEEP_COPY( num_nodes, repeat ) \
TEST_F( openmp, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_deep_copy<Kokkos::OpenMP>(num_nodes); \
}
// Generates a Kokkos::vector combination test with int elements.
#define OPENMP_VECTOR_COMBINE_TEST( size ) \
TEST_F( openmp, vector_combination##size##x) { \
test_vector_combinations<int,Kokkos::OpenMP>(size); \
}
// Generates a DualView combination test with int elements.
#define OPENMP_DUALVIEW_COMBINE_TEST( size ) \
TEST_F( openmp, dualview_combination##size##x) { \
test_dualview_combinations<int,Kokkos::OpenMP>(size); \
}
// Generates a SegmentedView test with double elements.
#define OPENMP_SEGMENTEDVIEW_TEST( size ) \
TEST_F( openmp, segmentedview_##size##x) { \
test_segmented_view<double,Kokkos::OpenMP>(size); \
}
// Instantiations: these lines define the actual tests.
OPENMP_INSERT_TEST(close, 100000, 90000, 100, 500, true)
OPENMP_INSERT_TEST(far, 100000, 90000, 100, 500, false)
OPENMP_FAILED_INSERT_TEST( 10000, 1000 )
OPENMP_DEEP_COPY( 10000, 1 )
OPENMP_VECTOR_COMBINE_TEST( 10 )
OPENMP_VECTOR_COMBINE_TEST( 3057 )
OPENMP_DUALVIEW_COMBINE_TEST( 10 )
OPENMP_SEGMENTEDVIEW_TEST( 10000 )
// The helper macros are local to this file; undefine them after use.
#undef OPENMP_INSERT_TEST
#undef OPENMP_FAILED_INSERT_TEST
#undef OPENMP_ASSIGNEMENT_TEST
#undef OPENMP_DEEP_COPY
#undef OPENMP_VECTOR_COMBINE_TEST
#undef OPENMP_DUALVIEW_COMBINE_TEST
#undef OPENMP_SEGMENTEDVIEW_TEST
// Runs the DynamicView test on Kokkos::OpenMP for ten sizes:
// 100000, 100100, ..., 100900.
// This test uses the `openmp` fixture, which is declared only under
// KOKKOS_HAVE_OPENMP, so it must sit inside that guard as well.
TEST_F( openmp , dynamic_view )
{
  typedef TestDynamicView< double , Kokkos::OpenMP >
    TestDynView ;

  for ( int i = 0 ; i < 10 ; ++i ) {
    TestDynView::run( 100000 + 100 * i );
  }
}
#endif /* #ifdef KOKKOS_HAVE_OPENMP */
} // namespace Test

View File

@ -0,0 +1,708 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP
#define KOKKOS_TEST_SEGMENTEDVIEW_HPP
#include <gtest/gtest.h>
#include <iostream>
#include <cstdlib>
#include <cstdio>
#include <Kokkos_Core.hpp>
#if ! KOKKOS_USING_EXP_VIEW
#include <Kokkos_SegmentedView.hpp>
#include <impl/Kokkos_Timer.hpp>
namespace Test {
namespace Impl {
template<class ViewType , class ExecutionSpace, int Rank = ViewType::Rank>
struct GrowTest;
// GrowTest specialization for a Rank of 1.  Each team grows the view to
// cover its index range; every member adds its global index to the
// reduction value and, if the index lies inside the view, stores the index
// at that position.
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 1> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  GrowTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    // First global index owned by this team.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();

    a.grow(team_member , team_idx+team_member.team_size());

    // Global index owned by this team member.
    const unsigned int my_idx = team_idx + team_member.team_rank();
    value += my_idx;

    // The extent is queried through both accessors, as a double check.
    const bool in_bounds = (a.dimension_0() > my_idx) &&
                           (a.dimension(0) > my_idx);
    if (in_bounds) {
      a(my_idx) = my_idx;
    }
  }
};
// GrowTest specialization for a Rank of 2.  Each team grows the view to
// cover its row range; every member adds (row + 13*k) for k in [0,7) to the
// reduction value and, if its row lies inside the view, fills that row with
// (row + 13*k) for every column k.
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 2> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  GrowTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    // First row owned by this team.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();

    a.grow(team_member , team_idx+ team_member.team_size());

    // Row owned by this team member.
    const unsigned int my_row = team_idx + team_member.team_rank();

    // The reduction always covers k in [0,7), independent of the view's
    // actual second extent.
    for (typename ExecutionSpace::size_type k = 0; k < 7; k++) {
      value += my_row + 13*k;
    }

    const bool in_bounds = (a.dimension_0() > my_row) &&
                           (a.dimension(0) > my_row);
    if (in_bounds) {
      for (typename ExecutionSpace::size_type k = 0; k < a.dimension_1(); k++) {
        a(my_row,k) = my_row + 13*k;
      }
    }
  }
};
// Rank-3 grow-and-fill functor for a team-policy parallel_reduce.
// The reference sum is accumulated over fixed trailing extents (7,3), while
// the fill loops use the view's actual extents; the two only agree when the
// view was created with those extents.
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 3> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  GrowTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type size_type;

    // First global index owned by this team.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    a.grow(team_member , team_idx + team_member.team_size());

    // Global index owned by this team member.
    const unsigned int row = team_idx + team_member.team_rank();

    // Expected sum of the values written below.
    for( size_type k=0;k<7;k++) {
      for( size_type l=0;l<3;l++) {
        value += row + 13*k + 3*l;
      }
    }

    // Both extent accessors are queried on purpose so each one is exercised.
    if((a.dimension_0() > row) && (a.dimension(0) > row)) {
      for( size_type k=0;k<a.dimension_1();k++) {
        for( size_type l=0;l<a.dimension_2();l++) {
          a(row,k,l) = row + 13*k + 3*l;
        }
      }
    }
  }
};
// Rank-4 grow-and-fill functor for a team-policy parallel_reduce.
// The reference sum is accumulated over fixed trailing extents (7,3,2), while
// the fill loops use the view's actual extents; the two only agree when the
// view was created with those extents.
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 4> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  GrowTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type size_type;

    // First global index owned by this team.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    a.grow(team_member , team_idx + team_member.team_size());

    // Global index owned by this team member.
    const unsigned int row = team_idx + team_member.team_rank();

    // Expected sum of the values written below.
    for( size_type k=0;k<7;k++) {
      for( size_type l=0;l<3;l++) {
        for( size_type m=0;m<2;m++) {
          value += row + 13*k + 3*l + 7*m;
        }
      }
    }

    // Both extent accessors are queried on purpose so each one is exercised.
    if((a.dimension_0() > row) && (a.dimension(0) > row)) {
      for( size_type k=0;k<a.dimension_1();k++) {
        for( size_type l=0;l<a.dimension_2();l++) {
          for( size_type m=0;m<a.dimension_3();m++) {
            a(row,k,l,m) = row + 13*k + 3*l + 7*m;
          }
        }
      }
    }
  }
};
// Rank-5 grow-and-fill functor for a team-policy parallel_reduce.
// The reference sum is accumulated over fixed trailing extents (7,3,2,3),
// while the fill loops use the view's actual extents; the two only agree when
// the view was created with those extents.
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 5> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  GrowTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type size_type;

    // First global index owned by this team.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    a.grow(team_member , team_idx + team_member.team_size());

    // Global index owned by this team member.
    const unsigned int row = team_idx + team_member.team_rank();

    // Expected sum of the values written below.
    for( size_type k=0;k<7;k++) {
      for( size_type l=0;l<3;l++) {
        for( size_type m=0;m<2;m++) {
          for( size_type n=0;n<3;n++) {
            value += row + 13*k + 3*l + 7*m + 5*n;
          }
        }
      }
    }

    // Both extent accessors are queried on purpose so each one is exercised.
    if((a.dimension_0() > row) && (a.dimension(0) > row)) {
      for( size_type k=0;k<a.dimension_1();k++) {
        for( size_type l=0;l<a.dimension_2();l++) {
          for( size_type m=0;m<a.dimension_3();m++) {
            for( size_type n=0;n<a.dimension_4();n++) {
              a(row,k,l,m,n) = row + 13*k + 3*l + 7*m + 5*n;
            }
          }
        }
      }
    }
  }
};
// Rank-6 grow-and-fill functor for a team-policy parallel_reduce.
// The reference sum is accumulated over fixed trailing extents (7,3,2,3,2),
// while the fill loops use the view's actual extents; the two only agree when
// the view was created with those extents.
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 6> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  GrowTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type size_type;

    // First global index owned by this team.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    a.grow(team_member , team_idx + team_member.team_size());

    // Global index owned by this team member.
    const unsigned int row = team_idx + team_member.team_rank();

    // Expected sum of the values written below.
    for( size_type k=0;k<7;k++) {
      for( size_type l=0;l<3;l++) {
        for( size_type m=0;m<2;m++) {
          for( size_type n=0;n<3;n++) {
            for( size_type o=0;o<2;o++) {
              value += row + 13*k + 3*l + 7*m + 5*n + 2*o ;
            }
          }
        }
      }
    }

    // Both extent accessors are queried on purpose so each one is exercised.
    if((a.dimension_0() > row) && (a.dimension(0) > row)) {
      for( size_type k=0;k<a.dimension_1();k++) {
        for( size_type l=0;l<a.dimension_2();l++) {
          for( size_type m=0;m<a.dimension_3();m++) {
            for( size_type n=0;n<a.dimension_4();n++) {
              for( size_type o=0;o<a.dimension_5();o++) {
                a(row,k,l,m,n,o) = row + 13*k + 3*l + 7*m + 5*n + 2*o ;
              }
            }
          }
        }
      }
    }
  }
};
// Rank-7 grow-and-fill functor for a team-policy parallel_reduce.
// The reference sum is accumulated over fixed trailing extents (7,3,2,3,2,4),
// while the fill loops use the view's actual extents; the two only agree when
// the view was created with those extents.
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 7> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  GrowTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type size_type;

    // First global index owned by this team.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    a.grow(team_member , team_idx + team_member.team_size());

    // Global index owned by this team member.
    const unsigned int row = team_idx + team_member.team_rank();

    // Expected sum of the values written below.
    for( size_type k=0;k<7;k++) {
      for( size_type l=0;l<3;l++) {
        for( size_type m=0;m<2;m++) {
          for( size_type n=0;n<3;n++) {
            for( size_type o=0;o<2;o++) {
              for( size_type p=0;p<4;p++) {
                value += row + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p ;
              }
            }
          }
        }
      }
    }

    // Both extent accessors are queried on purpose so each one is exercised.
    if((a.dimension_0() > row) && (a.dimension(0) > row)) {
      for( size_type k=0;k<a.dimension_1();k++) {
        for( size_type l=0;l<a.dimension_2();l++) {
          for( size_type m=0;m<a.dimension_3();m++) {
            for( size_type n=0;n<a.dimension_4();n++) {
              for( size_type o=0;o<a.dimension_5();o++) {
                for( size_type p=0;p<a.dimension_6();p++) {
                  a(row,k,l,m,n,o,p) = row + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p ;
                }
              }
            }
          }
        }
      }
    }
  }
};
// Rank-8 grow-and-fill functor for a team-policy parallel_reduce.
// The reference sum is accumulated over fixed trailing extents
// (7,3,2,3,2,4,3), while the fill loops use the view's actual extents; the
// two only agree when the view was created with those extents.
template<class ViewType , class ExecutionSpace>
struct GrowTest<ViewType , ExecutionSpace , 8> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  GrowTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type size_type;

    // First global index owned by this team.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    a.grow(team_member , team_idx + team_member.team_size());

    // Global index owned by this team member.
    const unsigned int row = team_idx + team_member.team_rank();

    // Expected sum of the values written below.
    for( size_type k=0;k<7;k++) {
      for( size_type l=0;l<3;l++) {
        for( size_type m=0;m<2;m++) {
          for( size_type n=0;n<3;n++) {
            for( size_type o=0;o<2;o++) {
              for( size_type p=0;p<4;p++) {
                for( size_type q=0;q<3;q++) {
                  value += row + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p + 17*q;
                }
              }
            }
          }
        }
      }
    }

    // Both extent accessors are queried on purpose so each one is exercised.
    if((a.dimension_0() > row) && (a.dimension(0) > row)) {
      for( size_type k=0;k<a.dimension_1();k++) {
        for( size_type l=0;l<a.dimension_2();l++) {
          for( size_type m=0;m<a.dimension_3();m++) {
            for( size_type n=0;n<a.dimension_4();n++) {
              for( size_type o=0;o<a.dimension_5();o++) {
                for( size_type p=0;p<a.dimension_6();p++) {
                  for( size_type q=0;q<a.dimension_7();q++) {
                    a(row,k,l,m,n,o,p,q) = row + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p + 17*q;
                  }
                }
              }
            }
          }
        }
      }
    }
  }
};
// Primary template of the read-back functor. It is only declared here; the
// partial specializations that follow provide one definition per view rank.
// Rank defaults to ViewType::Rank so callers can omit it.
template<class ViewType , class ExecutionSpace, int Rank = ViewType::Rank>
struct VerifyTest;
// Rank-1 read-back functor for a team-policy parallel_reduce.
// Each team member adds the entry at its own global index to the reduction
// value, provided that index lies inside the view's first extent.
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 1> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  VerifyTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    // First global index owned by this team, then this member's own index.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    const unsigned int row = team_idx + team_member.team_rank();

    // Both extent accessors are queried on purpose so each one is exercised.
    if((a.dimension_0() > row) && (a.dimension(0) > row)) {
      value += a(row);
    }
  }
};
// Rank-2 read-back functor for a team-policy parallel_reduce.
// Each team member sums every entry of the row at its own global index,
// provided that index lies inside the view's first extent.
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 2> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  VerifyTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type size_type;

    // First global index owned by this team, then this member's own index.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    const unsigned int row = team_idx + team_member.team_rank();

    // Both extent accessors are queried on purpose so each one is exercised.
    if((a.dimension_0() > row) && (a.dimension(0) > row)) {
      for( size_type k=0;k<a.dimension_1();k++) {
        value += a(row,k);
      }
    }
  }
};
// Rank-3 read-back functor for a team-policy parallel_reduce.
// Each team member sums every entry of the row at its own global index,
// provided that index lies inside the view's first extent.
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 3> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  VerifyTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type size_type;

    // First global index owned by this team, then this member's own index.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    const unsigned int row = team_idx + team_member.team_rank();

    // Both extent accessors are queried on purpose so each one is exercised.
    if((a.dimension_0() > row) && (a.dimension(0) > row)) {
      for( size_type k=0;k<a.dimension_1();k++) {
        for( size_type l=0;l<a.dimension_2();l++) {
          value += a(row,k,l);
        }
      }
    }
  }
};
// Rank-4 read-back functor for a team-policy parallel_reduce.
// Each team member sums every entry of the row at its own global index,
// provided that index lies inside the view's first extent.
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 4> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  VerifyTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type size_type;

    // First global index owned by this team, then this member's own index.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    const unsigned int row = team_idx + team_member.team_rank();

    // Both extent accessors are queried on purpose so each one is exercised.
    if((a.dimension_0() > row) && (a.dimension(0) > row)) {
      for( size_type k=0;k<a.dimension_1();k++) {
        for( size_type l=0;l<a.dimension_2();l++) {
          for( size_type m=0;m<a.dimension_3();m++) {
            value += a(row,k,l,m);
          }
        }
      }
    }
  }
};
// Rank-5 read-back functor for a team-policy parallel_reduce.
// Each team member sums every entry of the row at its own global index,
// provided that index lies inside the view's first extent.
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 5> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  VerifyTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type size_type;

    // First global index owned by this team, then this member's own index.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    const unsigned int row = team_idx + team_member.team_rank();

    // Both extent accessors are queried on purpose so each one is exercised.
    if((a.dimension_0() > row) && (a.dimension(0) > row)) {
      for( size_type k=0;k<a.dimension_1();k++) {
        for( size_type l=0;l<a.dimension_2();l++) {
          for( size_type m=0;m<a.dimension_3();m++) {
            for( size_type n=0;n<a.dimension_4();n++) {
              value += a(row,k,l,m,n);
            }
          }
        }
      }
    }
  }
};
// Rank-6 read-back functor for a team-policy parallel_reduce.
// Each team member sums every entry of the row at its own global index,
// provided that index lies inside the view's first extent.
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 6> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  VerifyTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type size_type;

    // First global index owned by this team, then this member's own index.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    const unsigned int row = team_idx + team_member.team_rank();

    // Both extent accessors are queried on purpose so each one is exercised.
    if((a.dimension_0() > row) && (a.dimension(0) > row)) {
      for( size_type k=0;k<a.dimension_1();k++) {
        for( size_type l=0;l<a.dimension_2();l++) {
          for( size_type m=0;m<a.dimension_3();m++) {
            for( size_type n=0;n<a.dimension_4();n++) {
              for( size_type o=0;o<a.dimension_5();o++) {
                value += a(row,k,l,m,n,o);
              }
            }
          }
        }
      }
    }
  }
};
// Rank-7 read-back functor for a team-policy parallel_reduce.
// Each team member sums every entry of the row at its own global index,
// provided that index lies inside the view's first extent.
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 7> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  VerifyTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type size_type;

    // First global index owned by this team, then this member's own index.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    const unsigned int row = team_idx + team_member.team_rank();

    // Both extent accessors are queried on purpose so each one is exercised.
    if((a.dimension_0() > row) && (a.dimension(0) > row)) {
      for( size_type k=0;k<a.dimension_1();k++) {
        for( size_type l=0;l<a.dimension_2();l++) {
          for( size_type m=0;m<a.dimension_3();m++) {
            for( size_type n=0;n<a.dimension_4();n++) {
              for( size_type o=0;o<a.dimension_5();o++) {
                for( size_type p=0;p<a.dimension_6();p++) {
                  value += a(row,k,l,m,n,o,p);
                }
              }
            }
          }
        }
      }
    }
  }
};
// Rank-8 read-back functor for a team-policy parallel_reduce.
// Each team member sums every entry of the row at its own global index,
// provided that index lies inside the view's first extent.
template<class ViewType , class ExecutionSpace>
struct VerifyTest<ViewType , ExecutionSpace , 8> {
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;
  typedef typename Policy::member_type team_type;
  typedef double value_type;

  ViewType a;

  VerifyTest(ViewType in):a(in) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (team_type team_member, double& value) const {
    typedef typename ExecutionSpace::size_type size_type;

    // First global index owned by this team, then this member's own index.
    const unsigned int team_idx = team_member.league_rank() * team_member.team_size();
    const unsigned int row = team_idx + team_member.team_rank();

    // Both extent accessors are queried on purpose so each one is exercised.
    if((a.dimension_0() > row) && (a.dimension(0) > row)) {
      for( size_type k=0;k<a.dimension_1();k++) {
        for( size_type l=0;l<a.dimension_2();l++) {
          for( size_type m=0;m<a.dimension_3();m++) {
            for( size_type n=0;n<a.dimension_4();n++) {
              for( size_type o=0;o<a.dimension_5();o++) {
                for( size_type p=0;p<a.dimension_6();p++) {
                  for( size_type q=0;q<a.dimension_7();q++) {
                    value += a(row,k,l,m,n,o,p,q);
                  }
                }
              }
            }
          }
        }
      }
    }
  }
};
// Driver that runs the grow/verify round trip for a segmented view of one
// chosen rank and records both reduction results.
//
// After construction:
//   reference - sum accumulated by GrowTest while it fills the view
//   result    - sum read back from the view by VerifyTest
// Callers compare the two members; they are equal when every value written
// during the grow pass is read back unchanged.
template <typename Scalar, class ExecutionSpace>
struct test_segmented_view
{
  typedef test_segmented_view<Scalar,ExecutionSpace> self_type;
  typedef Scalar scalar_type;
  typedef ExecutionSpace execution_space;
  typedef Kokkos::TeamPolicy<execution_space> Policy;

  double result;
  double reference;

  // Runs GrowTest followed by VerifyTest on view `a` with the same team
  // policy, storing the reductions in `reference` and `result`.
  // Note: the league size is max_length/team_size (integer division), so a
  // trailing partial team's worth of indices is not visited.
  template <class ViewType>
  void run_me(ViewType a, int max_length){
    const int team_size = Policy::team_size_max( GrowTest<ViewType,execution_space>(a) );
    const int nteams = max_length/team_size;

    reference = 0;
    result = 0;

    Kokkos::parallel_reduce(Policy(nteams,team_size),GrowTest<ViewType,execution_space>(a),reference);
    Kokkos::fence();
    Kokkos::parallel_reduce(Policy(nteams,team_size),VerifyTest<ViewType,execution_space>(a),result);
    Kokkos::fence();
  }

  // Builds a segmented view of the requested rank (1..8) and runs the round
  // trip on it. `size` is passed as the leading extent argument; 128 is the
  // argument preceding it (presumably the minimum segment length - confirm
  // against SegmentedView's constructor). Any other rank leaves both sums 0.
  //
  // The trailing extents (7,3,2,3,2,4,3) must match the constants hard-coded
  // in the GrowTest reference loops.
  test_segmented_view(unsigned int size,int rank)
  {
    reference = 0;
    result = 0;

    const int dim_1 = 7;
    const int dim_2 = 3;
    const int dim_3 = 2;
    const int dim_4 = 3;
    const int dim_5 = 2;
    const int dim_6 = 4;

    if(rank==1) {
      typedef Kokkos::Experimental::SegmentedView<Scalar*,Kokkos::LayoutLeft,ExecutionSpace> rank1_view;
      run_me< rank1_view >(rank1_view("Rank1",128,size), size);
    }
    if(rank==2) {
      typedef Kokkos::Experimental::SegmentedView<Scalar**,Kokkos::LayoutLeft,ExecutionSpace> rank2_view;
      run_me< rank2_view >(rank2_view("Rank2",128,size,dim_1), size);
    }
    if(rank==3) {
      // Fixed: this was Scalar*[7][3][2], which is a rank-4 type, so the
      // rank-3 functors were never instantiated and rank 4 ran twice.
      typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3],Kokkos::LayoutRight,ExecutionSpace> rank3_view;
      run_me< rank3_view >(rank3_view("Rank3",128,size), size);
    }
    if(rank==4) {
      typedef Kokkos::Experimental::SegmentedView<Scalar****,Kokkos::LayoutRight,ExecutionSpace> rank4_view;
      run_me< rank4_view >(rank4_view("Rank4",128,size,dim_1,dim_2,dim_3), size);
    }
    if(rank==5) {
      typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3][2][3],Kokkos::LayoutLeft,ExecutionSpace> rank5_view;
      run_me< rank5_view >(rank5_view("Rank5",128,size), size);
    }
    if(rank==6) {
      typedef Kokkos::Experimental::SegmentedView<Scalar*****[2],Kokkos::LayoutRight,ExecutionSpace> rank6_view;
      run_me< rank6_view >(rank6_view("Rank6",128,size,dim_1,dim_2,dim_3,dim_4), size);
    }
    if(rank==7) {
      typedef Kokkos::Experimental::SegmentedView<Scalar*******,Kokkos::LayoutLeft,ExecutionSpace> rank7_view;
      run_me< rank7_view >(rank7_view("Rank7",128,size,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6), size);
    }
    if(rank==8) {
      typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> rank8_view;
      run_me< rank8_view >(rank8_view("Rank8",128,size,dim_1,dim_2,dim_3,dim_4), size);
    }
  }
};
} // namespace Impl
template <typename Scalar, class ExecutionSpace>
void test_segmented_view(unsigned int size)
{
{
typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> view_type;
view_type a("A",128,size,7,3,2,3);
double reference;
Impl::GrowTest<view_type,ExecutionSpace> f(a);
const int team_size = Kokkos::TeamPolicy<ExecutionSpace>::team_size_max( f );
const int nteams = (size+team_size-1)/team_size;
Kokkos::parallel_reduce(Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size),f,reference);
size_t real_size = ((size+127)/128)*128;
ASSERT_EQ(real_size,a.dimension_0());
ASSERT_EQ(7,a.dimension_1());
ASSERT_EQ(3,a.dimension_2());
ASSERT_EQ(2,a.dimension_3());
ASSERT_EQ(3,a.dimension_4());
ASSERT_EQ(2,a.dimension_5());
ASSERT_EQ(4,a.dimension_6());
ASSERT_EQ(3,a.dimension_7());
ASSERT_EQ(real_size,a.dimension(0));
ASSERT_EQ(7,a.dimension(1));
ASSERT_EQ(3,a.dimension(2));
ASSERT_EQ(2,a.dimension(3));
ASSERT_EQ(3,a.dimension(4));
ASSERT_EQ(2,a.dimension(5));
ASSERT_EQ(4,a.dimension(6));
ASSERT_EQ(3,a.dimension(7));
ASSERT_EQ(8,a.Rank);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,1);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,2);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,3);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,4);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,5);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,6);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,7);
ASSERT_EQ(test.reference,test.result);
}
{
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,8);
ASSERT_EQ(test.reference,test.result);
}
}
} // namespace Test
#else
// No-op replacement compiled when KOKKOS_USING_EXP_VIEW is non-zero, so test
// translation units that call test_segmented_view still build.
// NOTE(review): this stub sits at global scope, whereas the real
// implementation above is declared inside namespace Test - confirm that the
// difference is intentional.
template <typename Scalar, class ExecutionSpace>
void test_segmented_view(unsigned int ) {}
#endif
#endif /* #ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP */

View File

@ -0,0 +1,175 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#if ! defined(KOKKOS_HAVE_SERIAL)
# error "It doesn't make sense to build this file unless the Kokkos::Serial device is enabled. If you see this message, it probably means that there is an error in Kokkos' CMake build infrastructure."
#else
#include <Kokkos_Bitset.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <Kokkos_Vector.hpp>
#include <TestBitset.hpp>
#include <TestUnorderedMap.hpp>
#include <TestStaticCrsGraph.hpp>
#include <TestVector.hpp>
#include <TestDualView.hpp>
#include <TestSegmentedView.hpp>
#include <TestDynamicView.hpp>
#include <TestComplex.hpp>
#include <iomanip>
#include <Kokkos_DynRankView.hpp>
#include <TestDynViewAPI.hpp>
namespace Test {
// GoogleTest fixture for the Kokkos::Serial container tests.
// Kokkos::Serial is initialized once before the first test of this test case
// and finalized once after the last one.
class serial : public ::testing::Test {
protected:
static void SetUpTestCase () {
// Print floating-point output in scientific notation with 5 digits.
std::cout << std::setprecision(5) << std::scientific;
Kokkos::Serial::initialize ();
}
static void TearDownTestCase () {
Kokkos::Serial::finalize ();
}
};
// DynRankView API test, instantiated for double on Kokkos::Serial.
TEST_F( serial, dyn_view_api) {
TestDynViewAPI< double , Kokkos::Serial >();
}
// StaticCrsGraph tests: runs both graph test variants on Kokkos::Serial.
TEST_F( serial , staticcrsgraph )
{
TestStaticCrsGraph::run_test_graph< Kokkos::Serial >();
TestStaticCrsGraph::run_test_graph2< Kokkos::Serial >();
}
// Complex-number test on Kokkos::Serial.
TEST_F( serial, complex )
{
testComplex<Kokkos::Serial> ();
}
// Bitset test on Kokkos::Serial.
TEST_F( serial, bitset )
{
test_bitset<Kokkos::Serial> ();
}
// Helper macros that each expand to one TEST_F on the `serial` fixture.
// The macro arguments are pasted into the generated test name, so every
// instantiation produces a uniquely named test.

// UnorderedMap insert test: calls test_insert `repeat` times with the given
// node count, insert count, duplicate count and `near` flag.
#define SERIAL_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \
TEST_F( serial, UnorderedMap_insert_##name##_##num_nodes##_##num_inserts##_##num_duplicates##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_insert<Kokkos::Serial> (num_nodes, num_inserts, num_duplicates, near); \
}
// UnorderedMap failed-insert test: calls test_failed_insert `repeat` times.
#define SERIAL_FAILED_INSERT_TEST( num_nodes, repeat ) \
TEST_F( serial, UnorderedMap_failed_insert_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_failed_insert<Kokkos::Serial> (num_nodes); \
}
// UnorderedMap assignment-operator test: calls test_assignement_operators
// `repeat` times. (The macro and helper names keep their existing spelling.)
#define SERIAL_ASSIGNEMENT_TEST( num_nodes, repeat ) \
TEST_F( serial, UnorderedMap_assignment_operators_##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_assignement_operators<Kokkos::Serial> (num_nodes); \
}
// UnorderedMap deep-copy test: calls test_deep_copy `repeat` times.
#define SERIAL_DEEP_COPY( num_nodes, repeat ) \
TEST_F( serial, UnorderedMap_deep_copy##num_nodes##_##repeat##x) { \
for (int i=0; i<repeat; ++i) \
test_deep_copy<Kokkos::Serial> (num_nodes); \
}
// Kokkos::vector test for int elements with the given size.
#define SERIAL_VECTOR_COMBINE_TEST( size ) \
TEST_F( serial, vector_combination##size##x) { \
test_vector_combinations<int,Kokkos::Serial>(size); \
}
// DualView test for int elements with the given size.
#define SERIAL_DUALVIEW_COMBINE_TEST( size ) \
TEST_F( serial, dualview_combination##size##x) { \
test_dualview_combinations<int,Kokkos::Serial>(size); \
}
// SegmentedView test for double elements with the given size.
#define SERIAL_SEGMENTEDVIEW_TEST( size ) \
TEST_F( serial, segmentedview_##size##x) { \
test_segmented_view<double,Kokkos::Serial>(size); \
}
// Concrete test instantiations.
// NOTE(review): SERIAL_ASSIGNEMENT_TEST is defined above but never
// instantiated here, so no assignment-operator test is generated - confirm
// whether that is intentional.
SERIAL_INSERT_TEST(close, 100000, 90000, 100, 500, true)
SERIAL_INSERT_TEST(far, 100000, 90000, 100, 500, false)
SERIAL_FAILED_INSERT_TEST( 10000, 1000 )
SERIAL_DEEP_COPY( 10000, 1 )
SERIAL_VECTOR_COMBINE_TEST( 10 )
SERIAL_VECTOR_COMBINE_TEST( 3057 )
SERIAL_DUALVIEW_COMBINE_TEST( 10 )
SERIAL_SEGMENTEDVIEW_TEST( 10000 )
// The helper macros are local to this file; remove them once used.
#undef SERIAL_INSERT_TEST
#undef SERIAL_FAILED_INSERT_TEST
#undef SERIAL_ASSIGNEMENT_TEST
#undef SERIAL_DEEP_COPY
#undef SERIAL_VECTOR_COMBINE_TEST
#undef SERIAL_DUALVIEW_COMBINE_TEST
#undef SERIAL_SEGMENTEDVIEW_TEST
// DynamicView test on Kokkos::Serial: runs the test ten times with the
// argument stepping from 100000 to 100900 in increments of 100.
TEST_F( serial , dynamic_view )
{
  typedef TestDynamicView< double , Kokkos::Serial > dyn_view_test ;

  int step = 0 ;
  while ( step < 10 ) {
    dyn_view_test::run( 100000 + 100 * step );
    ++step ;
  }
}
} // namespace Test
#endif // KOKKOS_HAVE_SERIAL

View File

@ -0,0 +1,149 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <vector>
#include <Kokkos_StaticCrsGraph.hpp>
/*--------------------------------------------------------------------------*/
namespace TestStaticCrsGraph {
// Builds a StaticCrsGraph from a host-side adjacency list and checks that a
// host mirror of it reproduces the list exactly.
//
// The input has 1000 rows with 8 entries each; entry j of row i holds
// i + 3*j. The test checks the number of rows, the length of every row and
// the value of every entry.
template< class Space >
void run_test_graph()
{
  typedef Kokkos::StaticCrsGraph< unsigned , Space > dView ;
  typedef typename dView::HostMirror hView ;

  const unsigned LENGTH = 1000 ;

  // Default-construct first; both graphs are assigned further down.
  dView dx ;
  hView hx ;

  std::vector< std::vector< int > > graph( LENGTH );

  for ( size_t row = 0 ; row < LENGTH ; ++row ) {
    std::vector< int > & columns = graph[row] ;
    columns.reserve(8);
    for ( size_t col = 0 ; col < 8 ; ++col ) {
      columns.push_back( row + col * 3 );
    }
  }

  dx = Kokkos::create_staticcrsgraph<dView>( "dx" , graph );
  hx = Kokkos::create_mirror( dx );

  // row_map has one more entry than there are rows.
  ASSERT_EQ( hx.row_map.dimension_0() - 1 , LENGTH );

  for ( size_t row = 0 ; row < LENGTH ; ++row ) {
    const size_t first = hx.row_map[row];
    const size_t count = hx.row_map[row+1] - first ;
    ASSERT_EQ( count , graph[row].size() );
    for ( size_t col = 0 ; col < count ; ++col ) {
      ASSERT_EQ( (int) hx.entries( col + first ) , graph[row][col] );
    }
  }
}
template< class Space >
void run_test_graph2()
{
typedef Kokkos::StaticCrsGraph< unsigned[3] , Space > dView ;
typedef typename dView::HostMirror hView ;
const unsigned LENGTH = 10 ;
std::vector< size_t > sizes( LENGTH );
size_t total_length = 0 ;
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
total_length += ( sizes[i] = 6 + i % 4 );
}
dView dx = Kokkos::create_staticcrsgraph<dView>( "test" , sizes );
hView hx = Kokkos::create_mirror( dx );
hView mx = Kokkos::create_mirror( dx );
ASSERT_EQ( (size_t) dx.row_map.dimension_0() , (size_t) LENGTH + 1 );
ASSERT_EQ( (size_t) hx.row_map.dimension_0() , (size_t) LENGTH + 1 );
ASSERT_EQ( (size_t) mx.row_map.dimension_0() , (size_t) LENGTH + 1 );
ASSERT_EQ( (size_t) dx.entries.dimension_0() , (size_t) total_length );
ASSERT_EQ( (size_t) hx.entries.dimension_0() , (size_t) total_length );
ASSERT_EQ( (size_t) mx.entries.dimension_0() , (size_t) total_length );
ASSERT_EQ( (size_t) dx.entries.dimension_1() , (size_t) 3 );
ASSERT_EQ( (size_t) hx.entries.dimension_1() , (size_t) 3 );
ASSERT_EQ( (size_t) mx.entries.dimension_1() , (size_t) 3 );
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
const size_t entry_begin = hx.row_map[i];
const size_t entry_end = hx.row_map[i+1];
for ( size_t j = entry_begin ; j < entry_end ; ++j ) {
hx.entries(j,0) = j + 1 ;
hx.entries(j,1) = j + 2 ;
hx.entries(j,2) = j + 3 ;
}
}
Kokkos::deep_copy( dx.entries , hx.entries );
Kokkos::deep_copy( mx.entries , dx.entries );
ASSERT_EQ( mx.row_map.dimension_0() , (size_t) LENGTH + 1 );
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
const size_t entry_begin = mx.row_map[i];
const size_t entry_end = mx.row_map[i+1];
ASSERT_EQ( ( entry_end - entry_begin ) , sizes[i] );
for ( size_t j = entry_begin ; j < entry_end ; ++j ) {
ASSERT_EQ( (size_t) mx.entries( j , 0 ) , ( j + 1 ) );
ASSERT_EQ( (size_t) mx.entries( j , 1 ) , ( j + 2 ) );
ASSERT_EQ( (size_t) mx.entries( j , 2 ) , ( j + 3 ) );
}
}
}
} /* namespace TestStaticCrsGraph */

View File

@ -0,0 +1,188 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_PTHREAD )
#include <Kokkos_Bitset.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <Kokkos_Vector.hpp>
#include <iomanip>
//----------------------------------------------------------------------------
#include <TestBitset.hpp>
#include <TestUnorderedMap.hpp>
#include <TestStaticCrsGraph.hpp>
#include <TestVector.hpp>
#include <TestDualView.hpp>
#include <TestDynamicView.hpp>
#include <TestSegmentedView.hpp>
#include <Kokkos_DynRankView.hpp>
#include <TestDynViewAPI.hpp>
namespace Test {
// gtest fixture shared by every TEST_F( threads , ... ) case in this file.
// Kokkos::Threads is initialized once before the first test of the case
// and finalized once after the last one.
class threads : public ::testing::Test {
protected:

  // Switches std::cout to scientific notation with a precision of 5,
  // picks a thread count, prints it, and initializes Kokkos::Threads.
  static void SetUpTestCase()
  {
    std::cout << std::setprecision(5) << std::scientific;

    // With hwloc: (available NUMA count) x (available cores per NUMA);
    // the threads-per-core count is not multiplied in.
    // Without hwloc: a fixed count of 4.
    unsigned thread_count =
      Kokkos::hwloc::available()
        ? Kokkos::hwloc::get_available_numa_count()
            * Kokkos::hwloc::get_available_cores_per_numa()
        : 4 ;

    std::cout << "Threads: " << thread_count << std::endl;

    Kokkos::Threads::initialize( thread_count );
  }

  // Finalizes Kokkos::Threads once all tests of the case have finished.
  static void TearDownTestCase()
  {
    Kokkos::Threads::finalize();
  }
};
// Runs TestDynViewAPI for double with Kokkos::Threads.
TEST_F( threads , dyn_view_api )
{
  typedef Kokkos::Threads exec_space ;

  TestDynViewAPI< double , exec_space >();
}
// Runs both TestStaticCrsGraph checks (run_test_graph, then run_test_graph2)
// with Kokkos::Threads.
TEST_F( threads , staticcrsgraph )
{
  typedef Kokkos::Threads exec_space ;

  TestStaticCrsGraph::run_test_graph< exec_space >();
  TestStaticCrsGraph::run_test_graph2< exec_space >();
}
/*TEST_F( threads, bitset )
{
test_bitset<Kokkos::Threads>();
}*/
// Defines a threads-fixture test named
//   UnorderedMap_insert_<label>_<nodes>_<inserts>_<dups>_<reps>x
// that calls test_insert<Kokkos::Threads>( nodes, inserts, dups, is_near )
// `reps` times in a row.
#define THREADS_INSERT_TEST( label, nodes, inserts, dups, reps, is_near ) \
  TEST_F( threads, UnorderedMap_insert_##label##_##nodes##_##inserts##_##dups##_##reps##x ) \
  { \
    for ( int pass = 0 ; pass < reps ; ++pass ) { \
      test_insert<Kokkos::Threads>( nodes, inserts, dups, is_near ); \
    } \
  }
// Defines a threads-fixture test named
//   UnorderedMap_failed_insert_<nodes>_<reps>x
// that calls test_failed_insert<Kokkos::Threads>( nodes ) `reps` times.
#define THREADS_FAILED_INSERT_TEST( nodes, reps ) \
  TEST_F( threads, UnorderedMap_failed_insert_##nodes##_##reps##x ) \
  { \
    for ( int pass = 0 ; pass < reps ; ++pass ) { \
      test_failed_insert<Kokkos::Threads>( nodes ); \
    } \
  }
// Defines a threads-fixture test named
//   UnorderedMap_assignment_operators_<nodes>_<reps>x
// that calls test_assignement_operators<Kokkos::Threads>( nodes ) `reps` times.
#define THREADS_ASSIGNEMENT_TEST( nodes, reps ) \
  TEST_F( threads, UnorderedMap_assignment_operators_##nodes##_##reps##x ) \
  { \
    for ( int pass = 0 ; pass < reps ; ++pass ) { \
      test_assignement_operators<Kokkos::Threads>( nodes ); \
    } \
  }
// Defines a threads-fixture test named
//   UnorderedMap_deep_copy<nodes>_<reps>x
// (no underscore between "deep_copy" and the node count) that calls
// test_deep_copy<Kokkos::Threads>( nodes ) `reps` times.
#define THREADS_DEEP_COPY( nodes, reps ) \
  TEST_F( threads, UnorderedMap_deep_copy##nodes##_##reps##x ) \
  { \
    for ( int pass = 0 ; pass < reps ; ++pass ) { \
      test_deep_copy<Kokkos::Threads>( nodes ); \
    } \
  }
// Defines a threads-fixture test named vector_combination<extent>x that
// calls test_vector_combinations<int,Kokkos::Threads>( extent ) once.
#define THREADS_VECTOR_COMBINE_TEST( extent ) \
  TEST_F( threads, vector_combination##extent##x ) \
  { \
    test_vector_combinations<int,Kokkos::Threads>( extent ); \
  }
// Defines a threads-fixture test named dualview_combination<extent>x that
// calls test_dualview_combinations<int,Kokkos::Threads>( extent ) once.
#define THREADS_DUALVIEW_COMBINE_TEST( extent ) \
  TEST_F( threads, dualview_combination##extent##x ) \
  { \
    test_dualview_combinations<int,Kokkos::Threads>( extent ); \
  }
// Defines a threads-fixture test named segmentedview_<extent>x that calls
// test_segmented_view<double,Kokkos::Threads>( extent ) once.
#define THREADS_SEGMENTEDVIEW_TEST( extent ) \
  TEST_F( threads, segmentedview_##extent##x ) \
  { \
    test_segmented_view<double,Kokkos::Threads>( extent ); \
  }
// Instantiate the macro-generated tests. Each line below expands to one
// TEST_F on the `threads` fixture.
// No THREADS_ASSIGNEMENT_TEST instantiation appears here, so that macro
// generates no test in this file.

// UnorderedMap_insert_far_100000_90000_100_500x:
// 500 repetitions of test_insert<Kokkos::Threads>(100000, 90000, 100, false).
THREADS_INSERT_TEST(far, 100000, 90000, 100, 500, false)
// UnorderedMap_failed_insert_10000_1000x:
// 1000 repetitions of test_failed_insert<Kokkos::Threads>(10000).
THREADS_FAILED_INSERT_TEST( 10000, 1000 )
// UnorderedMap_deep_copy10000_1x:
// a single call of test_deep_copy<Kokkos::Threads>(10000).
THREADS_DEEP_COPY( 10000, 1 )
// vector_combination10x and vector_combination3057x:
// test_vector_combinations<int,Kokkos::Threads> with sizes 10 and 3057.
THREADS_VECTOR_COMBINE_TEST( 10 )
THREADS_VECTOR_COMBINE_TEST( 3057 )
// dualview_combination10x:
// test_dualview_combinations<int,Kokkos::Threads>(10).
THREADS_DUALVIEW_COMBINE_TEST( 10 )
// segmentedview_10000x:
// test_segmented_view<double,Kokkos::Threads>(10000).
THREADS_SEGMENTEDVIEW_TEST( 10000 )
// The generator macros are only needed for the instantiations above;
// undefine them so they are not visible to the rest of the file.
#undef THREADS_INSERT_TEST
#undef THREADS_FAILED_INSERT_TEST
#undef THREADS_ASSIGNEMENT_TEST
#undef THREADS_DEEP_COPY
#undef THREADS_VECTOR_COMBINE_TEST
#undef THREADS_DUALVIEW_COMBINE_TEST
#undef THREADS_SEGMENTEDVIEW_TEST
// Calls TestDynamicView< double , Kokkos::Threads >::run ten times, with
// arguments 100000, 100100, ..., 100900 (100000 + 100 * pass).
TEST_F( threads , dynamic_view )
{
  typedef TestDynamicView< double , Kokkos::Threads > dyn_view_runner ;

  int pass = 0 ;
  while ( pass < 10 ) {
    dyn_view_runner::run( 100000 + 100 * pass );
    ++pass ;
  }
}
} // namespace Test
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */

Some files were not shown because too many files have changed in this diff Show More