Merge pull request #2004 from stanmoore1/kk_3.1

Update Kokkos library in LAMMPS to v3.1
This commit is contained in:
Axel Kohlmeyer 2020-04-24 18:35:53 -04:00 committed by GitHub
commit 72ff0dd87d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
560 changed files with 24838 additions and 15005 deletions

View File

@ -14,16 +14,30 @@ endif()
option(EXTERNAL_KOKKOS "Build against external kokkos library" OFF)
option(DOWNLOAD_KOKKOS "Download the KOKKOS library instead of using the bundled one" OFF)
if(DOWNLOAD_KOKKOS)
# extract Kokkos-related variables and values so we can forward them to the Kokkos library build
get_cmake_property(_VARS VARIABLES)
list(FILTER _VARS INCLUDE REGEX ^Kokkos_)
foreach(_VAR IN LISTS _VARS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-D${_VAR}=${${_VAR}}")
endforeach()
message(STATUS "KOKKOS download requested - we will build our own")
file(DOWNLOAD https://github.com/kokkos/kokkos/compare/3.0.00...stanmoore1:lammps.diff ${CMAKE_CURRENT_BINARY_DIR}/kokkos-lammps.patch)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>")
if(CMAKE_REQUEST_PIC)
list(APPEND KOKKOS_LIB_BUILD_ARGS ${CMAKE_REQUEST_PIC})
endif()
# append other CMake variables that need to be forwarded to CMAKE_ARGS
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_INSTALL_LIBDIR=lib")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject)
ExternalProject_Add(kokkos_build
URL https://github.com/kokkos/kokkos/archive/3.0.00.tar.gz
URL_MD5 281c7093aa3a603276e93abdf4be23b9
PATCH_COMMAND patch -p1 < ${CMAKE_CURRENT_BINARY_DIR}/kokkos-lammps.patch
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> ${CMAKE_REQUEST_PIC}
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_LIBDIR=lib
-DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
URL https://github.com/kokkos/kokkos/archive/3.1.00.tar.gz
URL_MD5 f638a6c786f748a602b26faa0e96ebab
CMAKE_ARGS ${KOKKOS_LIB_BUILD_ARGS}
BUILD_BYPRODUCTS <INSTALL_DIR>/lib/libkokkoscore.a
)
ExternalProject_get_property(kokkos_build INSTALL_DIR)
@ -39,9 +53,9 @@ if(DOWNLOAD_KOKKOS)
install(CODE "MESSAGE(FATAL_ERROR \"Installing liblammps with downloaded libraries is currently not supported.\")")
endif()
elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 3)
find_package(Kokkos 3.1)
if(NOT Kokkos_FOUND)
message(FATAL_ERROR "KOKKOS library not found, help CMake to find it by setting KOKKOS_LIBRARY, or set DOWNLOAD_KOKKOS=ON to download it")
message(FATAL_ERROR "KOKKOS library version 3.1 or later not found, help CMake to find it by setting KOKKOS_LIBRARY, or set DOWNLOAD_KOKKOS=ON to download it")
endif()
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
else()

View File

@ -320,11 +320,12 @@ to have an executable that will run on this and newer architectures.
.. note::
NVIDIA GPUs with CC 5.0 (Maxwell) and newer are not compatible with
CC 3.x (Kepler). If you run Kokkos on a newer architecture than what
LAMMPS was compiled with, there will be a significant delay during
device initialization since the just-in-time compiler has to
recompile the GPU kernel code for the new hardware.
If you run Kokkos on a newer GPU architecture than what LAMMPS was
compiled with, there will be a delay during device initialization
since the just-in-time compiler has to recompile all GPU kernels
for the new hardware. This is, however, not possible when LAMMPS was
compiled for NVIDIA GPUs with CC 3.x (Kepler) and is run on GPUs with
CC 5.0 (Maxwell) or newer, as these architectures are not compatible.
The settings discussed below have been tested with LAMMPS and are
confirmed to work. Kokkos is an active project with ongoing improvements
@ -343,73 +344,109 @@ be specified in uppercase.
:widths: auto
* - **Arch-ID**
- **HOST or GPU**
- **Description**
* - AMDAVX
- HOST
- AMD 64-bit x86 CPU (AVX 1)
* - EPYC
- HOST
- AMD EPYC Zen class CPU (AVX 2)
* - ARMV80
- HOST
- ARMv8.0 Compatible CPU
* - ARMV81
- HOST
- ARMv8.1 Compatible CPU
* - ARMV8_THUNDERX
- HOST
- ARMv8 Cavium ThunderX CPU
* - ARMV8_THUNDERX2
- HOST
- ARMv8 Cavium ThunderX2 CPU
* - WSM
- HOST
- Intel Westmere CPU (SSE 4.2)
* - SNB
- HOST
- Intel Sandy/Ivy Bridge CPU (AVX 1)
* - HSW
- HOST
- Intel Haswell CPU (AVX 2)
* - BDW
- HOST
- Intel Broadwell Xeon E-class CPU (AVX 2 + transactional mem)
* - SKX
- HOST
- Intel Sky Lake Xeon E-class HPC CPU (AVX512 + transactional mem)
* - KNC
- HOST
- Intel Knights Corner Xeon Phi
* - KNL
- HOST
- Intel Knights Landing Xeon Phi
* - BGQ
- HOST
- IBM Blue Gene/Q CPU
* - POWER7
- IBM POWER8 CPU
- HOST
- IBM POWER7 CPU
* - POWER8
- HOST
- IBM POWER8 CPU
* - POWER9
- HOST
- IBM POWER9 CPU
* - KEPLER30
- GPU
- NVIDIA Kepler generation CC 3.0 GPU
* - KEPLER32
- GPU
- NVIDIA Kepler generation CC 3.2 GPU
* - KEPLER35
- GPU
- NVIDIA Kepler generation CC 3.5 GPU
* - KEPLER37
- GPU
- NVIDIA Kepler generation CC 3.7 GPU
* - MAXWELL50
- GPU
- NVIDIA Maxwell generation CC 5.0 GPU
* - MAXWELL52
- GPU
- NVIDIA Maxwell generation CC 5.2 GPU
* - MAXWELL53
- GPU
- NVIDIA Maxwell generation CC 5.3 GPU
* - PASCAL60
- GPU
- NVIDIA Pascal generation CC 6.0 GPU
* - PASCAL61
- GPU
- NVIDIA Pascal generation CC 6.1 GPU
* - VOLTA70
- GPU
- NVIDIA Volta generation CC 7.0 GPU
* - VOLTA72
- GPU
- NVIDIA Volta generation CC 7.2 GPU
* - TURING75
- GPU
- NVIDIA Turing generation CC 7.5 GPU
* - VEGA900
- GPU
- AMD GPU MI25 GFX900
* - VEGA906
- GPU
- AMD GPU MI50/MI60 GFX906
CMake build settings:
^^^^^^^^^^^^^^^^^^^^^
Basic CMake build settings:
^^^^^^^^^^^^^^^^^^^^^^^^^^^
For multicore CPUs using OpenMP, set these 2 variables.
.. code-block:: bash
-D Kokkos_ARCH_CPUARCH=yes # CPUARCH = CPU from list above
-D Kokkos_ARCH_HOSTARCH=yes # HOSTARCH = HOST from list above
-D Kokkos_ENABLE_OPENMP=yes
-D BUILD_OMP=yes
@ -427,15 +464,19 @@ For NVIDIA GPUs using CUDA, set these variables:
.. code-block:: bash
-D Kokkos_ARCH_CPUARCH=yes # CPUARCH = CPU from list above
-D Kokkos_ARCH_HOSTARCH=yes # HOSTARCH = HOST from list above
-D Kokkos_ARCH_GPUARCH=yes # GPUARCH = GPU from list above
-D Kokkos_ENABLE_CUDA=yes
-D Kokkos_ENABLE_OPENMP=yes
-D CMAKE_CXX_COMPILER=wrapper # wrapper = full path to Cuda nvcc wrapper
The wrapper value is the Cuda nvcc compiler wrapper provided in the
Kokkos library: ``lib/kokkos/bin/nvcc_wrapper``\ . The setting should
include the full path name to the wrapper, e.g.
This will also enable executing FFTs on the GPU, either via the internal
KISSFFT library, or - by preference - with the cuFFT library bundled
with the CUDA toolkit, depending on whether CMake can identify its
location. The *wrapper* value for the ``CMAKE_CXX_COMPILER`` variable is
the path to the CUDA nvcc compiler wrapper provided in the Kokkos
library: ``lib/kokkos/bin/nvcc_wrapper``\ . The setting should include
the full path name to the wrapper, e.g.
.. code-block:: bash
@ -455,8 +496,8 @@ common packages enabled, you can do the following:
cmake -C ../cmake/presets/minimal.cmake -C ../cmake/presets/kokkos-cuda.cmake ../cmake
cmake --build .
Traditional make settings:
^^^^^^^^^^^^^^^^^^^^^^^^^^
Basic traditional make settings:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Choose which hardware to support in ``Makefile.machine`` via
``KOKKOS_DEVICES`` and ``KOKKOS_ARCH`` settings. See the
@ -467,7 +508,7 @@ For multicore CPUs using OpenMP:
.. code-block:: make
KOKKOS_DEVICES = OpenMP
KOKKOS_ARCH = CPUARCH # CPUARCH = CPU from list above
KOKKOS_ARCH = HOSTARCH # HOSTARCH = HOST from list above
For Intel KNLs using OpenMP:
@ -481,7 +522,8 @@ For NVIDIA GPUs using CUDA:
.. code-block:: make
KOKKOS_DEVICES = Cuda
KOKKOS_ARCH = CPUARCH,GPUARCH # CPUARCH = CPU from list above that is hosting the GPU
KOKKOS_ARCH = HOSTARCH,GPUARCH # HOSTARCH = HOST from list above that is hosting the GPU
KOKKOS_CUDA_OPTIONS = "enable_lambda"
# GPUARCH = GPU from list above
FFT_INC = -DFFT_CUFFT # enable use of cuFFT (optional)
FFT_LIB = -lcufft # link to cuFFT library
@ -504,6 +546,44 @@ C++ compiler for non-Kokkos, non-CUDA files.
KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd)
CC = mpicxx -cxx=$(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper
Advanced KOKKOS compilation settings
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
There are other allowed options when building with the KOKKOS package
that can improve performance or assist in debugging or profiling. Below
are some examples that may be useful in combination with LAMMPS. For
the full list (which keeps changing as the Kokkos package itself evolves),
please consult the Kokkos library documentation.
As an alternative to using multi-threading via OpenMP
(``-DKokkos_ENABLE_OPENMP=on`` or ``KOKKOS_DEVICES=OpenMP``) it is also
possible to use Posix threads directly (``-DKokkos_ENABLE_PTHREAD=on``
or ``KOKKOS_DEVICES=Pthread``). While binding of threads to individual
or groups of CPU cores is managed in OpenMP with environment variables,
you need assistance from either the "hwloc" or "libnuma" library for the
Pthread thread parallelization option. To enable use with CMake:
``-DKokkos_ENABLE_HWLOC=on`` or ``-DKokkos_ENABLE_LIBNUMA=on``; and with
conventional make: ``KOKKOS_USE_TPLS=hwloc`` or
``KOKKOS_USE_TPLS=libnuma``.
The CMake option ``-DKokkos_ENABLE_LIBRT=on`` or the makefile setting
``KOKKOS_USE_TPLS=librt`` enables the use of a more accurate timer
mechanism on many Unix-like platforms for internal profiling.
The CMake option ``-DKokkos_ENABLE_DEBUG=on`` or the makefile setting
``KOKKOS_DEBUG=yes`` enables printing of run-time
debugging information that can be useful. It also enables runtime
bounds checking on Kokkos data structures. As is to be expected, enabling
this option will negatively impact the performance and thus is only
recommended when developing a Kokkos-enabled style in LAMMPS.
The CMake option ``-DKokkos_ENABLE_CUDA_UVM=on`` or the makefile
setting ``KOKKOS_CUDA_OPTIONS=enable_lambda,force_uvm`` enables the
use of CUDA "Unified Virtual Memory" in Kokkos. Please note that
the LAMMPS KOKKOS package must **always** be compiled with the
*enable_lambda* option when using GPUs.
----------
.. _latte:

View File

@ -9,10 +9,7 @@ different back end languages such as CUDA, OpenMP, or Pthreads. The
Kokkos library also provides data abstractions to adjust (at compile
time) the memory layout of data structures like 2d and 3d arrays to
optimize performance on different hardware. For more information on
Kokkos, see `GitHub <https://github.com/kokkos/kokkos>`_. Kokkos is
part of `Trilinos <https://www.trilinos.org/>`_. The Kokkos
library was written primarily by Carter Edwards, Christian Trott, and
Dan Sunderland (all Sandia).
Kokkos, see `GitHub <https://github.com/kokkos/kokkos>`_.
The LAMMPS KOKKOS package contains versions of pair, fix, and atom
styles that use data structures and macros provided by the Kokkos
@ -21,7 +18,7 @@ package was developed primarily by Christian Trott (Sandia) and Stan
Moore (Sandia) with contributions of various styles by others,
including Sikandar Mashayak (UIUC), Ray Shan (Sandia), and Dan Ibanez
(Sandia). For more information on developing using Kokkos abstractions
see the Kokkos programmers' guide at /lib/kokkos/doc/Kokkos_PG.pdf.
see the Kokkos `Wiki <https://github.com/kokkos/kokkos/wiki>`_.
Kokkos currently provides support for 3 modes of execution (per MPI
task). These are Serial (MPI-only for CPUs and Intel Phi), OpenMP
@ -31,33 +28,30 @@ compatible with specific hardware.
.. note::
Kokkos support within LAMMPS must be built with a C++11 compatible
compiler. This means GCC version 4.7.2 or later, Intel 14.0.4 or later, or
Clang 3.5.2 or later is required.
.. note::
To build with Kokkos support for NVIDIA GPUs, NVIDIA CUDA
To build with Kokkos support for NVIDIA GPUs, the NVIDIA CUDA toolkit
software version 9.0 or later must be installed on your system. See
the discussion for the :doc:`GPU package <Speed_gpu>` for details of how
to check and do this.
the discussion for the :doc:`GPU package <Speed_gpu>` for details of
how to check and do this.
.. note::
Kokkos with CUDA currently implicitly assumes that the MPI library
is CUDA-aware. This is not always the case, especially when using
pre-compiled MPI libraries provided by a Linux distribution. This is not
a problem when using only a single GPU with a single MPI rank. When
running with multiple MPI ranks, you may see segmentation faults without
CUDA-aware MPI support. These can be avoided by adding the flags :doc:`-pk kokkos cuda/aware off <Run_options>` to the LAMMPS command line or by
using the command :doc:`package kokkos cuda/aware off <package>` in the
input file.
Kokkos with CUDA currently implicitly assumes that the MPI library is
CUDA-aware. This is not always the case, especially when using
pre-compiled MPI libraries provided by a Linux distribution. This is
not a problem when using only a single GPU with a single MPI
rank. When running with multiple MPI ranks, you may see segmentation
faults without CUDA-aware MPI support. These can be avoided by adding
the flags :doc:`-pk kokkos cuda/aware off <Run_options>` to the
LAMMPS command line or by using the command :doc:`package kokkos
cuda/aware off <package>` in the input file.
**Building LAMMPS with the KOKKOS package:**
Building LAMMPS with the KOKKOS package
"""""""""""""""""""""""""""""""""""""""
See the :ref:`Build extras <kokkos>` doc page for instructions.
**Running LAMMPS with the KOKKOS package:**
Running LAMMPS with the KOKKOS package
""""""""""""""""""""""""""""""""""""""
All Kokkos operations occur within the context of an individual MPI
task running on a single node of the machine. The total number of MPI
@ -66,7 +60,8 @@ usual manner via the mpirun or mpiexec commands, and is independent of
Kokkos. E.g. the mpirun command in OpenMPI does this via its -np and
-npernode switches. Ditto for MPICH via -np and -ppn.
**Running on a multi-core CPU:**
Running on a multi-core CPU
^^^^^^^^^^^^^^^^^^^^^^^^^^^
Here is a quick overview of how to use the KOKKOS package
for CPU acceleration, assuming one or more 16-core nodes.
@ -142,7 +137,8 @@ atom. When using the Kokkos Serial back end or the OpenMP back end with
a single thread, no duplication or atomic operations are used. For CUDA
and half neighbor lists, the KOKKOS package always uses atomic operations.
**Core and Thread Affinity:**
CPU Cores, Sockets and Thread Affinity
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
When using multi-threading, it is important for performance to bind
both MPI tasks to physical cores, and threads to physical cores, so
@ -156,15 +152,16 @@ for your MPI installation), binding can be forced with these flags:
OpenMPI 1.8: mpirun -np 2 --bind-to socket --map-by socket ./lmp_openmpi ...
Mvapich2 2.0: mpiexec -np 2 --bind-to socket --map-by socket ./lmp_mvapich ...
For binding threads with KOKKOS OpenMP, use thread affinity
environment variables to force binding. With OpenMP 3.1 (gcc 4.7 or
later, intel 12 or later) setting the environment variable
OMP_PROC_BIND=true should be sufficient. In general, for best
performance with OpenMP 4.0 or better set OMP_PROC_BIND=spread and
OMP_PLACES=threads. For binding threads with the KOKKOS pthreads
option, compile LAMMPS the KOKKOS HWLOC=yes option as described below.
For binding threads with KOKKOS OpenMP, use thread affinity environment
variables to force binding. With OpenMP 3.1 (gcc 4.7 or later, intel 12
or later) setting the environment variable ``OMP_PROC_BIND=true`` should
be sufficient. In general, for best performance with OpenMP 4.0 or later
set ``OMP_PROC_BIND=spread`` and ``OMP_PLACES=threads``. For binding
threads with the KOKKOS pthreads option, compile LAMMPS with the hwloc
or libnuma support enabled as described in the :ref:`extra build options page <kokkos>`.
**Running on Knight's Landing (KNL) Intel Xeon Phi:**
Running on Knight's Landing (KNL) Intel Xeon Phi
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Here is a quick overview of how to use the KOKKOS package for the
Intel Knight's Landing (KNL) Xeon Phi:
@ -222,7 +219,8 @@ threads/task as Nt. The product of these two values should be N, i.e.
them in "native" mode, not "offload" mode like the USER-INTEL package
supports.
**Running on GPUs:**
Running on GPUs
^^^^^^^^^^^^^^^
Use the "-k" :doc:`command-line switch <Run_options>` to specify the
number of GPUs per node. Typically the -np setting of the mpirun command
@ -257,7 +255,7 @@ one or more nodes, each with two GPUs:
running on GPUs is to use "full" neighbor lists and set the Newton flag
to "off" for both pairwise and bonded interactions, along with threaded
communication. When running on Maxwell or Kepler GPUs, this will
typically be best. For Pascal GPUs, using "half" neighbor lists and
typically be best. For Pascal GPUs and beyond, using "half" neighbor lists and
setting the Newton flag to "on" may be faster. For many pair styles,
setting the neighbor binsize equal to twice the CPU default value will
give speedup, which is the default when running on GPUs. Use the "-pk
@ -270,13 +268,6 @@ one or more nodes, each with two GPUs:
mpirun -np 2 lmp_kokkos_cuda_openmpi -k on g 2 -sf kk -pk kokkos newton on neigh half binsize 2.8 -in in.lj # Newton on, half neighbor list, set binsize = neighbor ghost cutoff
.. note::
For good performance of the KOKKOS package on GPUs, you must
have Kepler generation GPUs (or later). The Kokkos library exploits
texture cache options not supported by Tesla generation GPUs (or
older).
.. note::
When using a GPU, you will achieve the best performance if your
@ -293,7 +284,8 @@ one or more nodes, each with two GPUs:
kspace, etc., you must set the environment variable CUDA_LAUNCH_BLOCKING=1.
However, this will reduce performance and is not recommended for production runs.
**Run with the KOKKOS package by editing an input script:**
Run with the KOKKOS package by editing an input script
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Alternatively the effect of the "-sf" or "-pk" switches can be
duplicated by adding the :doc:`package kokkos <package>` or :doc:`suffix kk <suffix>` commands to your input script.
@ -316,17 +308,24 @@ You only need to use the :doc:`package kokkos <package>` command if you
wish to change any of its option defaults, as set by the "-k on"
:doc:`command-line switch <Run_options>`.
**Using OpenMP threading and CUDA together (experimental):**
**Using OpenMP threading and CUDA together:**
With the KOKKOS package, both OpenMP multi-threading and GPUs can be
used together in a few special cases. In the Makefile, the
KOKKOS_DEVICES variable must include both "Cuda" and "OpenMP", as is
the case for /src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi
compiled and used together in a few special cases. In the makefile for
the conventional build, the KOKKOS_DEVICES variable must include both
"Cuda" and "OpenMP", as is the case for ``/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi``.
.. code-block:: bash
KOKKOS_DEVICES=Cuda,OpenMP
When building with CMake you need to enable both features as it is done
in the ``kokkos-cuda.cmake`` CMake preset file.
.. code-block:: bash
cmake ../cmake -DKokkos_ENABLE_CUDA=yes -DKokkos_ENABLE_OPENMP=yes
The suffix "/kk" is equivalent to "/kk/device", and for Kokkos CUDA,
using the "-sf kk" in the command line gives the default CUDA version
everywhere. However, if the "/kk/host" suffix is added to a specific
@ -360,7 +359,8 @@ suffix for kspace and bonds, angles, etc. in the input file and the
sure the environment variable CUDA_LAUNCH_BLOCKING is not set to "1"
so CPU/GPU overlap can occur.
**Speed-ups to expect:**
Performance to expect
"""""""""""""""""""""
The performance of KOKKOS running in different modes is a function of
your hardware, which KOKKOS-enabled styles are used, and the problem
@ -377,52 +377,26 @@ Generally speaking, the following rules of thumb apply:
performance of a KOKKOS style is a bit slower than the USER-OMP
package.
* When running a large number of atoms per GPU, KOKKOS is typically faster
than the GPU package.
than the GPU package when compiled for double precision. The benefit
of using single or mixed precision with the GPU package depends
significantly on the hardware in use and the simulated system and pair
style.
* When running on Intel hardware, KOKKOS is not as fast as
the USER-INTEL package, which is optimized for that hardware.
the USER-INTEL package, which is optimized for x86 hardware (not just
from Intel) and compilation with the Intel compilers. The USER-INTEL
package also can increase the vector length of vector instructions
by switching to single or mixed precision mode.
See the `Benchmark page <https://lammps.sandia.gov/bench.html>`_ of the
LAMMPS web site for performance of the KOKKOS package on different
hardware.
**Advanced Kokkos options:**
Advanced Kokkos options
"""""""""""""""""""""""
There are other allowed options when building with the KOKKOS package.
As explained on the :ref:`Build extras <kokkos>` doc page,
they can be set either as variables on the make command line or in
Makefile.machine, or they can be specified as CMake variables. Each
takes a value shown below. The default value is listed, which is set
in the lib/kokkos/Makefile.kokkos file.
* KOKKOS_DEBUG, values = *yes*\ , *no*\ , default = *no*
* KOKKOS_USE_TPLS, values = *hwloc*\ , *librt*\ , *experimental_memkind*, default = *none*
* KOKKOS_CXX_STANDARD, values = *c++11*\ , *c++1z*\ , default = *c++11*
* KOKKOS_OPTIONS, values = *aggressive_vectorization*, *disable_profiling*, default = *none*
* KOKKOS_CUDA_OPTIONS, values = *force_uvm*, *use_ldg*, *rdc*\ , *enable_lambda*, default = *enable_lambda*
KOKKOS_USE_TPLS=hwloc binds threads to hardware cores, so they do not
migrate during a simulation. KOKKOS_USE_TPLS=hwloc should always be
used if running with KOKKOS_DEVICES=Pthreads for pthreads. It is not
necessary for KOKKOS_DEVICES=OpenMP for OpenMP, because OpenMP
provides alternative methods via environment variables for binding
threads to hardware cores. More info on binding threads to cores is
given on the :doc:`Speed omp <Speed_omp>` doc page.
KOKKOS_USE_TPLS=librt enables use of a more accurate timer mechanism
on most Unix platforms. This library is not available on all
platforms.
KOKKOS_DEBUG is only useful when developing a Kokkos-enabled style
within LAMMPS. KOKKOS_DEBUG=yes enables printing of run-time
debugging information that can be useful. It also enables runtime
bounds checking on Kokkos data structures.
KOKKOS_CXX_STANDARD and KOKKOS_OPTIONS are typically not changed when
building LAMMPS.
KOKKOS_CUDA_OPTIONS are additional options for CUDA. The LAMMPS KOKKOS
package must be compiled with the *enable_lambda* option when using
GPUs.
There are other allowed options when building with the KOKKOS package
that can improve performance or assist in debugging or profiling.
They are explained on the :ref:`KOKKOS section of the build extras <kokkos>` doc page.
Restrictions
""""""""""""

View File

@ -499,6 +499,7 @@ cuda
Cuda
CUDA
CuH
cuFFT
Cummins
Curk
customIDs
@ -1544,6 +1545,7 @@ libmeam
libmessage
libmpi
libmpich
libnuma
libplumed
libplumedKernel
libpng

View File

@ -40,6 +40,13 @@ cmake ${srcdir} \
````
which activates the OpenMP backend. All of the options controlling device backends, options, architectures, and third-party libraries (TPLs) are given below.
## Platform-specific Problems
### Cray
* The Cray compiler wrappers do static linking by default. This seems to break the Kokkos build. You will likely need to set the environment variable `CRAYPE_LINK_TYPE=dynamic` in order to link correctly. Kokkos warns during configure if this is missing.
* The Cray compiler identifies to CMake as Clang, but it sometimes has its own flags that differ from Clang. We try to include all exceptions, but flag errors may occur in which a Clang-specific flag is passed that the Cray compiler does not recognize.
## Spack
An alternative to manually building with CMake is to use the Spack package manager.
To do so, download the `kokkos-spack` git repo and add to the package list:
@ -63,6 +70,7 @@ For a complete list of Kokkos options, run:
````
spack info kokkos
````
More details can be found in the kokkos-spack repository [README](https://github.com/kokkos/kokkos-spack/blob/master/README.md).
#### Spack Development
Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable".
@ -73,32 +81,8 @@ spack find -p kokkos ...
````
where `...` is the unique spec identifying the particular Kokkos configuration and version.
A better way to use Spack for doing Kokkos development is the DIY feature of Spack.
If you wish to develop Kokkos itself, go to the Kokkos source folder:
````
spack diy -u cmake kokkos@diy ...
````
where `...` is a Spack spec identifying the exact Kokkos configuration.
This then creates a `spack-build` directory where you can run `make`.
If doing development on a downstream project, you can do almost exactly the same thing.
````
spack diy -u cmake ${myproject}@${myversion} ... ^kokkos...
````
where the `...` are the specs for your project and the desired Kokkos configuration.
Again, a `spack-build` directory will be created where you can run `make`.
Spack has a few idiosyncracies that make building outside of Spack annoying related to Spack forcing use of a compiler wrapper. This can be worked around by having a `-DSpack_WORKAROUND=On` given your CMake. Then add the block of code to your CMakeLists.txt:
````
if (Spack_WORKAROUND)
set(SPACK_CXX $ENV{SPACK_CXX})
if(SPACK_CXX)
set(CMAKE_CXX_COMPILER ${SPACK_CXX} CACHE STRING "the C++ compiler" FORCE)
set(ENV{CXX} ${SPACK_CXX})
endif()
endif()
````
A better way to use Spack for doing Kokkos development is the dev-build feature of Spack.
For dev-build details, consult the kokkos-spack repository [README](https://github.com/kokkos/kokkos-spack/blob/master/README.md).
# Kokkos Keyword Listing
@ -157,6 +141,9 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`.
* Kokkos_ENABLE_DEPRECATED_CODE
* Whether to enable deprecated code
* BOOL Default: OFF
* Kokkos_ENABLE_EXAMPLES
* Whether to enable building examples
* BOOL Default: OFF
* Kokkos_ENABLE_HPX_ASYNC_DISPATCH
* Whether HPX supports asynchronous dispatch
* BOOL Default: OFF

View File

@ -1,5 +1,59 @@
# Change Log
## [3.1.00](https://github.com/kokkos/kokkos/tree/3.1.00) (2020-04-14)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.0.00...3.1.00)
**Features:**
- HIP Support for AMD
- OpenMPTarget Support with clang
- Windows VS19 (Serial) Support [\#1533](https://github.com/kokkos/kokkos/issues/1533)
**Implemented enhancements:**
- generate\_makefile.bash should allow tests to be disabled [\#2886](https://github.com/kokkos/kokkos/issues/2886)
- clang/7+cuda/9 build -Werror-unused parameter error in nightly test [\#2884](https://github.com/kokkos/kokkos/issues/2884)
- ScatterView memory space is not user settable [\#2826](https://github.com/kokkos/kokkos/issues/2826)
- clang/8+cuda/10.0 build error with c++17 [\#2809](https://github.com/kokkos/kokkos/issues/2809)
- warnings.... [\#2805](https://github.com/kokkos/kokkos/issues/2805)
- Kokkos version in cpp define [\#2787](https://github.com/kokkos/kokkos/issues/2787)
- Remove Defunct QThreads Backend [\#2751](https://github.com/kokkos/kokkos/issues/2751)
- Improve Kokkos::fence behavior with multiple execution spaces [\#2659](https://github.com/kokkos/kokkos/issues/2659)
- polylithic\(?\) initialization of Kokkos [\#2658](https://github.com/kokkos/kokkos/issues/2658)
- Unnecessary\(?\) check for host execution space initialization from Cuda initialization [\#2652](https://github.com/kokkos/kokkos/issues/2652)
- Kokkos error reporting failures with CUDA GPUs in exclusive mode [\#2471](https://github.com/kokkos/kokkos/issues/2471)
- atomicMax equivalent \(and other atomics\) [\#2401](https://github.com/kokkos/kokkos/issues/2401)
- Fix alignment for Kokkos::complex [\#2255](https://github.com/kokkos/kokkos/issues/2255)
- Warnings with Cuda 10.1 [\#2206](https://github.com/kokkos/kokkos/issues/2206)
- dual view with Kokkos::ViewAllocateWithoutInitializing [\#2188](https://github.com/kokkos/kokkos/issues/2188)
- Check error code from cudaOccupancyMaxActiveBlocksPerMultiprocessor [\#2172](https://github.com/kokkos/kokkos/issues/2172)
- Add non-member Kokkos::resize/realloc for DualView [\#2170](https://github.com/kokkos/kokkos/issues/2170)
- Construct DualView without initialization [\#2046](https://github.com/kokkos/kokkos/issues/2046)
- Expose is\_assignable to determine if one view can be assigned to another [\#1936](https://github.com/kokkos/kokkos/issues/1936)
- profiling label [\#1935](https://github.com/kokkos/kokkos/issues/1935)
- team\_broadcast of bool failed on CUDA backend [\#1908](https://github.com/kokkos/kokkos/issues/1908)
- View static\_extent [\#660](https://github.com/kokkos/kokkos/issues/660)
- Misleading Kokkos::Cuda::initialize ERROR message when compiled for wrong GPU architecture [\#1944](https://github.com/kokkos/kokkos/issues/1944)
- Cryptic Error When Malloc Fails [\#2164](https://github.com/kokkos/kokkos/issues/2164)
- Drop support for intermediate standards in CMake [\#2336](https://github.com/kokkos/kokkos/issues/2336)
**Fixed bugs:**
- DualView sync\_device with length zero creates cuda errors [\#2946](https://github.com/kokkos/kokkos/issues/2946)
- building with nvcc and clang \(or clang based XL\) as host compiler: "Kokkos::atomic\_fetch\_min\(volatile int \*, int\)" has already been defined [\#2903](https://github.com/kokkos/kokkos/issues/2903)
- Cuda 9.1,10.1 debug builds failing due to -Werror=unused-parameter [\#2880](https://github.com/kokkos/kokkos/issues/2880)
- clang -Werror: Kokkos\_FixedBufferMemoryPool.hpp:140:28: error: unused parameter 'alloc\_size' [\#2869](https://github.com/kokkos/kokkos/issues/2869)
- intel/16.0.1, intel/17.0.1 nightly build failures with debugging enabled [\#2867](https://github.com/kokkos/kokkos/issues/2867)
- intel/16.0.1 debug build errors [\#2863](https://github.com/kokkos/kokkos/issues/2863)
- xl/16.1.1 with cpp14, openmp build, nightly test failures [\#2856](https://github.com/kokkos/kokkos/issues/2856)
- Intel nightly test failures: team\_vector [\#2852](https://github.com/kokkos/kokkos/issues/2852)
- Kokkos Views with intmax/2\<N\<intmax can hang during construction [\#2850](https://github.com/kokkos/kokkos/issues/2850)
- workgraph\_fib test seg-faults with threads backend and hwloc [\#2797](https://github.com/kokkos/kokkos/issues/2797)
- cuda.view\_64bit test hangs on Power8+Kepler37 system - develop and 2.9.00 branches [\#2771](https://github.com/kokkos/kokkos/issues/2771)
- device\_type for Kokkos\_Random ? [\#2693](https://github.com/kokkos/kokkos/issues/2693)
- "More than one tag given" error in Experimental::require\(\) [\#2608](https://github.com/kokkos/kokkos/issues/2608)
- Segfault on Marvell from our finalization stack [\#2542](https://github.com/kokkos/kokkos/issues/2542)
## [3.0.00](https://github.com/kokkos/kokkos/tree/3.0.00) (2020-01-27)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.9.00...3.0.00)
@ -214,7 +268,7 @@
## [2.6.00](https://github.com/kokkos/kokkos/tree/2.6.00) (2018-03-07)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.5.00...2.6.00)
**Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.6**
**Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.6**
**Implemented enhancements:**
@ -258,7 +312,7 @@
## [2.5.00](https://github.com/kokkos/kokkos/tree/2.5.00) (2017-12-15)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.04.11...2.5.00)
**Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.5**
**Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.5**
**Implemented enhancements:**

View File

@ -2,7 +2,7 @@
# We want to determine if options are given with the wrong case
# In order to detect which arguments are given to compare against
# the list of valid arguments, at the beginning here we need to
# form a list of all the given variables. If it begins with any
# form a list of all the given variables. If it begins with any
# case of KoKkOS, we add it to the list.
@ -25,6 +25,8 @@ SET(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR})
# Needed to simplify syntax of if statements
CMAKE_POLICY(SET CMP0054 NEW)
# Needed to make IN_LIST a valid operator
CMAKE_POLICY(SET CMP0057 NEW)
# Is this a build as part of Trilinos?
IF(COMMAND TRIBITS_PACKAGE_DECL)
@ -65,7 +67,7 @@ IF(NOT KOKKOS_HAS_TRILINOS)
cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
set(CMAKE_DISABLE_SOURCE_CHANGES ON)
set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
IF (Spack_WORKAROUND)
IF (Spack_WORKAROUND)
#if we are explicitly using Spack for development,
#nuke the Spack compiler
SET(SPACK_CXX $ENV{SPACK_CXX})
@ -75,7 +77,15 @@ IF(NOT KOKKOS_HAS_TRILINOS)
ENDIF()
ENDif()
# Declare the Kokkos project (C++ only). The guard tests whether a variable
# whose *name* is the current value of PROJECT_NAME is defined.
# NOTE(review): presumably intended to detect an enclosing project; confirm
# that `${PROJECT_NAME}` (rather than `PROJECT_NAME`) is what was meant.
IF(NOT DEFINED ${PROJECT_NAME})
# WORKAROUND FOR HIPCC
# When HIP is enabled, save the current CMAKE_CXX_FLAGS and temporarily append
# an AMD GPU target so that it is in effect while PROJECT() runs.
# NOTE(review): the target is hard-coded to gfx906 regardless of the
# architecture the user requested; confirm this is only needed for PROJECT().
IF(Kokkos_ENABLE_HIP)
SET(KOKKOS_INTERNAL_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --amdgpu-target=gfx906")
ENDIF()
PROJECT(Kokkos CXX)
# Restore the flags saved above so the temporary GPU target does not leak into
# the rest of the configuration. The expansion is unquoted: if the saved value
# was empty this SET() unsets the normal variable instead of assigning "".
IF(Kokkos_ENABLE_HIP)
SET(CMAKE_CXX_FLAGS ${KOKKOS_INTERNAL_CMAKE_CXX_FLAGS})
ENDIF()
ENDIF()
ENDIF()
@ -92,16 +102,17 @@ ENDIF()
set(Kokkos_VERSION_MAJOR 3)
set(Kokkos_VERSION_MINOR 0)
set(Kokkos_VERSION_MINOR 1)
set(Kokkos_VERSION_PATCH 0)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.12.0")
IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.12.0")
MESSAGE(STATUS "Setting policy CMP0074 to use <Package>_ROOT variables")
CMAKE_POLICY(SET CMP0074 NEW)
ENDIF()
# Load either the real TriBITS or a TriBITS wrapper
# Load either the real TriBITS or a TriBITS wrapper
# for certain utility functions that are universal (like GLOBAL_SET)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake)
@ -118,18 +129,14 @@ ENDIF()
# These are the variables we will append to as we go
# I really wish these were regular variables
# but scoping issues can make it difficult
GLOBAL_RESET(KOKKOS_COMPILE_OPTIONS)
GLOBAL_RESET(KOKKOS_LINK_OPTIONS)
GLOBAL_RESET(KOKKOS_CUDA_OPTIONS)
GLOBAL_RESET(KOKKOS_CUDAFE_OPTIONS)
GLOBAL_RESET(KOKKOS_XCOMPILER_OPTIONS)
GLOBAL_SET(KOKKOS_COMPILE_OPTIONS)
GLOBAL_SET(KOKKOS_LINK_OPTIONS)
GLOBAL_SET(KOKKOS_CUDA_OPTIONS)
GLOBAL_SET(KOKKOS_CUDAFE_OPTIONS)
GLOBAL_SET(KOKKOS_XCOMPILER_OPTIONS)
# We need to append text here for making sure TPLs
# we import are available for an installed Kokkos
GLOBAL_RESET(KOKKOS_TPL_EXPORTS)
# We need these for controlling the exact -std flag
GLOBAL_RESET(KOKKOS_DONT_ALLOW_EXTENSIONS)
GLOBAL_RESET(KOKKOS_USE_CXX_EXTENSIONS)
GLOBAL_RESET(KOKKOS_CXX_STANDARD_FEATURE)
GLOBAL_SET(KOKKOS_TPL_EXPORTS)
# Include a set of Kokkos-specific wrapper functions that
# will either call raw CMake or TriBITS
@ -137,6 +144,9 @@ GLOBAL_RESET(KOKKOS_CXX_STANDARD_FEATURE)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake)
# Check the environment and set certain variables
# to allow platform-specific checks
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_check_env.cmake)
# The build environment setup goes in the following steps
# 1) Check all the enable options. This includes checking Kokkos_DEVICES
# 2) Check the compiler ID (type and version)
@ -187,14 +197,21 @@ IF (KOKKOS_HAS_TRILINOS)
# Because Tribits doesn't use lists, it uses spaces for the list of CXX flags
# we have to match the annoying behavior
STRING(REPLACE ";" " " KOKKOSCORE_COMPILE_OPTIONS "${KOKKOS_COMPILE_OPTIONS}")
STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONS}")
FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS})
SET(KOKKOSCORE_CUDAFE_OPTIONS "${KOKKOSCORE_CUDAFE_OPTIONS} -Xcudafe ${CUDAFE_FLAG}")
ENDFOREACH()
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_COMPILE_OPTIONS})
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_CUDA_OPTIONS})
FOREACH(XCOMP_FLAG ${KOKKOS_XCOMPILER_OPTIONS})
SET(KOKKOSCORE_XCOMPILER_OPTIONS "${KOKKOSCORE_XCOMPILER_OPTIONS} -Xcompiler ${XCOMP_FLAG}")
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcompiler ${XCOMP_FLAG})
ENDFOREACH()
SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION} ${KOKKOSCORE_CUDA_OPTIONS} ${KOKKOSCORE_CUDAFE_OPTIONS} ${KOKKOSCORE_XCOMPILER_OPTIONS}")
SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION} ${KOKKOSCORE_XCOMPILER_OPTIONS}")
IF (KOKKOS_ENABLE_CUDA)
STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONS}")
FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS})
SET(KOKKOSCORE_CUDAFE_OPTIONS "${KOKKOSCORE_CUDAFE_OPTIONS} -Xcudafe ${CUDAFE_FLAG}")
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcudafe ${CUDAFE_FLAG})
ENDFOREACH()
SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_CXX_FLAGS} ${KOKKOSCORE_CUDA_OPTIONS} ${KOKKOSCORE_CUDAFE_OPTIONS}")
ENDIF()
# Both parent scope and this package
# In ProjectCompilerPostConfig.cmake, we capture the "global" flags Trilinos wants in
# TRILINOS_TOPLEVEL_CXX_FLAGS
@ -203,6 +220,8 @@ IF (KOKKOS_HAS_TRILINOS)
#CMAKE_CXX_FLAGS will get added to Kokkos and Kokkos dependencies automatically here
#These flags get set up in KOKKOS_PACKAGE_DECL, which means they
#must be configured before KOKKOS_PACKAGE_DECL
SET(KOKKOS_ALL_COMPILE_OPTIONS
$<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_ALL_COMPILE_OPTIONS}>)
ENDIF()
KOKKOS_PACKAGE_DECL()
@ -250,7 +269,7 @@ INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h" DESTINATION ${CM
# When Kokkos is built as a subdirectory of another project, publish one
# Kokkos_ENABLE_<device> flag per enabled device, both as a variable in the
# parent scope and as a global property.
IF (HAS_PARENT)
# Iterate over the *contents* of the list. The previous form,
# FOREACH(DEV Kokkos_ENABLED_DEVICES), looped once over the literal token and
# only ever set Kokkos_ENABLE_Kokkos_ENABLED_DEVICES.
# NOTE(review): confirm Kokkos_ENABLED_DEVICES is the list populated by the
# device-enable logic (the spelling/case of that variable is not visible here).
FOREACH(DEV IN LISTS Kokkos_ENABLED_DEVICES)
#I would much rather not make these cache variables or global properties, but I can't
#make any guarantees on whether PARENT_SCOPE is good enough to make
#make any guarantees on whether PARENT_SCOPE is good enough to make
#these variables visible where I need them
SET(Kokkos_ENABLE_${DEV} ON PARENT_SCOPE)
SET_PROPERTY(GLOBAL PROPERTY Kokkos_ENABLE_${DEV} ON)

View File

@ -1,13 +1,13 @@
//@HEADER
// ************************************************************************
//
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -36,6 +36,6 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
//
// ************************************************************************
//@HEADER

View File

@ -6,15 +6,20 @@ ifndef KOKKOS_PATH
endif
CXXFLAGS=$(CCFLAGS)
# Options: Cuda,ROCm,OpenMP,Pthreads,Qthreads,Serial
# Kokkos release version. KOKKOS_VERSION folds the three components into one
# integer (MAJOR*10000 + MINOR*100 + PATCH) that is later written to the
# generated config header as the KOKKOS_VERSION macro.
KOKKOS_VERSION_MAJOR = 3
KOKKOS_VERSION_MINOR = 1
KOKKOS_VERSION_PATCH = 0
# The expression is quoted so the shell cannot glob-expand the '*' operators
# against files in the current directory before bc sees them, and ':=' is used
# so the shell/bc pipeline runs once instead of on every expansion.
KOKKOS_VERSION := $(shell echo "$(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH)" | bc)
# Options: Cuda,HIP,ROCm,OpenMP,Pthread,Serial
KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Pthreads"
#KOKKOS_DEVICES ?= "Pthread"
# Options:
# Intel: KNC,KNL,SNB,HSW,BDW,SKX
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2
# IBM: BGQ,Power7,Power8,Power9
# AMD-GPUS: Kaveri,Carrizo,Fiji,Vega
# AMD-GPUS: Vega900,Vega906
# AMD-CPUS: AMDAVX,Ryzen,EPYC
KOKKOS_ARCH ?= ""
# Options: yes,no
@ -35,6 +40,9 @@ KOKKOS_STANDALONE_CMAKE ?= "no"
# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr
KOKKOS_CUDA_OPTIONS ?= "enable_lambda"
# Options: rdc
KOKKOS_HIP_OPTIONS ?= ""
# Default settings specific options.
# Options: enable_async_dispatch
KOKKOS_HPX_OPTIONS ?= ""
@ -82,29 +90,50 @@ KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPT
KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch)
KOKKOS_INTERNAL_ENABLE_ETI := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_eti)
KOKKOS_INTERNAL_HIP_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),rdc)
# Check for Kokkos Host Execution Spaces one of which must be on.
KOKKOS_INTERNAL_USE_OPENMP := $(call kokkos_has_string,$(subst OpenMPTarget,,$(KOKKOS_DEVICES)),OpenMP)
KOKKOS_INTERNAL_USE_PTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Pthread)
KOKKOS_INTERNAL_USE_QTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Qthreads)
KOKKOS_INTERNAL_USE_HPX := $(call kokkos_has_string,$(KOKKOS_DEVICES),HPX)
KOKKOS_INTERNAL_USE_SERIAL := $(call kokkos_has_string,$(KOKKOS_DEVICES),Serial)
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0)
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0)
KOKKOS_INTERNAL_USE_SERIAL := 1
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0)
KOKKOS_INTERNAL_USE_SERIAL := 1
endif
endif
endif
# Check for other Execution Spaces.
KOKKOS_INTERNAL_USE_CUDA := $(call kokkos_has_string,$(KOKKOS_DEVICES),Cuda)
KOKKOS_INTERNAL_USE_ROCM := $(call kokkos_has_string,$(KOKKOS_DEVICES),ROCm)
KOKKOS_INTERNAL_USE_HIP := $(call kokkos_has_string,$(KOKKOS_DEVICES),HIP)
KOKKOS_INTERNAL_USE_OPENMPTARGET := $(call kokkos_has_string,$(KOKKOS_DEVICES),OpenMPTarget)
# Build KOKKOS_DEVICELIST: one name is appended for each backend whose
# KOKKOS_INTERNAL_USE_* flag evaluated to 1 above. The list starts empty and
# the order of entries is the order of the checks below.
# Note that the names appended are not always the KOKKOS_DEVICES spelling:
# the Pthread backend is recorded as "Threads" and OpenMPTarget as the
# all-caps "OPENMPTARGET". No entry is appended for ROCm in this block.
KOKKOS_DEVICELIST =
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
KOKKOS_DEVICELIST += Serial
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
KOKKOS_DEVICELIST += OpenMP
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
KOKKOS_DEVICELIST += Threads
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
KOKKOS_DEVICELIST += HPX
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_DEVICELIST += Cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
KOKKOS_DEVICELIST += HIP
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_DEVICELIST += OPENMPTARGET
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
ifeq ($(origin CUDA_PATH), undefined)
@ -132,6 +161,7 @@ KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell echo "$(shell export OMP
KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang)
KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple LLVM)
KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC)
KOKKOS_INTERNAL_COMPILER_GCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),GCC)
# Check Host Compiler if using NVCC through nvcc_wrapper
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
@ -180,20 +210,20 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1)
KOKKOS_INTERNAL_COMPILER_WARNINGS =
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
# TODO check if cray accepts GNU style warnings
KOKKOS_INTERNAL_COMPILER_WARNINGS =
else
#gcc
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
endif
endif
endif
@ -230,7 +260,12 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_IBM_XL_OMP45_WORKAROUND -qsmp=omp -qoffload -qnoeh
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp
#KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_WORKAROUND_OPENMPTARGET_CLANG -fopenmp -fopenmp=libomp
KOKKOS_INTERNAL_OPENMPTARGET_LIB := -lomptarget
else
#Assume GCC
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none
endif
endif
@ -353,11 +388,8 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Ryzen)
KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),EPYC)
KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(call kokkos_has_string,$(KOKKOS_ARCH),Kaveri)
KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(call kokkos_has_string,$(KOKKOS_ARCH),Carrizo)
KOKKOS_INTERNAL_USE_ARCH_FIJI := $(call kokkos_has_string,$(KOKKOS_ARCH),Fiji)
KOKKOS_INTERNAL_USE_ARCH_VEGA := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega)
KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(call kokkos_has_string,$(KOKKOS_ARCH),gfx901)
KOKKOS_INTERNAL_USE_ARCH_VEGA900 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega900)
KOKKOS_INTERNAL_USE_ARCH_VEGA906 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega906)
# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
@ -430,6 +462,10 @@ tmp := $(call kokkos_append_header,'\#error "Do not include $(KOKKOS_CONFIG_HEAD
tmp := $(call kokkos_append_header,'\#else')
tmp := $(call kokkos_append_header,'\#define KOKKOS_CORE_CONFIG_H')
tmp := $(call kokkos_append_header,'\#endif')
tmp := $(call kokkos_append_header,"")
tmp := $(call kokkos_append_header,"\#define KOKKOS_VERSION $(KOKKOS_VERSION)")
tmp := $(call kokkos_append_header,"")
tmp := $(call kokkos_append_header,"/* Execution Spaces */")
@ -442,9 +478,15 @@ ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_ROCM')
tmp := $(call kokkos_append_header,'\#define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1')
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_HIP')
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMPTARGET')
ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_WORKAROUND_OPENMPTARGET_GCC")
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
@ -455,10 +497,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_THREADS")
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_QTHREADS")
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX")
endif
@ -966,6 +1004,14 @@ endif
# Figure out the architecture flag for Cuda.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_INTERNAL_USE_CUDA_ARCH=1
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
KOKKOS_INTERNAL_USE_CUDA_ARCH=1
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
@ -974,7 +1020,17 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
else
$(error Makefile.kokkos: CUDA is enabled but the compiler is neither NVCC nor Clang (got version string $(KOKKOS_CXX_VERSION)) )
endif
KOKKOS_INTERNAL_USE_CUDA_ARCH = 1
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march
endif
KOKKOS_INTERNAL_USE_CUDA_ARCH = 1
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER30")
@ -1042,55 +1098,49 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
endif
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
KOKKOS_CXXFLAGS += --expt-extended-lambda
endif
endif
# Figure out the architecture flag for ROCm.
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
# Let's start by adding architecture defines
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KAVERI), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 701")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KAVERI")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx701
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA900), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 900")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA900")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx900
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_CARRIZO), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 801")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_CARRIZO")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx801
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 906")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA906")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx906
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_FIJI), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 803")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_FIJI")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx803
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 900")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx900
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_GFX901), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 901")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_GFX901")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx901
endif
KOKKOS_INTERNAL_HCC_PATH := $(shell which $(CXX))
ROCM_HCC_PATH ?= $(KOKKOS_INTERNAL_HCC_PATH:/bin/clang++=)
KOKKOS_CXXFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --cxxflags)
KOKKOS_LDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm
KOKKOS_CXXLDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm
KOKKOS_TPL_LIBRARY_NAMES += hc_am m
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_ROCM_ARCH_FLAG)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.cpp)
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/ROCm/*.cpp)
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.hpp)
KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG)
KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG)
# Relocatable device code for HIP. KOKKOS_INTERNAL_HIP_USE_RELOC is 1 when
# "rdc" appears in KOKKOS_HIP_OPTIONS. In that case the config header gets
# KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE and -fgpu-rdc is passed to both the
# compile and the link step; otherwise -fno-gpu-rdc is passed explicitly to
# both, so the compile and link settings always agree.
ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE")
KOKKOS_CXXFLAGS+=-fgpu-rdc
KOKKOS_LDFLAGS+=-fgpu-rdc
else
KOKKOS_CXXFLAGS+=-fno-gpu-rdc
KOKKOS_LDFLAGS+=-fno-gpu-rdc
endif
endif
KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1)
ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
@ -1141,7 +1191,7 @@ endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_SRC += $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.hpp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
@ -1149,6 +1199,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
endif
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
KOKKOS_LIBS += $(KOKKOS_INTERNAL_OPENMPTARGET_LIB)
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
@ -1178,22 +1229,6 @@ endif
KOKKOS_TPL_LIBRARY_NAMES += pthread
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp)
ifneq ($(KOKKOS_CMAKE), yes)
ifneq ($(QTHREADS_PATH),)
KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include
KOKKOS_LIBDIRS += -L$(QTHREADS_PATH)/lib
KOKKOS_CXXLDFLAGS += -L$(QTHREADS_PATH)/lib
KOKKOS_TPL_INCLUDE_DIRS += $(QTHREADS_PATH)/include
KOKKOS_TPL_LIBRARY_DIRS += $(QTHREADS_PATH)/lib64
endif
KOKKOS_LIBS += -lqthread
KOKKOS_TPL_LIBRARY_NAMES += qthread
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.hpp)

View File

@ -55,6 +55,17 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
Kokkos_HIP_Space.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Space.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Space.cpp
Kokkos_HIP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp
Kokkos_HIP_KernelLaunch.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_KernelLaunch.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_KernelLaunch.cpp
Kokkos_HIP_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
Kokkos_ROCm_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Exec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Exec.cpp
@ -79,13 +90,6 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
Kokkos_QthreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_QthreadsExec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_QthreadsExec.cpp
Kokkos_Qthreads_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_Qthreads_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_Qthreads_Task.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
@ -106,10 +110,12 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
Kokkos_OpenMPTarget_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp
Kokkos_OpenMPTarget_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp
Kokkos_OpenMPTargetSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
#Kokkos_OpenMPTarget_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
# $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
Kokkos_OpenMPTarget_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
endif
Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp

View File

@ -11,8 +11,8 @@ CUDA, HPX, OpenMP and Pthreads as backend programming models with several other
backends in development.
Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem,
which also provides math kernels (https://github.com/kokkos/kokkos-kernels), as well as
profiling and debugging tools (https://github.com/kokkos/kokkos-tools).
which also provides math kernels (https://github.com/kokkos/kokkos-kernels), as well as
profiling and debugging tools (https://github.com/kokkos/kokkos-tools).
# Learning about Kokkos
@ -23,7 +23,7 @@ For questions find us on Slack: https://kokkosteam.slack.com or open a github is
For non-public questions send an email to
crtrott(at)sandia.gov
A separate repository with extensive tutorial material can be found under
A separate repository with extensive tutorial material can be found under
https://github.com/kokkos/kokkos-tutorials.
Furthermore, the 'example/tutorial' directory provides step by step tutorial
@ -41,12 +41,12 @@ To learn more about Kokkos consider watching one of our presentations:
# Contributing to Kokkos
We are open and try to encourage contributions from external developers.
We are open and try to encourage contributions from external developers.
To do so please first open an issue describing the contribution and then issue
a pull request against the develop branch. For larger features it may be good
to get guidance from the core development team first through the github issue.
to get guidance from the core development team first through the github issue.
Note that Kokkos Core is licensed under standard 3-clause BSD terms of use.
Note that Kokkos Core is licensed under standard 3-clause BSD terms of use.
This means contributing to Kokkos allows anyone else to use your contributions
not just for public purposes but also for closed source commercial projects.
For specifics see the LICENSE file contained in the repository or distribution.
@ -94,9 +94,9 @@ For specifics see the LICENSE file contained in the repository or distribution.
* Intel 18.2.199 (with gcc 4.9.3)
### Primary tested compilers on ARM (Cavium ThunderX2)
* GCC 7.2.0
* GCC 7.2.0
* ARM/Clang 18.4.0
### Other compilers working:
* X86:
* Cygwin 2.1.0 64bit with gcc 4.9.3
@ -110,47 +110,47 @@ For specifics see the LICENSE file contained in the repository or distribution.
Primary tested compilers are passing in release mode
with warnings as errors. They also are tested with a comprehensive set of
with warnings as errors. They also are tested with a comprehensive set of
backend combinations (i.e. OpenMP, Pthreads, Serial, OpenMP+Serial, ...).
We are using the following set of flags:
* GCC:
* GCC:
````
-Wall -Wshadow -pedantic
-Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wignored-qualifiers -Wempty-body
-Wignored-qualifiers -Wempty-body
-Wclobbered -Wuninitialized
````
* Intel:
* Intel:
````
-Wall -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wuninitialized
````
* Clang:
* Clang:
````
-Wall -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wuninitialized
````
````
* NVCC:
* NVCC:
````
-Wall -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wuninitialized
````
Other compilers are tested occasionally, in particular when pushing from develop to
Other compilers are tested occasionally, in particular when pushing from develop to
master branch. These are tested less rigorously without `-Werror` and only for a select set of backends.
# Building and Installing Kokkos
Kokkos provides a CMake build system and a raw Makefile build system.
Kokkos provides a CMake build system and a raw Makefile build system.
The CMake build system is strongly encouraged and will be the most rigorously supported in future releases.
Full details are given in the [build instructions](BUILD.md). Basic setups are shown here:
## CMake
The best way to install Kokkos is using the CMake build system. Assuming Kokkos lives in `$srcdir`:
The best way to install Kokkos is using the CMake build system. Assuming Kokkos lives in `$srcdir`:
````
cmake $srcdir \
-DCMAKE_CXX_COMPILER=$path_to_compiler \
@ -162,9 +162,9 @@ cmake $srcdir \
````
then simply type `make install`. The Kokkos CMake package will then be installed in `$path_to_install` to be used by downstream packages.
To validate the Kokkos build, configure with
To validate the Kokkos build, configure with
````
-DKokkos_ENABLE_TESTS=On
-DKokkos_ENABLE_TESTS=On
````
and run `make test` after completing the build.
@ -209,7 +209,7 @@ For a complete list of Kokkos options, run:
spack info kokkos
````
Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable".
Generally, Spack usage should never really require you to reference the computer-generated unique install folder.
Generally, Spack usage should never really require you to reference the computer-generated unique install folder.
More details are given in the [build instructions](BUILD.md). If you must know, you can locate Spack Kokkos installations with:
````
spack find -p kokkos ...
@ -217,7 +217,7 @@ spack find -p kokkos ...
where `...` is the unique spec identifying the particular Kokkos configuration and version.
## Raw Makefile
## Raw Makefile
A bash script is provided to generate raw makefiles.
To install Kokkos as a library create a build directory and run the following
````
@ -240,33 +240,33 @@ changing the device type for which to build.
For individual projects, it may be preferable to build Kokkos inline rather than link to an installed package.
The main reason is that you may otherwise need many different
configurations of Kokkos installed depending on the required compile time
features an application needs. For example there is only one default
features an application needs. For example there is only one default
execution space, which means you need different installations to have OpenMP
or Pthreads as the default space. Also for the CUDA backend there are certain
choices, such as allowing relocatable device code, which must be made at
choices, such as allowing relocatable device code, which must be made at
installation time. Building Kokkos inline uses largely the same process
as compiling an application against an installed Kokkos library.
as compiling an application against an installed Kokkos library.
For CMake, this means copying over the Kokkos source code into your project and adding `add_subdirectory(kokkos)` to your CMakeLists.txt.
For raw Makefiles, see the example benchmarks/bytes_and_flops/Makefile which can be used with an installed library and/or an inline build.
For raw Makefiles, see the example benchmarks/bytes_and_flops/Makefile which can be used with an installed library and/or an inline build.
# Kokkos and CUDA UVM
Kokkos does support UVM as a specific memory space called CudaUVMSpace.
Allocations made with that space are accessible from host and device.
Kokkos does support UVM as a specific memory space called CudaUVMSpace.
Allocations made with that space are accessible from host and device.
You can tell Kokkos to use that as the default space for Cuda allocations.
In either case UVM comes with a number of restrictions:
* You can't access allocations on the host while a kernel is potentially
running. This will lead to segfaults. To avoid that you either need to
* You can't access allocations on the host while a kernel is potentially
running. This will lead to segfaults. To avoid that you either need to
call Kokkos::Cuda::fence() (or just Kokkos::fence()), after kernels, or
you can set the environment variable CUDA_LAUNCH_BLOCKING=1.
* On multi-socket, multi-GPU machines without NVLINK, UVM defaults
* On multi-socket, multi-GPU machines without NVLINK, UVM defaults
to using zero copy allocations for technical reasons related to using multiple
GPUs from the same process. If an executable doesn't do that (e.g. each
MPI rank of an application uses a single GPU [can be the same GPU for
MPI rank of an application uses a single GPU [can be the same GPU for
multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1.
This will enforce proper UVM allocations, but can lead to errors if
This will enforce proper UVM allocations, but can lead to errors if
more than a single GPU is used by a single process.

View File

@ -537,6 +537,145 @@ struct rand<Generator, Kokkos::complex<double> > {
}
};
template <class DeviceType>
class Random_XorShift1024_Pool;
namespace Impl {
// State holder that owns a private copy of the sixteen 64-bit state words.
// This primary template is used for every UseCArrayState value that has no
// explicit specialization.
template <bool UseCArrayState>
struct Random_XorShift1024_State {
  uint64_t state_[16];

  KOKKOS_DEFAULTED_FUNCTION
  Random_XorShift1024_State() = default;

  // Copies the sixteen entries v(state_idx, 0..15) into the local array.
  template <class StateViewType>
  KOKKOS_FUNCTION Random_XorShift1024_State(const StateViewType& v,
                                            int state_idx) {
    for (int word = 0; word < 16; ++word) {
      state_[word] = v(state_idx, word);
    }
  }

  // Read access to state word i.
  KOKKOS_FUNCTION
  uint64_t operator[](const int i) const { return state_[i]; }

  // Write access to state word i.
  KOKKOS_FUNCTION
  uint64_t& operator[](const int i) { return state_[i]; }
};
// State holder that does not copy anything: it keeps the address of word 0 of
// one row of the state view plus the view's stride_1(), and addresses word i
// as state_[i * stride_].
template <>
struct Random_XorShift1024_State<false> {
  uint64_t* state_;
  const int stride_;

  // Default state: no storage attached, unit stride.
  KOKKOS_FUNCTION
  Random_XorShift1024_State() : state_(nullptr), stride_(1) {}

  // Attaches to row state_idx of the view v.
  template <class StateViewType>
  KOKKOS_FUNCTION Random_XorShift1024_State(const StateViewType& v,
                                            int state_idx)
      : state_(&v(state_idx, 0)), stride_(v.stride_1()) {}

  // Read access to state word i.
  KOKKOS_FUNCTION
  uint64_t operator[](const int i) const {
    const int offset = i * stride_;
    return state_[offset];
  }

  // Write access to state word i.
  KOKKOS_FUNCTION
  uint64_t& operator[](const int i) {
    const int offset = i * stride_;
    return state_[offset];
  }
};
// Compile-time trait keyed on an execution space. Its ::value is what
// Random_XorShift1024 passes as the bool argument of
// Random_XorShift1024_State. The default is true_type; the backends below are
// switched to false_type when they are enabled.
template <class ExecutionSpace>
struct Random_XorShift1024_UseCArrayState : std::true_type {};
// CUDA backend: false_type.
#ifdef KOKKOS_ENABLE_CUDA
template <>
struct Random_XorShift1024_UseCArrayState<Kokkos::Cuda> : std::false_type {};
#endif
// HIP backend: false_type.
#ifdef KOKKOS_ENABLE_HIP
template <>
struct Random_XorShift1024_UseCArrayState<Kokkos::Experimental::HIP>
: std::false_type {};
#endif
// OpenMPTarget backend: false_type.
#ifdef KOKKOS_ENABLE_OPENMPTARGET
template <>
struct Random_XorShift1024_UseCArrayState<Kokkos::Experimental::OpenMPTarget>
: std::false_type {};
#endif
// Generic way of choosing the pool slot for the calling thread.
// The lock view argument is accepted but not used by this primary template.
template <class ExecutionSpace>
struct Random_UniqueIndex {
  using locks_view_type = View<int*, ExecutionSpace>;

  // When KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST is defined, returns the
  // execution space's hardware thread id (deprecated or impl_ spelling,
  // depending on KOKKOS_ENABLE_DEPRECATED_CODE); otherwise returns 0.
  KOKKOS_FUNCTION
  static int get_state_idx(const locks_view_type) {
#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
#if defined(KOKKOS_ENABLE_DEPRECATED_CODE)
    return ExecutionSpace::hardware_thread_id();
#else
    return ExecutionSpace::impl_hardware_thread_id();
#endif
#else
    return 0;
#endif
  }
};
#ifdef KOKKOS_ENABLE_CUDA
// Cuda specialization: chooses a slot of the lock view for the calling thread
// and marks it as taken.
template <>
struct Random_UniqueIndex<Kokkos::Cuda> {
using locks_view_type = View<int*, Kokkos::Cuda>;
// Returns the index of the slot in locks_ that this call set to 1.
// When __CUDA_ARCH__ is not defined, nothing is locked and 0 is returned.
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type& locks_) {
#ifdef __CUDA_ARCH__
// Flattened index of this thread inside its block.
const int i_offset =
(threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
// First candidate slot: flattened block index times threads per block, plus
// i_offset, reduced modulo the number of lock slots.
int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
blockDim.x * blockDim.y * blockDim.z +
i_offset) %
locks_.extent(0);
// Try to switch locks_(i) from 0 to 1. The loop continues while the call
// returns nonzero -- presumably the previous value, meaning the slot was
// already taken (confirm against Kokkos::atomic_compare_exchange).
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
// Advance by one block's worth of threads; past the end, restart at i_offset.
i += blockDim.x * blockDim.y * blockDim.z;
if (i >= static_cast<int>(locks_.extent(0))) {
// NOTE(review): assumes i_offset < locks_.extent(0); not checked here.
i = i_offset;
}
}
return i;
#else
// Silence the unused-parameter warning.
(void)locks_;
return 0;
#endif
}
};
#endif
#ifdef KOKKOS_ENABLE_HIP
// HIP specialization: chooses a slot of the lock view for the calling thread
// and marks it as taken.
template <>
struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
using locks_view_type = View<int*, Kokkos::Experimental::HIP>;
// Returns the index of the slot in locks_ that this call set to 1.
// When __HIP_DEVICE_COMPILE__ is not defined, nothing is locked and 0 is
// returned.
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type& locks_) {
#ifdef __HIP_DEVICE_COMPILE__
// Flattened index of this thread inside its block.
const int i_offset =
(hipThreadIdx_x * hipBlockDim_y + hipThreadIdx_y) * hipBlockDim_z +
hipThreadIdx_z;
// First candidate slot: flattened block index times threads per block, plus
// i_offset, reduced modulo the number of lock slots.
int i = (((hipBlockIdx_x * hipGridDim_y + hipBlockIdx_y) * hipGridDim_z +
hipBlockIdx_z) *
hipBlockDim_x * hipBlockDim_y * hipBlockDim_z +
i_offset) %
locks_.extent(0);
// Try to switch locks_(i) from 0 to 1. The loop continues while the call
// returns nonzero -- presumably the previous value, meaning the slot was
// already taken (confirm against Kokkos::atomic_compare_exchange).
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
// Advance by one block's worth of threads; past the end, restart at i_offset.
i += hipBlockDim_x * hipBlockDim_y * hipBlockDim_z;
if (i >= static_cast<int>(locks_.extent(0))) {
// NOTE(review): assumes i_offset < locks_.extent(0); not checked here.
i = i_offset;
}
}
return i;
#else
// Silence the unused-parameter warning.
(void)locks_;
return 0;
#endif
}
};
#endif
} // namespace Impl
template <class DeviceType>
class Random_XorShift64_Pool;
@ -550,10 +689,10 @@ class Random_XorShift64 {
public:
typedef DeviceType device_type;
enum { MAX_URAND = 0xffffffffU };
enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 };
enum { MAX_RAND = static_cast<int>(0xffffffff / 2) };
enum { MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffLL / 2 - 1) };
constexpr static uint32_t MAX_URAND = std::numeric_limits<uint32_t>::max();
constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max();
constexpr static int32_t MAX_RAND = std::numeric_limits<int32_t>::max();
constexpr static int64_t MAX_RAND64 = std::numeric_limits<int64_t>::max();
KOKKOS_INLINE_FUNCTION
Random_XorShift64(uint64_t state, int state_idx = 0)
@ -637,10 +776,12 @@ class Random_XorShift64 {
}
KOKKOS_INLINE_FUNCTION
float frand() { return 1.0f * urand64() / MAX_URAND64; }
float frand() { return urand64() / static_cast<float>(MAX_URAND64); }
KOKKOS_INLINE_FUNCTION
float frand(const float& range) { return range * urand64() / MAX_URAND64; }
float frand(const float& range) {
return range * urand64() / static_cast<float>(MAX_URAND64);
}
KOKKOS_INLINE_FUNCTION
float frand(const float& start, const float& end) {
@ -648,10 +789,12 @@ class Random_XorShift64 {
}
KOKKOS_INLINE_FUNCTION
double drand() { return 1.0 * urand64() / MAX_URAND64; }
double drand() { return urand64() / static_cast<double>(MAX_URAND64); }
KOKKOS_INLINE_FUNCTION
double drand(const double& range) { return range * urand64() / MAX_URAND64; }
double drand(const double& range) {
return range * urand64() / static_cast<double>(MAX_URAND64);
}
KOKKOS_INLINE_FUNCTION
double drand(const double& start, const double& end) {
@ -662,6 +805,11 @@ class Random_XorShift64 {
// number
KOKKOS_INLINE_FUNCTION
double normal() {
#ifndef __HIP_DEVICE_COMPILE__ // FIXME_HIP
using std::sqrt;
#else
using ::sqrt;
#endif
double S = 2.0;
double U;
while (S >= 1.0) {
@ -669,7 +817,7 @@ class Random_XorShift64 {
const double V = 2.0 * drand() - 1.0;
S = U * U + V * V;
}
return U * std::sqrt(-2.0 * log(S) / S);
return U * sqrt(-2.0 * log(S) / S);
}
KOKKOS_INLINE_FUNCTION
@ -681,9 +829,10 @@ class Random_XorShift64 {
template <class DeviceType = Kokkos::DefaultExecutionSpace>
class Random_XorShift64_Pool {
private:
typedef View<int*, DeviceType> lock_type;
using execution_space = typename DeviceType::execution_space;
typedef View<int*, execution_space> locks_type;
typedef View<uint64_t*, DeviceType> state_data_type;
lock_type locks_;
locks_type locks_;
state_data_type state_;
int num_states_;
@ -695,11 +844,8 @@ class Random_XorShift64_Pool {
Random_XorShift64_Pool() { num_states_ = 0; }
Random_XorShift64_Pool(uint64_t seed) {
num_states_ = 0;
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
init(seed, DeviceType::max_hardware_threads());
#else
init(seed, DeviceType::impl_max_hardware_threads());
#endif
init(seed, execution_space().concurrency());
}
KOKKOS_INLINE_FUNCTION
@ -719,11 +865,11 @@ class Random_XorShift64_Pool {
num_states_ = num_states;
locks_ = lock_type("Kokkos::Random_XorShift64::locks", num_states_);
locks_ = locks_type("Kokkos::Random_XorShift64::locks", num_states_);
state_ = state_data_type("Kokkos::Random_XorShift64::state", num_states_);
typename state_data_type::HostMirror h_state = create_mirror_view(state_);
typename lock_type::HostMirror h_lock = create_mirror_view(locks_);
typename locks_type::HostMirror h_lock = create_mirror_view(locks_);
// Execute on the HostMirror's default execution space.
Random_XorShift64<typename state_data_type::HostMirror::execution_space>
@ -746,13 +892,8 @@ class Random_XorShift64_Pool {
KOKKOS_INLINE_FUNCTION
Random_XorShift64<DeviceType> get_state() const {
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
const int i = DeviceType::hardware_thread_id();
;
#else
const int i = DeviceType::impl_hardware_thread_id();
;
#endif
const int i =
Impl::Random_UniqueIndex<execution_space>::get_state_idx(locks_);
return Random_XorShift64<DeviceType>(state_(i), i);
}
@ -765,35 +906,35 @@ class Random_XorShift64_Pool {
KOKKOS_INLINE_FUNCTION
void free_state(const Random_XorShift64<DeviceType>& state) const {
state_(state.state_idx_) = state.state_;
locks_(state.state_idx_) = 0;
}
};
template <class DeviceType>
class Random_XorShift1024_Pool;
template <class DeviceType>
class Random_XorShift1024 {
using execution_space = typename DeviceType::execution_space;
private:
int p_;
const int state_idx_;
uint64_t state_[16];
Impl::Random_XorShift1024_State<
Impl::Random_XorShift1024_UseCArrayState<execution_space>::value>
state_;
friend class Random_XorShift1024_Pool<DeviceType>;
public:
typedef Random_XorShift1024_Pool<DeviceType> pool_type;
typedef DeviceType device_type;
enum { MAX_URAND = 0xffffffffU };
enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 };
enum { MAX_RAND = static_cast<int>(0xffffffffU / 2) };
enum { MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL / 2 - 1) };
constexpr static uint32_t MAX_URAND = std::numeric_limits<uint32_t>::max();
constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max();
constexpr static int32_t MAX_RAND = std::numeric_limits<int32_t>::max();
constexpr static int64_t MAX_RAND64 = std::numeric_limits<int64_t>::max();
KOKKOS_INLINE_FUNCTION
Random_XorShift1024(const typename pool_type::state_data_type& state, int p,
int state_idx = 0)
: p_(p), state_idx_(state_idx) {
for (int i = 0; i < 16; i++) state_[i] = state(state_idx, i);
}
: p_(p), state_idx_(state_idx), state_(state, state_idx) {}
KOKKOS_INLINE_FUNCTION
uint32_t urand() {
@ -876,10 +1017,12 @@ class Random_XorShift1024 {
}
KOKKOS_INLINE_FUNCTION
float frand() { return 1.0f * urand64() / MAX_URAND64; }
float frand() { return urand64() / static_cast<float>(MAX_URAND64); }
KOKKOS_INLINE_FUNCTION
float frand(const float& range) { return range * urand64() / MAX_URAND64; }
float frand(const float& range) {
return range * urand64() / static_cast<float>(MAX_URAND64);
}
KOKKOS_INLINE_FUNCTION
float frand(const float& start, const float& end) {
@ -887,10 +1030,12 @@ class Random_XorShift1024 {
}
KOKKOS_INLINE_FUNCTION
double drand() { return 1.0 * urand64() / MAX_URAND64; }
double drand() { return urand64() / static_cast<double>(MAX_URAND64); }
KOKKOS_INLINE_FUNCTION
double drand(const double& range) { return range * urand64() / MAX_URAND64; }
double drand(const double& range) {
return range * urand64() / static_cast<double>(MAX_URAND64);
}
KOKKOS_INLINE_FUNCTION
double drand(const double& start, const double& end) {
@ -901,6 +1046,11 @@ class Random_XorShift1024 {
// number
KOKKOS_INLINE_FUNCTION
double normal() {
#ifndef KOKKOS_ENABLE_HIP // FIXME_HIP
using std::sqrt;
#else
using ::sqrt;
#endif
double S = 2.0;
double U;
while (S >= 1.0) {
@ -908,7 +1058,7 @@ class Random_XorShift1024 {
const double V = 2.0 * drand() - 1.0;
S = U * U + V * V;
}
return U * std::sqrt(-2.0 * log(S) / S);
return U * sqrt(-2.0 * log(S) / S);
}
KOKKOS_INLINE_FUNCTION
@ -920,10 +1070,12 @@ class Random_XorShift1024 {
template <class DeviceType = Kokkos::DefaultExecutionSpace>
class Random_XorShift1024_Pool {
private:
using execution_space = typename DeviceType::execution_space;
typedef View<int*, execution_space> locks_type;
typedef View<int*, DeviceType> int_view_type;
typedef View<uint64_t * [16], DeviceType> state_data_type;
int_view_type locks_;
locks_type locks_;
state_data_type state_;
int_view_type p_;
int num_states_;
@ -939,11 +1091,8 @@ class Random_XorShift1024_Pool {
inline Random_XorShift1024_Pool(uint64_t seed) {
num_states_ = 0;
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
init(seed, DeviceType::max_hardware_threads());
#else
init(seed, DeviceType::impl_max_hardware_threads());
#endif
init(seed, execution_space().concurrency());
}
KOKKOS_INLINE_FUNCTION
@ -965,12 +1114,12 @@ class Random_XorShift1024_Pool {
inline void init(uint64_t seed, int num_states) {
if (seed == 0) seed = uint64_t(1318319);
num_states_ = num_states;
locks_ = int_view_type("Kokkos::Random_XorShift1024::locks", num_states_);
locks_ = locks_type("Kokkos::Random_XorShift1024::locks", num_states_);
state_ = state_data_type("Kokkos::Random_XorShift1024::state", num_states_);
p_ = int_view_type("Kokkos::Random_XorShift1024::p", num_states_);
typename state_data_type::HostMirror h_state = create_mirror_view(state_);
typename int_view_type::HostMirror h_lock = create_mirror_view(locks_);
typename locks_type::HostMirror h_lock = create_mirror_view(locks_);
typename int_view_type::HostMirror h_p = create_mirror_view(p_);
// Execute on the HostMirror's default execution space.
@ -997,11 +1146,8 @@ class Random_XorShift1024_Pool {
KOKKOS_INLINE_FUNCTION
Random_XorShift1024<DeviceType> get_state() const {
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
const int i = DeviceType::hardware_thread_id();
#else
const int i = DeviceType::impl_hardware_thread_id();
#endif
const int i =
Impl::Random_UniqueIndex<execution_space>::get_state_idx(locks_);
return Random_XorShift1024<DeviceType>(state_, p_(i), i);
};
@ -1014,482 +1160,11 @@ class Random_XorShift1024_Pool {
KOKKOS_INLINE_FUNCTION
void free_state(const Random_XorShift1024<DeviceType>& state) const {
for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i];
p_(state.state_idx_) = state.p_;
p_(state.state_idx_) = state.p_;
locks_(state.state_idx_) = 0;
}
};
#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)
// XorShift1024 generator specialized for Kokkos::Cuda.
//
// The generator does not copy its state: state_ is the address of word 0 of
// row state_idx of the pool's state view and stride_ is that view's
// stride_1(), so word k is state_[k * stride_] and every draw updates the
// view's storage in place.
template <>
class Random_XorShift1024<Kokkos::Cuda> {
 private:
  int p_;                // index of the current state word; advanced mod 16
  const int state_idx_;  // row of the state view this generator works on
  uint64_t* state_;      // address of word 0 of that row
  const int stride_;     // element distance between consecutive words
  friend class Random_XorShift1024_Pool<Kokkos::Cuda>;

 public:
  typedef Kokkos::Cuda device_type;
  typedef Random_XorShift1024_Pool<device_type> pool_type;

  enum { MAX_URAND = 0xffffffffU };
  enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 };
  enum { MAX_RAND = static_cast<int>(0xffffffffU / 2) };
  enum { MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL / 2 - 1) };

  // Binds the generator to row state_idx of `state`, starting at word p.
  KOKKOS_INLINE_FUNCTION
  Random_XorShift1024(const typename pool_type::state_data_type& state, int p,
                      int state_idx = 0)
      : p_(p),
        state_idx_(state_idx),
        state_(&state(state_idx, 0)),
        stride_(state.stride_1()) {}

  // Advances the state by one step and returns 32 bits of the result.
  KOKKOS_INLINE_FUNCTION
  uint32_t urand() {
    uint64_t state_0 = state_[p_ * stride_];
    uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_];
    state_1 ^= state_1 << 31;
    state_1 ^= state_1 >> 11;
    state_0 ^= state_0 >> 30;
    uint64_t tmp =
        (state_[p_ * stride_] = state_0 ^ state_1) * 1181783497276652981ULL;
    tmp = tmp >> 16;
    return static_cast<uint32_t>(tmp & MAX_URAND);
  }

  // Advances the state by one step and returns a 64-bit value.
  KOKKOS_INLINE_FUNCTION
  uint64_t urand64() {
    uint64_t state_0 = state_[p_ * stride_];
    uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_];
    state_1 ^= state_1 << 31;
    state_1 ^= state_1 >> 11;
    state_0 ^= state_0 >> 30;
    return ((state_[p_ * stride_] = state_0 ^ state_1) *
            1181783497276652981LL) -
           1;
  }

  // Returns a value in [0, range). `range` must be nonzero (it is a divisor).
  KOKKOS_INLINE_FUNCTION
  uint32_t urand(const uint32_t& range) {
    const uint32_t max_val = (MAX_URAND / range) * range;
    uint32_t tmp           = urand();
    // Rejection sampling: the redraw has to be stored back into tmp, otherwise
    // the loop condition never changes and the loop cannot terminate.
    while (tmp >= max_val) tmp = urand();
    return tmp % range;
  }

  // Returns a value in [start, end).
  KOKKOS_INLINE_FUNCTION
  uint32_t urand(const uint32_t& start, const uint32_t& end) {
    return urand(end - start) + start;
  }

  // Returns a value in [0, range). `range` must be nonzero (it is a divisor).
  KOKKOS_INLINE_FUNCTION
  uint64_t urand64(const uint64_t& range) {
    const uint64_t max_val = (MAX_URAND64 / range) * range;
    uint64_t tmp           = urand64();
    while (tmp >= max_val) tmp = urand64();
    return tmp % range;
  }

  // Returns a value in [start, end).
  KOKKOS_INLINE_FUNCTION
  uint64_t urand64(const uint64_t& start, const uint64_t& end) {
    return urand64(end - start) + start;
  }

  // Returns urand() / 2 as an int.
  KOKKOS_INLINE_FUNCTION
  int rand() { return static_cast<int>(urand() / 2); }

  // Returns a value in [0, range). `range` must be nonzero (it is a divisor).
  KOKKOS_INLINE_FUNCTION
  int rand(const int& range) {
    const int max_val = (MAX_RAND / range) * range;
    int tmp           = rand();
    while (tmp >= max_val) tmp = rand();
    return tmp % range;
  }

  // Returns a value in [start, end).
  KOKKOS_INLINE_FUNCTION
  int rand(const int& start, const int& end) {
    return rand(end - start) + start;
  }

  // Returns urand64() / 2 as an int64_t.
  KOKKOS_INLINE_FUNCTION
  int64_t rand64() { return static_cast<int64_t>(urand64() / 2); }

  // Returns a value in [0, range). `range` must be nonzero (it is a divisor).
  KOKKOS_INLINE_FUNCTION
  int64_t rand64(const int64_t& range) {
    const int64_t max_val = (MAX_RAND64 / range) * range;
    int64_t tmp           = rand64();
    while (tmp >= max_val) tmp = rand64();
    return tmp % range;
  }

  // Returns a value in [start, end).
  KOKKOS_INLINE_FUNCTION
  int64_t rand64(const int64_t& start, const int64_t& end) {
    return rand64(end - start) + start;
  }

  // Single-precision draw: urand64() scaled by 1 / MAX_URAND64.
  KOKKOS_INLINE_FUNCTION
  float frand() { return 1.0f * urand64() / MAX_URAND64; }

  // Single-precision draw scaled by `range`.
  KOKKOS_INLINE_FUNCTION
  float frand(const float& range) { return range * urand64() / MAX_URAND64; }

  // Single-precision draw scaled to (end - start) and shifted by start.
  KOKKOS_INLINE_FUNCTION
  float frand(const float& start, const float& end) {
    return frand(end - start) + start;
  }

  // Double-precision draw: urand64() scaled by 1 / MAX_URAND64.
  KOKKOS_INLINE_FUNCTION
  double drand() { return 1.0 * urand64() / MAX_URAND64; }

  // Double-precision draw scaled by `range`.
  KOKKOS_INLINE_FUNCTION
  double drand(const double& range) { return range * urand64() / MAX_URAND64; }

  // Double-precision draw scaled to (end - start) and shifted by start.
  // Uses drand (not frand) so the result is not rounded through float.
  KOKKOS_INLINE_FUNCTION
  double drand(const double& start, const double& end) {
    return drand(end - start) + start;
  }

  // Marsaglia polar method for drawing a standard normal distributed random
  // number
  KOKKOS_INLINE_FUNCTION
  double normal() {
    double S = 2.0;
    double U;
    while (S >= 1.0) {
      U              = 2.0 * drand() - 1.0;
      const double V = 2.0 * drand() - 1.0;
      S              = U * U + V * V;
    }
    return U * std::sqrt(-2.0 * log(S) / S);
  }

  // Normal draw with the given mean and standard deviation.
  KOKKOS_INLINE_FUNCTION
  double normal(const double& mean, const double& std_dev = 1.0) {
    return mean + normal() * std_dev;
  }
};
// Seeded constructor for the Cuda pool: always requests 4 * 32768 states.
template <>
inline Random_XorShift64_Pool<Kokkos::Cuda>::Random_XorShift64_Pool(
    uint64_t seed) {
  const int requested_states = 4 * 32768;
  num_states_                = 0;
  init(seed, requested_states);
}
// Hands out a generator from the Cuda pool. In device code a slot is chosen
// and its lock set; in a host pass slot 0 is returned without locking.
template <>
KOKKOS_INLINE_FUNCTION Random_XorShift64<Kokkos::Cuda>
Random_XorShift64_Pool<Kokkos::Cuda>::get_state() const {
#ifdef __CUDA_ARCH__
// Flattened index of this thread inside its block.
const int i_offset =
(threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
// First candidate slot: flattened global thread index modulo num_states_.
int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
blockDim.x * blockDim.y * blockDim.z +
i_offset) %
num_states_;
// Try to switch locks_(i) from 0 to 1; keeps looping while the call returns
// nonzero (presumably the previous value, i.e. the slot was already taken --
// confirm against Kokkos::atomic_compare_exchange).
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
// Advance by one block's worth of threads; past the end, restart at i_offset.
i += blockDim.x * blockDim.y * blockDim.z;
if (i >= num_states_) {
i = i_offset;
}
}
return Random_XorShift64<Kokkos::Cuda>(state_(i), i);
#else
return Random_XorShift64<Kokkos::Cuda>(state_(0), 0);
#endif
}
// Returns a generator to the Cuda pool: its state is stored back into the
// slot it came from and, in device code, that slot's lock is reset to 0.
template <>
KOKKOS_INLINE_FUNCTION void Random_XorShift64_Pool<Kokkos::Cuda>::free_state(
    const Random_XorShift64<Kokkos::Cuda>& state) const {
  const auto& slot = state.state_idx_;
  state_(slot)     = state.state_;
#ifdef __CUDA_ARCH__
  locks_(slot) = 0;
#endif
}
// Seeded constructor for the Cuda pool: always requests 4 * 32768 states.
template <>
inline Random_XorShift1024_Pool<Kokkos::Cuda>::Random_XorShift1024_Pool(
    uint64_t seed) {
  const int requested_states = 4 * 32768;
  num_states_                = 0;
  init(seed, requested_states);
}
// Hands out a generator from the Cuda pool. In device code a slot is chosen
// and its lock set; in a host pass slot 0 is returned without locking.
template <>
KOKKOS_INLINE_FUNCTION Random_XorShift1024<Kokkos::Cuda>
Random_XorShift1024_Pool<Kokkos::Cuda>::get_state() const {
#ifdef __CUDA_ARCH__
// Flattened index of this thread inside its block.
const int i_offset =
(threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
// First candidate slot: flattened global thread index modulo num_states_.
int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
blockDim.x * blockDim.y * blockDim.z +
i_offset) %
num_states_;
// Try to switch locks_(i) from 0 to 1; keeps looping while the call returns
// nonzero (presumably the previous value, i.e. the slot was already taken --
// confirm against Kokkos::atomic_compare_exchange).
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
// Advance by one block's worth of threads; past the end, restart at i_offset.
i += blockDim.x * blockDim.y * blockDim.z;
if (i >= num_states_) {
i = i_offset;
}
}
// The generator receives the whole state view, the slot's saved word index
// p_(i), and the slot number.
return Random_XorShift1024<Kokkos::Cuda>(state_, p_(i), i);
#else
return Random_XorShift1024<Kokkos::Cuda>(state_, p_(0), 0);
#endif
}
// Returns a generator to the Cuda pool: stores its sixteen state words into
// row state.state_idx_ of state_ and, in device code, resets that row's lock.
//
// NOTE(review): state.p_ is not written back to p_ here, unlike the generic
// free_state; confirm whether that is intended.
template <>
KOKKOS_INLINE_FUNCTION void Random_XorShift1024_Pool<Kokkos::Cuda>::free_state(
    const Random_XorShift1024<Kokkos::Cuda>& state) const {
  // The Cuda generator holds a strided pointer into the state view, so word i
  // is state_[i * stride_] (the indexing its urand()/urand64() use); reading
  // state_[i] would pick up unrelated elements whenever stride_ != 1.
  for (int i = 0; i < 16; i++) {
    state_(state.state_idx_, i) = state.state_[i * state.stride_];
  }
#ifdef __CUDA_ARCH__
  locks_(state.state_idx_) = 0;
#endif
}
#endif
#if defined(KOKKOS_ENABLE_ROCM)
// XorShift1024 generator specialized for Kokkos::Experimental::ROCm.
//
// The generator does not copy its state: state_ is the address of word 0 of
// row state_idx of the pool's state view and stride_ is that view's
// stride_1(), so word k is state_[k * stride_] and every draw updates the
// view's storage in place.
template <>
class Random_XorShift1024<Kokkos::Experimental::ROCm> {
 private:
  int p_;                // index of the current state word; advanced mod 16
  const int state_idx_;  // row of the state view this generator works on
  uint64_t* state_;      // address of word 0 of that row
  const int stride_;     // element distance between consecutive words
  friend class Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>;

 public:
  typedef Kokkos::Experimental::ROCm device_type;
  typedef Random_XorShift1024_Pool<device_type> pool_type;

  enum { MAX_URAND = 0xffffffffU };
  enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 };
  enum { MAX_RAND = static_cast<int>(0xffffffffU / 2) };
  enum { MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL / 2 - 1) };

  // Binds the generator to row state_idx of `state`, starting at word p.
  KOKKOS_INLINE_FUNCTION
  Random_XorShift1024(const typename pool_type::state_data_type& state, int p,
                      int state_idx = 0)
      : p_(p),
        state_idx_(state_idx),
        state_(&state(state_idx, 0)),
        stride_(state.stride_1()) {}

  // Advances the state by one step and returns 32 bits of the result.
  KOKKOS_INLINE_FUNCTION
  uint32_t urand() {
    uint64_t state_0 = state_[p_ * stride_];
    uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_];
    state_1 ^= state_1 << 31;
    state_1 ^= state_1 >> 11;
    state_0 ^= state_0 >> 30;
    uint64_t tmp =
        (state_[p_ * stride_] = state_0 ^ state_1) * 1181783497276652981ULL;
    tmp = tmp >> 16;
    return static_cast<uint32_t>(tmp & MAX_URAND);
  }

  // Advances the state by one step and returns a 64-bit value.
  KOKKOS_INLINE_FUNCTION
  uint64_t urand64() {
    uint64_t state_0 = state_[p_ * stride_];
    uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_];
    state_1 ^= state_1 << 31;
    state_1 ^= state_1 >> 11;
    state_0 ^= state_0 >> 30;
    return ((state_[p_ * stride_] = state_0 ^ state_1) *
            1181783497276652981LL) -
           1;
  }

  // Returns a value in [0, range). `range` must be nonzero (it is a divisor).
  KOKKOS_INLINE_FUNCTION
  uint32_t urand(const uint32_t& range) {
    const uint32_t max_val = (MAX_URAND / range) * range;
    uint32_t tmp           = urand();
    // Rejection sampling: the redraw has to be stored back into tmp, otherwise
    // the loop condition never changes and the loop cannot terminate.
    while (tmp >= max_val) tmp = urand();
    return tmp % range;
  }

  // Returns a value in [start, end).
  KOKKOS_INLINE_FUNCTION
  uint32_t urand(const uint32_t& start, const uint32_t& end) {
    return urand(end - start) + start;
  }

  // Returns a value in [0, range). `range` must be nonzero (it is a divisor).
  KOKKOS_INLINE_FUNCTION
  uint64_t urand64(const uint64_t& range) {
    const uint64_t max_val = (MAX_URAND64 / range) * range;
    uint64_t tmp           = urand64();
    while (tmp >= max_val) tmp = urand64();
    return tmp % range;
  }

  // Returns a value in [start, end).
  KOKKOS_INLINE_FUNCTION
  uint64_t urand64(const uint64_t& start, const uint64_t& end) {
    return urand64(end - start) + start;
  }

  // Returns urand() / 2 as an int.
  KOKKOS_INLINE_FUNCTION
  int rand() { return static_cast<int>(urand() / 2); }

  // Returns a value in [0, range). `range` must be nonzero (it is a divisor).
  KOKKOS_INLINE_FUNCTION
  int rand(const int& range) {
    const int max_val = (MAX_RAND / range) * range;
    int tmp           = rand();
    while (tmp >= max_val) tmp = rand();
    return tmp % range;
  }

  // Returns a value in [start, end).
  KOKKOS_INLINE_FUNCTION
  int rand(const int& start, const int& end) {
    return rand(end - start) + start;
  }

  // Returns urand64() / 2 as an int64_t.
  KOKKOS_INLINE_FUNCTION
  int64_t rand64() { return static_cast<int64_t>(urand64() / 2); }

  // Returns a value in [0, range). `range` must be nonzero (it is a divisor).
  KOKKOS_INLINE_FUNCTION
  int64_t rand64(const int64_t& range) {
    const int64_t max_val = (MAX_RAND64 / range) * range;
    int64_t tmp           = rand64();
    while (tmp >= max_val) tmp = rand64();
    return tmp % range;
  }

  // Returns a value in [start, end).
  KOKKOS_INLINE_FUNCTION
  int64_t rand64(const int64_t& start, const int64_t& end) {
    return rand64(end - start) + start;
  }

  // Single-precision draw: urand64() scaled by 1 / MAX_URAND64.
  KOKKOS_INLINE_FUNCTION
  float frand() { return 1.0f * urand64() / MAX_URAND64; }

  // Single-precision draw scaled by `range`.
  KOKKOS_INLINE_FUNCTION
  float frand(const float& range) { return range * urand64() / MAX_URAND64; }

  // Single-precision draw scaled to (end - start) and shifted by start.
  KOKKOS_INLINE_FUNCTION
  float frand(const float& start, const float& end) {
    return frand(end - start) + start;
  }

  // Double-precision draw: urand64() scaled by 1 / MAX_URAND64.
  KOKKOS_INLINE_FUNCTION
  double drand() { return 1.0 * urand64() / MAX_URAND64; }

  // Double-precision draw scaled by `range`.
  KOKKOS_INLINE_FUNCTION
  double drand(const double& range) { return range * urand64() / MAX_URAND64; }

  // Double-precision draw scaled to (end - start) and shifted by start.
  // Uses drand (not frand) so the result is not rounded through float.
  KOKKOS_INLINE_FUNCTION
  double drand(const double& start, const double& end) {
    return drand(end - start) + start;
  }

  // Marsaglia polar method for drawing a standard normal distributed random
  // number
  KOKKOS_INLINE_FUNCTION
  double normal() {
    double S = 2.0;
    double U;
    while (S >= 1.0) {
      U              = 2.0 * drand() - 1.0;
      const double V = 2.0 * drand() - 1.0;
      S              = U * U + V * V;
    }
    return U * std::sqrt(-2.0 * log(S) / S);
  }

  // Normal draw with the given mean and standard deviation.
  KOKKOS_INLINE_FUNCTION
  double normal(const double& mean, const double& std_dev = 1.0) {
    return mean + normal() * std_dev;
  }
};
// Seeded constructor for the ROCm pool: always requests 4 * 32768 states.
template <>
inline Random_XorShift64_Pool<
    Kokkos::Experimental::ROCm>::Random_XorShift64_Pool(uint64_t seed) {
  const int requested_states = 4 * 32768;
  num_states_                = 0;
  init(seed, requested_states);
}
// Hands out a generator from the ROCm pool. When __HCC_ACCELERATOR__ is
// defined a slot is chosen and its lock set; otherwise slot 0 is returned
// without locking.
template <>
KOKKOS_INLINE_FUNCTION Random_XorShift64<Kokkos::Experimental::ROCm>
Random_XorShift64_Pool<Kokkos::Experimental::ROCm>::get_state() const {
#ifdef __HCC_ACCELERATOR__
// Flattened index of this thread inside its block.
const int i_offset =
(threadIdx_x * blockDim_y + threadIdx_y) * blockDim_z + threadIdx_z;
// First candidate slot: flattened global thread index modulo num_states_.
int i = (((blockIdx_x * gridDim_y + blockIdx_y) * gridDim_z + blockIdx_z) *
blockDim_x * blockDim_y * blockDim_z +
i_offset) %
num_states_;
// Try to switch locks_(i) from 0 to 1; keeps looping while the call returns
// nonzero (presumably the previous value, i.e. the slot was already taken --
// confirm against Kokkos::atomic_compare_exchange).
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
// Advance by one block's worth of threads; past the end, restart at i_offset.
i += blockDim_x * blockDim_y * blockDim_z;
if (i >= num_states_) {
i = i_offset;
}
}
return Random_XorShift64<Kokkos::Experimental::ROCm>(state_(i), i);
#else
return Random_XorShift64<Kokkos::Experimental::ROCm>(state_(0), 0);
#endif
}
// Returns a generator to the ROCm pool. Only when __HCC_ACCELERATOR__ is
// defined does it store the state back into its slot and reset the slot's
// lock; otherwise the function body is empty.
template <>
KOKKOS_INLINE_FUNCTION void
Random_XorShift64_Pool<Kokkos::Experimental::ROCm>::free_state(
    const Random_XorShift64<Kokkos::Experimental::ROCm>& state) const {
#ifdef __HCC_ACCELERATOR__
  const auto& slot = state.state_idx_;
  state_(slot)     = state.state_;
  locks_(slot)     = 0;
#endif
}
// Seeded constructor for the ROCm pool: always requests 4 * 32768 states.
template <>
inline Random_XorShift1024_Pool<
    Kokkos::Experimental::ROCm>::Random_XorShift1024_Pool(uint64_t seed) {
  const int requested_states = 4 * 32768;
  num_states_                = 0;
  init(seed, requested_states);
}
// Hands out a generator from the ROCm pool. When __HCC_ACCELERATOR__ is
// defined a slot is chosen and its lock set; otherwise slot 0 is returned
// without locking.
template <>
KOKKOS_INLINE_FUNCTION Random_XorShift1024<Kokkos::Experimental::ROCm>
Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>::get_state() const {
#ifdef __HCC_ACCELERATOR__
// Flattened index of this thread inside its block.
const int i_offset =
(threadIdx_x * blockDim_y + threadIdx_y) * blockDim_z + threadIdx_z;
// First candidate slot: flattened global thread index modulo num_states_.
int i = (((blockIdx_x * gridDim_y + blockIdx_y) * gridDim_z + blockIdx_z) *
blockDim_x * blockDim_y * blockDim_z +
i_offset) %
num_states_;
// Try to switch locks_(i) from 0 to 1; keeps looping while the call returns
// nonzero (presumably the previous value, i.e. the slot was already taken --
// confirm against Kokkos::atomic_compare_exchange).
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
// Advance by one block's worth of threads; past the end, restart at i_offset.
i += blockDim_x * blockDim_y * blockDim_z;
if (i >= num_states_) {
i = i_offset;
}
}
// The generator receives the whole state view, the slot's saved word index
// p_(i), and the slot number.
return Random_XorShift1024<Kokkos::Experimental::ROCm>(state_, p_(i), i);
#else
return Random_XorShift1024<Kokkos::Experimental::ROCm>(state_, p_(0), 0);
#endif
}
// Returns a generator to the ROCm pool. Only when __HCC_ACCELERATOR__ is
// defined does it store the sixteen state words into row state.state_idx_ of
// state_ and reset that row's lock; otherwise the function body is empty.
//
// NOTE(review): state.p_ is not written back to p_ here, unlike the generic
// free_state; confirm whether that is intended.
template <>
KOKKOS_INLINE_FUNCTION void
Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>::free_state(
    const Random_XorShift1024<Kokkos::Experimental::ROCm>& state) const {
#ifdef __HCC_ACCELERATOR__
  // The ROCm generator holds a strided pointer into the state view, so word i
  // is state_[i * stride_] (the indexing its urand()/urand64() use); reading
  // state_[i] would pick up unrelated elements whenever stride_ != 1.
  for (int i = 0; i < 16; i++) {
    state_(state.state_idx_, i) = state.state_[i * state.stride_];
  }
  locks_(state.state_idx_) = 0;
#endif
}
#endif
namespace Impl {
template <class ViewType, class RandomPool, int loops, int rank,
@ -2043,7 +1718,7 @@ void fill_random(ViewType a, RandomPool g,
typename ViewType::const_value_type range) {
int64_t LDA = a.extent(0);
if (LDA > 0)
parallel_for((LDA + 127) / 128,
parallel_for("Kokkos::fill_random", (LDA + 127) / 128,
Impl::fill_random_functor_range<ViewType, RandomPool, 128,
ViewType::Rank, IndexType>(
a, g, range));
@ -2055,7 +1730,7 @@ void fill_random(ViewType a, RandomPool g,
typename ViewType::const_value_type end) {
int64_t LDA = a.extent(0);
if (LDA > 0)
parallel_for((LDA + 127) / 128,
parallel_for("Kokkos::fill_random", (LDA + 127) / 128,
Impl::fill_random_functor_begin_end<ViewType, RandomPool, 128,
ViewType::Rank, IndexType>(
a, g, begin, end));

View File

@ -201,7 +201,7 @@ class BinSort {
bool sort_within_bins;
public:
BinSort() {}
BinSort() = default;
//----------------------------------------
// Constructor: takes the keys, the binning_operator and optionally whether to
@ -327,7 +327,7 @@ class BinSort {
Kokkos::RangePolicy<execution_space>(0, len), functor);
}
Kokkos::fence();
execution_space().fence();
}
template <class ValuesViewType>
@ -349,14 +349,14 @@ class BinSort {
public:
KOKKOS_INLINE_FUNCTION
void operator()(const bin_count_tag& tag, const int& i) const {
void operator()(const bin_count_tag& /*tag*/, const int i) const {
const int j = range_begin + i;
bin_count_atomic(bin_op.bin(keys, j))++;
}
KOKKOS_INLINE_FUNCTION
void operator()(const bin_offset_tag& tag, const int& i, value_type& offset,
const bool& final) const {
void operator()(const bin_offset_tag& /*tag*/, const int i,
value_type& offset, const bool& final) const {
if (final) {
bin_offsets(i) = offset;
}
@ -364,7 +364,7 @@ class BinSort {
}
KOKKOS_INLINE_FUNCTION
void operator()(const bin_binning_tag& tag, const int& i) const {
void operator()(const bin_binning_tag& /*tag*/, const int i) const {
const int j = range_begin + i;
const int bin = bin_op.bin(keys, j);
const int count = bin_count_atomic(bin)++;
@ -373,7 +373,7 @@ class BinSort {
}
KOKKOS_INLINE_FUNCTION
void operator()(const bin_sort_bins_tag& tag, const int& i) const {
void operator()(const bin_sort_bins_tag& /*tag*/, const int i) const {
auto bin_size = bin_count_const(i);
if (bin_size <= 1) return;
int upper_bound = bin_offsets(i) + bin_size;
@ -381,7 +381,7 @@ class BinSort {
while (!sorted) {
sorted = true;
int old_idx = sort_order(bin_offsets(i));
int new_idx;
int new_idx = 0;
for (int k = bin_offsets(i) + 1; k < upper_bound; k++) {
new_idx = sort_order(k);
@ -446,7 +446,7 @@ struct BinOp3D {
typename KeyViewType::non_const_value_type range_[3];
typename KeyViewType::non_const_value_type min_[3];
BinOp3D() {}
BinOp3D() = default;
BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
typename KeyViewType::const_value_type max[]) {

View File

@ -20,16 +20,38 @@ KOKKOS_ADD_TEST_LIBRARY(
HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h
SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc
)
KOKKOS_TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0")
# gtest is compiled with pthread support disabled for every backend.
TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC GTEST_HAS_PTHREAD=0)
# WORKAROUND FOR HIPCC
# The AMD GPU target is a compiler option, not a preprocessor definition, so it
# is attached with TARGET_COMPILE_OPTIONS instead of being appended to the
# definition string.
IF(Kokkos_ENABLE_HIP)
  TARGET_COMPILE_OPTIONS(kokkosalgorithms_gtest PUBLIC --amdgpu-target=gfx906)
ENDIF()
TARGET_COMPILE_FEATURES(kokkosalgorithms_gtest PUBLIC cxx_std_11)
SET(SOURCES
UnitTestMain.cpp
TestCuda.cpp
)
UnitTestMain.cpp
)
# Per-backend test sources, appended to SOURCES only when the corresponding
# Kokkos backend is enabled.

# OpenMP backend
IF(Kokkos_ENABLE_OPENMP)
  LIST(APPEND SOURCES
    TestOpenMP.cpp
    TestOpenMP_Sort1D.cpp
    TestOpenMP_Sort3D.cpp
    TestOpenMP_SortDynamicView.cpp
    TestOpenMP_Random.cpp
  )
ENDIF()

# HIP backend
IF(Kokkos_ENABLE_HIP)
  LIST(APPEND SOURCES TestHIP.cpp)
ENDIF()

# CUDA backend
IF(Kokkos_ENABLE_CUDA)
  LIST(APPEND SOURCES TestCuda.cpp)
ENDIF()

View File

@ -44,7 +44,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o
OBJ_OPENMP = TestOpenMP.o TestOpenMP_Random.o TestOpenMP_Sort1D.o TestOpenMP_Sort3D.o TestOpenMP_SortDynamicView.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_OpenMP
TEST_TARGETS += test-openmp
endif

View File

@ -59,11 +59,15 @@
namespace Test {
void cuda_test_random_xorshift64(int num_draws) {
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Cuda> >(num_draws);
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Cuda>>(num_draws);
Impl::test_random<Kokkos::Random_XorShift64_Pool<
Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>>>(num_draws);
}
void cuda_test_random_xorshift1024(int num_draws) {
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Cuda> >(num_draws);
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Cuda>>(num_draws);
Impl::test_random<Kokkos::Random_XorShift1024_Pool<
Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>>>(num_draws);
}
#define CUDA_RANDOM_XORSHIFT64(num_draws) \

View File

@ -0,0 +1,83 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_HIP
#include <cstdint>
#include <iostream>
#include <iomanip>
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <TestRandom.hpp>
#include <TestSort.hpp>
namespace Test {
void hip_test_random_xorshift64(size_t num_draws) {
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Experimental::HIP>>(
num_draws);
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Device<
Kokkos::Experimental::HIP, Kokkos::Experimental::HIPSpace>>>(num_draws);
}
void hip_test_random_xorshift1024(size_t num_draws) {
Impl::test_random<
Kokkos::Random_XorShift1024_Pool<Kokkos::Experimental::HIP>>(num_draws);
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Device<
Kokkos::Experimental::HIP, Kokkos::Experimental::HIPSpace>>>(num_draws);
}
TEST(hip, Random_XorShift64) { hip_test_random_xorshift64(132141141); }
TEST(hip, Random_XorShift1024_0) { hip_test_random_xorshift1024(52428813); }
TEST(hip, SortUnsigned) {
Impl::test_sort<Kokkos::Experimental::HIP, unsigned>(171);
}
} // namespace Test
#else
// Fallback definition compiled only when KOKKOS_ENABLE_HIP is not defined;
// presumably it keeps this translation unit from being empty in that case.
void KOKKOS_ALGORITHMS_UNITTESTS_TESTHIP_PREVENT_LINK_ERROR() {}
#endif /* #ifdef KOKKOS_ENABLE_HIP */

View File

@ -55,30 +55,8 @@
namespace Test {
#define OPENMP_RANDOM_XORSHIFT64(num_draws) \
TEST(openmp, Random_XorShift64) { \
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::OpenMP> >( \
num_draws); \
}
TEST(openmp, SortIssue1160) { Impl::test_issue_1160_sort<Kokkos::OpenMP>(); }
#define OPENMP_RANDOM_XORSHIFT1024(num_draws) \
TEST(openmp, Random_XorShift1024) { \
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::OpenMP> >( \
num_draws); \
}
#define OPENMP_SORT_UNSIGNED(size) \
TEST(openmp, SortUnsigned) { \
Impl::test_sort<Kokkos::OpenMP, unsigned>(size); \
}
OPENMP_RANDOM_XORSHIFT64(10240000)
OPENMP_RANDOM_XORSHIFT1024(10130144)
OPENMP_SORT_UNSIGNED(171)
#undef OPENMP_RANDOM_XORSHIFT64
#undef OPENMP_RANDOM_XORSHIFT1024
#undef OPENMP_SORT_UNSIGNED
} // namespace Test
#else
void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {}

View File

@ -0,0 +1,77 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_OPENMP
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
#include <TestRandom.hpp>
#include <iomanip>
namespace Test {

// Random-number test for the XorShift64 generator pool on the OpenMP backend.
TEST(openmp, Random_XorShift64) {
  using pool_type = Kokkos::Random_XorShift64_Pool<Kokkos::OpenMP>;
  Impl::test_random<pool_type>(10240000);
}

// Random-number test for the XorShift1024 generator pool on the OpenMP backend.
TEST(openmp, Random_XorShift1024) {
  using pool_type = Kokkos::Random_XorShift1024_Pool<Kokkos::OpenMP>;
  Impl::test_random<pool_type>(10130144);
}

}  // namespace Test
#else
// Fallback definition compiled only when KOKKOS_ENABLE_OPENMP is not defined;
// presumably it keeps this translation unit from being empty in that case.
// NOTE(review): the other OpenMP test sources define a function with this
// exact name in their #else branches. If several of them were ever linked into
// one binary with OpenMP disabled the definitions would collide — consider a
// per-file name.
void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {}
#endif

View File

@ -0,0 +1,65 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_OPENMP
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
namespace Test {

// 1D sort test with unsigned keys on the OpenMP backend.
TEST(openmp, SortUnsigned1D) {
  using exec_space = Kokkos::OpenMP;
  using key_type   = unsigned;
  Impl::test_1D_sort<exec_space, key_type>(171);
}

}  // namespace Test
#else
// Fallback definition compiled only when KOKKOS_ENABLE_OPENMP is not defined;
// presumably it keeps this translation unit from being empty in that case.
// NOTE(review): the other OpenMP test sources define a function with this
// exact name in their #else branches. If several of them were ever linked into
// one binary with OpenMP disabled the definitions would collide — consider a
// per-file name.
void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {}
#endif

View File

@ -0,0 +1,65 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_OPENMP
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
namespace Test {

// 3D sort test with unsigned keys on the OpenMP backend.
TEST(openmp, SortUnsigned3D) {
  using exec_space = Kokkos::OpenMP;
  using key_type   = unsigned;
  Impl::test_3D_sort<exec_space, key_type>(171);
}

}  // namespace Test
#else
// Fallback definition compiled only when KOKKOS_ENABLE_OPENMP is not defined;
// presumably it keeps this translation unit from being empty in that case.
// NOTE(review): the other OpenMP test sources define a function with this
// exact name in their #else branches. If several of them were ever linked into
// one binary with OpenMP disabled the definitions would collide — consider a
// per-file name.
void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {}
#endif

View File

@ -0,0 +1,65 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_OPENMP
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
namespace Test {

// DynamicView sort test with unsigned keys on the OpenMP backend.
TEST(openmp, SortUnsignedDynamicView) {
  using exec_space = Kokkos::OpenMP;
  using key_type   = unsigned;
  Impl::test_dynamic_view_sort<exec_space, key_type>(171);
}

}  // namespace Test
#else
// Fallback definition compiled only when KOKKOS_ENABLE_OPENMP is not defined;
// presumably it keeps this translation unit from being empty in that case.
// NOTE(review): the other OpenMP test sources define a function with this
// exact name in their #else branches. If several of them were ever linked into
// one binary with OpenMP disabled the definitions would collide — consider a
// per-file name.
void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {}
#endif

View File

@ -140,7 +140,7 @@ struct test_random_functor {
density_3d(d3d) {}
KOKKOS_INLINE_FUNCTION
void operator()(int i, RandomProperties& prop) const {
void operator()(int /*i*/, RandomProperties& prop) const {
using Kokkos::atomic_fetch_add;
rnd_type rand_gen = rand_pool.get_state();

View File

@ -130,7 +130,7 @@ struct sum3D {
};
template <class ExecutionSpace, typename KeyType>
void test_1D_sort(unsigned int n, bool force_kokkos) {
void test_1D_sort_impl(unsigned int n, bool force_kokkos) {
typedef Kokkos::View<KeyType*, ExecutionSpace> KeyViewType;
KeyViewType keys("Keys", n);
@ -165,7 +165,7 @@ void test_1D_sort(unsigned int n, bool force_kokkos) {
}
template <class ExecutionSpace, typename KeyType>
void test_3D_sort(unsigned int n) {
void test_3D_sort_impl(unsigned int n) {
typedef Kokkos::View<KeyType * [3], ExecutionSpace> KeyViewType;
KeyViewType keys("Keys", n * n * n);
@ -214,7 +214,7 @@ void test_3D_sort(unsigned int n) {
//----------------------------------------------------------------------------
template <class ExecutionSpace, typename KeyType>
void test_dynamic_view_sort(unsigned int n) {
void test_dynamic_view_sort_impl(unsigned int n) {
typedef Kokkos::Experimental::DynamicView<KeyType*, ExecutionSpace>
KeyDynamicViewType;
typedef Kokkos::View<KeyType*, ExecutionSpace> KeyViewType;
@ -278,7 +278,7 @@ void test_dynamic_view_sort(unsigned int n) {
//----------------------------------------------------------------------------
template <class ExecutionSpace>
void test_issue_1160() {
void test_issue_1160_impl() {
Kokkos::View<int*, ExecutionSpace> element_("element", 10);
Kokkos::View<double*, ExecutionSpace> x_("x", 10);
Kokkos::View<double*, ExecutionSpace> v_("y", 10);
@ -346,16 +346,33 @@ void test_issue_1160() {
//----------------------------------------------------------------------------
template <class ExecutionSpace, typename KeyType>
void test_sort(unsigned int N) {
test_1D_sort<ExecutionSpace, KeyType>(N * N * N, true);
test_1D_sort<ExecutionSpace, KeyType>(N * N * N, false);
#if !defined(KOKKOS_ENABLE_ROCM)
test_3D_sort<ExecutionSpace, KeyType>(N);
test_dynamic_view_sort<ExecutionSpace, KeyType>(N * N);
#endif
test_issue_1160<ExecutionSpace>();
void test_1D_sort(unsigned int N) {
test_1D_sort_impl<ExecutionSpace, KeyType>(N * N * N, true);
test_1D_sort_impl<ExecutionSpace, KeyType>(N * N * N, false);
}
// Entry point for the 3D sort test: forwards N unchanged to
// test_3D_sort_impl for the given execution space and key type.
template <class ExecutionSpace, typename KeyType>
void test_3D_sort(unsigned int N) {
test_3D_sort_impl<ExecutionSpace, KeyType>(N);
}
// Entry point for the DynamicView sort test: calls
// test_dynamic_view_sort_impl with N * N as its size argument.
template <class ExecutionSpace, typename KeyType>
void test_dynamic_view_sort(unsigned int N) {
test_dynamic_view_sort_impl<ExecutionSpace, KeyType>(N * N);
}
// Entry point for the issue-1160 regression test: runs
// test_issue_1160_impl on the given execution space.
template <class ExecutionSpace>
void test_issue_1160_sort() {
test_issue_1160_impl<ExecutionSpace>();
}
// Aggregate sort test: runs the 1D, 3D, DynamicView and issue-1160 sort tests
// in that order, passing the same N to each size-parameterised test.
template <class ExecutionSpace, typename KeyType>
void test_sort(unsigned int N) {
test_1D_sort<ExecutionSpace, KeyType>(N);
test_3D_sort<ExecutionSpace, KeyType>(N);
test_dynamic_view_sort<ExecutionSpace, KeyType>(N);
test_issue_1160_sort<ExecutionSpace>();
}
} // namespace Impl
} // namespace Test
#endif /* KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP */

10
lib/kokkos/appveyor.yml Normal file
View File

@ -0,0 +1,10 @@
# AppVeyor CI configuration: configure, build/install and test Kokkos.
# Build worker image.
image:
- Visual Studio 2019
# Directory the repository is cloned into; the cmake call below points at it.
clone_folder: c:\projects\source
# Single chained command: configure in a fresh build directory with tests
# enabled and libdl/profiling disabled, build the install target, then run
# ctest verbosely for the Debug configuration.
build_script:
- cmd: >-
mkdir build &&
cd build &&
cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DKokkos_ENABLE_LIBDL=OFF -DKokkos_ENABLE_PROFILING=OFF &&
cmake --build . --target install &&
ctest -C Debug -V

View File

@ -61,7 +61,7 @@ typedef int GUPSIndex;
double now() {
struct timeval now;
gettimeofday(&now, NULL);
gettimeofday(&now, nullptr);
return (double) now.tv_sec + ((double) now.tv_usec * 1.0e-6);
}

View File

@ -64,7 +64,7 @@ typedef int StreamIndex;
double now() {
struct timeval now;
gettimeofday(&now, NULL);
gettimeofday(&now, nullptr);
return (double) now.tv_sec + ((double) now.tv_usec * 1.0e-6);
}

View File

@ -1,339 +0,0 @@
#!/bin/bash
# Add backend "$1" to the comma-separated KOKKOS_DEVICES list unless the list
# already contains it as a substring. Prints what was done.
update_kokkos_devices() {
  SEARCH_TEXT="*$1*"

  if [[ $KOKKOS_DEVICES == $SEARCH_TEXT ]]; then
    # Glob match against *NAME*: the name already appears in the list.
    echo kokkos devices already includes $SEARCH_TEXT
  elif [ -z "$KOKKOS_DEVICES" ]; then
    # First backend: start the list.
    KOKKOS_DEVICES="$1"
    echo reseting kokkos devices to $KOKKOS_DEVICES
  else
    # Further backends are appended after a comma.
    KOKKOS_DEVICES="${KOKKOS_DEVICES},$1"
    echo appending to kokkos devices $KOKKOS_DEVICES
  fi
}
# Translate the comma-separated KOKKOS_DEVICES list into CMake options.
# For every entry the name is upper-cased and "-DKokkos_ENABLE_<NAME>=ON " is
# prepended to KOKKOS_DEVICE_CMD, so the result lists entries in reverse order
# and ends with a trailing space. KOKKOS_DEVICE_CMD is reset first.
# The helper variables used here are not declared local.
get_kokkos_device_list() {
KOKKOS_DEVICE_CMD=
# Commas become newlines; the unquoted expansion below then splits on them.
PARSE_DEVICES_LST=$(echo $KOKKOS_DEVICES | tr "," "\n")
for DEVICE_ in $PARSE_DEVICES_LST
do
UC_DEVICE=$(echo $DEVICE_ | tr "[:lower:]" "[:upper:]")
KOKKOS_DEVICE_CMD="-DKokkos_ENABLE_${UC_DEVICE}=ON ${KOKKOS_DEVICE_CMD}"
done
}
# Translate the comma-separated KOKKOS_ARCH list into CMake options.
# For every entry the name is upper-cased and "-DKokkos_ARCH_<NAME>=ON " is
# prepended to KOKKOS_ARCH_CMD (reset first), so entries end up in reverse
# order with a trailing space.
# NOTE(review): names are only upper-cased, so a hyphenated value such as
# "ARMv8-ThunderX" keeps its hyphen — confirm that a matching CMake option
# name exists.
get_kokkos_arch_list() {
KOKKOS_ARCH_CMD=
# Commas become newlines; the unquoted expansion below then splits on them.
PARSE_ARCH_LST=$(echo $KOKKOS_ARCH | tr "," "\n")
for ARCH_ in $PARSE_ARCH_LST
do
UC_ARCH=$(echo $ARCH_ | tr "[:lower:]" "[:upper:]")
KOKKOS_ARCH_CMD="-DKokkos_ARCH_${UC_ARCH}=ON ${KOKKOS_ARCH_CMD}"
done
}
# Translate the comma-separated KOKKOS_CUDA_OPTIONS list into CMake options.
# Each recognised keyword maps to a Kokkos_ENABLE_CUDA_* option that is
# prepended to KOKKOS_CUDA_OPTION_CMD; unrecognised keywords are reported and
# skipped.
get_kokkos_cuda_option_list() {
  echo parsing KOKKOS_CUDA_OPTIONS=$KOKKOS_CUDA_OPTIONS
  KOKKOS_CUDA_OPTION_CMD=
  PARSE_CUDA_LST=$(echo $KOKKOS_CUDA_OPTIONS | tr "," "\n")
  for CUDA_ in $PARSE_CUDA_LST
  do
    # Map the user-facing keyword to the CMake option suffix.
    case "${CUDA_}" in
      enable_lambda)
        CUDA_OPT_NAME=CUDA_LAMBDA
        ;;
      rdc)
        CUDA_OPT_NAME=CUDA_RELOCATABLE_DEVICE_CODE
        ;;
      force_uvm)
        CUDA_OPT_NAME=CUDA_UVM
        ;;
      use_ldg)
        CUDA_OPT_NAME=CUDA_LDG_INTRINSIC
        ;;
      *)
        CUDA_OPT_NAME=
        echo "${CUDA_} is not a valid cuda options..."
        ;;
    esac

    if [ -n "${CUDA_OPT_NAME}" ]; then
      KOKKOS_CUDA_OPTION_CMD="-DKokkos_ENABLE_${CUDA_OPT_NAME}=ON ${KOKKOS_CUDA_OPTION_CMD}"
    fi
  done
}
# Translate the comma-separated KOKKOS_OPTIONS list into CMake options, which
# are prepended one by one to KOKKOS_OPTION_CMD. Each entry is upper-cased,
# then:
#   *DISABLE*  -> first DISABLE replaced by ENABLE, option set to OFF
#   *ENABLE*   -> option used as given, set to ON
#   otherwise  -> prefixed with ENABLE_, set to ON
get_kokkos_option_list() {
  echo parsing KOKKOS_OPTIONS=$KOKKOS_OPTIONS
  KOKKOS_OPTION_CMD=
  PARSE_OPTIONS_LST=$(echo $KOKKOS_OPTIONS | tr "," "\n")
  for OPT_ in $PARSE_OPTIONS_LST
  do
    UC_OPT_=$(echo $OPT_ | tr "[:lower:]" "[:upper:]")
    case "$UC_OPT_" in
      *DISABLE*)
        FLIP_OPT_=${UC_OPT_/DISABLE/ENABLE}
        KOKKOS_OPTION_CMD="-DKokkos_${FLIP_OPT_}=OFF ${KOKKOS_OPTION_CMD}"
        ;;
      *ENABLE*)
        KOKKOS_OPTION_CMD="-DKokkos_${UC_OPT_}=ON ${KOKKOS_OPTION_CMD}"
        ;;
      *)
        KOKKOS_OPTION_CMD="-DKokkos_ENABLE_${UC_OPT_}=ON ${KOKKOS_OPTION_CMD}"
        ;;
    esac
  done
}
# Print the usage text for this configure script to stdout.
# Every option listed here is handled by the argument-parsing loop below; the
# --with-hpx entry documents a backend switch the parser already accepted but
# the help text previously omitted.
display_help_text() {
  echo "Kokkos configure options:"
  echo ""
  echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory."
  echo "--prefix=/Install/Path: Path to install the Kokkos library."
  echo ""
  echo "--with-cuda[=/Path/To/Cuda]: Enable Cuda and set path to Cuda Toolkit."
  echo "--with-openmp: Enable OpenMP backend."
  echo "--with-pthread: Enable Pthreads backend."
  echo "--with-serial: Enable Serial backend."
  echo "--with-hpx: Enable HPX backend."
  echo "--with-devices: Explicitly add a set of backends."
  echo ""
  echo "--arch=[OPT]: Set target architectures. Options are:"
  echo " [AMD]"
  echo " AMDAVX = AMD CPU"
  echo " EPYC = AMD EPYC Zen-Core CPU"
  echo " [ARM]"
  echo " ARMv80 = ARMv8.0 Compatible CPU"
  echo " ARMv81 = ARMv8.1 Compatible CPU"
  echo " ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU"
  echo " ARMv8-TX2 = ARMv8 Cavium ThunderX2 CPU"
  echo " [IBM]"
  echo " BGQ = IBM Blue Gene Q"
  echo " Power7 = IBM POWER7 and POWER7+ CPUs"
  echo " Power8 = IBM POWER8 CPUs"
  echo " Power9 = IBM POWER9 CPUs"
  echo " [Intel]"
  echo " WSM = Intel Westmere CPUs"
  echo " SNB = Intel Sandy/Ivy Bridge CPUs"
  echo " HSW = Intel Haswell CPUs"
  echo " BDW = Intel Broadwell Xeon E-class CPUs"
  echo " SKX = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)"
  echo " [Intel Xeon Phi]"
  echo " KNC = Intel Knights Corner Xeon Phi"
  echo " KNL = Intel Knights Landing Xeon Phi"
  echo " [NVIDIA]"
  echo " Kepler30 = NVIDIA Kepler generation CC 3.0"
  echo " Kepler32 = NVIDIA Kepler generation CC 3.2"
  echo " Kepler35 = NVIDIA Kepler generation CC 3.5"
  echo " Kepler37 = NVIDIA Kepler generation CC 3.7"
  echo " Maxwell50 = NVIDIA Maxwell generation CC 5.0"
  echo " Maxwell52 = NVIDIA Maxwell generation CC 5.2"
  echo " Maxwell53 = NVIDIA Maxwell generation CC 5.3"
  echo " Pascal60 = NVIDIA Pascal generation CC 6.0"
  echo " Pascal61 = NVIDIA Pascal generation CC 6.1"
  echo " Volta70 = NVIDIA Volta generation CC 7.0"
  echo " Volta72 = NVIDIA Volta generation CC 7.2"
  echo ""
  echo "--compiler=/Path/To/Compiler Set the compiler."
  echo "--debug,-dbg: Enable Debugging."
  echo "--cxxflags=[FLAGS] Overwrite CXXFLAGS for library build and test"
  echo " build. This will still set certain required"
  echo " flags via KOKKOS_CXXFLAGS (such as -fopenmp,"
  echo " --std=c++11, etc.)."
  echo "--cxxstandard=[FLAGS] Overwrite KOKKOS_CXX_STANDARD for library build and test"
  echo " c++11 (default), c++14, c++17, c++1y, c++1z, c++2a"
  echo "--ldflags=[FLAGS] Overwrite LDFLAGS for library build and test"
  echo " build. This will still set certain required"
  echo " flags via KOKKOS_LDFLAGS (such as -fopenmp,"
  echo " -lpthread, etc.)."
  echo "--with-gtest=/Path/To/Gtest: Set path to gtest. (Used in unit and performance"
  echo " tests.)"
  echo "--with-hwloc=/Path/To/Hwloc: Set path to hwloc library."
  echo "--with-memkind=/Path/To/MemKind: Set path to memkind library."
  echo "--with-options=[OPT]: Additional options to Kokkos:"
  echo " compiler_warnings"
  echo " aggressive_vectorization = add ivdep on loops"
  echo " disable_profiling = do not compile with profiling hooks"
  echo " "
  echo "--with-cuda-options=[OPT]: Additional options to CUDA:"
  echo " force_uvm, use_ldg, enable_lambda, rdc"
  echo "--with-hpx-options=[OPT]: Additional options to HPX:"
  echo " enable_async_dispatch"
  echo "--gcc-toolchain=/Path/To/GccRoot: Set the gcc toolchain to use with clang (e.g. /usr)"
  echo "--make-j=[NUM]: DEPRECATED: call make with appropriate"
  echo " -j flag"
}
# Parse the command line. Every argument is a single word of the form
# --name or --name=value; the value is whatever follows the first '='.
# Numeric test on the argument count ([[ $# > 0 ]] compares strings).
while [ $# -gt 0 ]
do
  key="$1"

  case $key in
    --kokkos-path*)
      KOKKOS_PATH="${key#*=}"
      ;;
    --hpx-path*)
      HPX_PATH="${key#*=}"
      ;;
    --prefix*)
      PREFIX="${key#*=}"
      ;;
    --with-cuda)
      # No explicit path: derive the toolkit root from the nvcc found in PATH.
      update_kokkos_devices Cuda
      CUDA_PATH_NVCC=$(command -v nvcc)
      CUDA_PATH=${CUDA_PATH_NVCC%/bin/nvcc}
      ;;
    # Catch this before '--with-cuda*'
    --with-cuda-options*)
      KOKKOS_CUDA_OPTIONS="${key#*=}"
      ;;
    --with-cuda*)
      update_kokkos_devices Cuda
      CUDA_PATH="${key#*=}"
      ;;
    --with-openmp)
      update_kokkos_devices OpenMP
      ;;
    --with-pthread)
      update_kokkos_devices Pthread
      ;;
    --with-serial)
      update_kokkos_devices Serial
      ;;
    # Must come before '--with-hpx*'.
    --with-hpx-options*)
      KOKKOS_HPX_OPT="${key#*=}"
      ;;
    --with-hpx*)
      update_kokkos_devices HPX
      # Only take a path when one was actually given; a bare --with-hpx has no
      # '=' and would otherwise store the flag itself as the path.
      if [ -z "$HPX_PATH" ] && [[ "$key" == *=* ]]; then
        HPX_PATH="${key#*=}"
      fi
      ;;
    --with-devices*)
      DEVICES="${key#*=}"
      PARSE_DEVICES=$(echo $DEVICES | tr "," "\n")
      for DEVICE_ in $PARSE_DEVICES
      do
        update_kokkos_devices $DEVICE_
      done
      ;;
    --with-gtest*)
      GTEST_PATH="${key#*=}"
      ;;
    --with-hwloc*)
      HWLOC_PATH="${key#*=}"
      ;;
    --with-memkind*)
      MEMKIND_PATH="${key#*=}"
      ;;
    --arch*)
      KOKKOS_ARCH="${key#*=}"
      ;;
    --cxxflags*)
      KOKKOS_CXXFLAGS="${key#*=}"
      # Commas stand in for spaces so several flags fit in one argument.
      KOKKOS_CXXFLAGS=${KOKKOS_CXXFLAGS//,/ }
      ;;
    --cxxstandard*)
      KOKKOS_CXX_STANDARD="${key#*=}"
      ;;
    --ldflags*)
      KOKKOS_LDFLAGS="${key#*=}"
      ;;
    --debug|-dbg)
      KOKKOS_DEBUG=yes
      ;;
    --make-j*)
      echo "Warning: ${key} is deprecated"
      echo "Call make with appropriate -j flag"
      ;;
    --compiler*)
      COMPILER="${key#*=}"
      # Configuration errors exit with a non-zero status so that calling
      # scripts can detect the failure.
      if [ -z "${COMPILER}" ]; then
        echo "Empty compiler specified by --compiler command."
        exit 1
      fi
      # 'command -v' fails when the name cannot be resolved.
      if ! WCOMPATH=$(command -v -- "${COMPILER}"); then
        echo "Invalid compiler by --compiler command: '${COMPILER}'"
        exit 1
      fi
      # ... valid compiler, ensure absolute path set
      COMPDIR=$(dirname "$WCOMPATH")
      COMPNAME=$(basename "$WCOMPATH")
      COMPILER=${COMPDIR}/${COMPNAME}
      ;;
    --with-options*)
      KOKKOS_OPTIONS="${key#*=}"
      ;;
    --gcc-toolchain*)
      KOKKOS_GCC_TOOLCHAIN="${key#*=}"
      ;;
    --help)
      display_help_text
      exit 0
      ;;
    *)
      echo "warning: ignoring unknown option $key"
      ;;
  esac

  shift
done
# ---------------------------------------------------------------------------
# Turn the parsed settings into CMake arguments and run cmake.
# ---------------------------------------------------------------------------

# Compiler: only forwarded when one was requested.
if [ "$COMPILER" == "" ]; then
  COMPILER_CMD=
else
  COMPILER_CMD=-DCMAKE_CXX_COMPILER=$COMPILER
fi

# Build type: RELEASE unless --debug/-dbg was given.
if [ "$KOKKOS_DEBUG" == "" ]; then
  KOKKOS_DEBUG_CMD=-DCMAKE_BUILD_TYPE=RELEASE
else
  KOKKOS_DEBUG_CMD=-DCMAKE_BUILD_TYPE=DEBUG
fi

# Locate the Kokkos source tree. With no --kokkos-path the directory of this
# script is used. A missing CMakeLists.txt is a configuration error, so the
# script exits with a non-zero status.
if [ ! -e "${KOKKOS_PATH}/CMakeLists.txt" ]; then
  if [ "${KOKKOS_PATH}" == "" ]; then
    CM_SCRIPT=$0
    KOKKOS_PATH=$(dirname "$CM_SCRIPT")
    if [ ! -e "${KOKKOS_PATH}/CMakeLists.txt" ]; then
      echo "${KOKKOS_PATH} repository appears to not be complete. please verify and try again"
      exit 1
    fi
  else
    echo "KOKKOS_PATH does not appear to be set properly. please specify in location of CMakeLists.txt"
    display_help_text
    exit 1
  fi
fi

get_kokkos_device_list
get_kokkos_option_list
get_kokkos_arch_list
get_kokkos_cuda_option_list

## if HPX is enabled, we need to enforce cxx standard = 14
if [[ ${KOKKOS_DEVICE_CMD} == *Kokkos_ENABLE_HPX* ]]; then
  # Compare the numeric standard, not the length of the string: strip an
  # optional "c++" prefix and map the draft names onto their release numbers.
  HPX_STD_NUM=${KOKKOS_CXX_STANDARD#c++}
  case "${HPX_STD_NUM}" in
    1y|1Y) HPX_STD_NUM=14 ;;
    1z|1Z) HPX_STD_NUM=17 ;;
    2a|2A) HPX_STD_NUM=20 ;;
    98)    HPX_STD_NUM=0 ;;
  esac
  # Anything empty or non-numeric counts as "too old".
  case "${HPX_STD_NUM}" in
    ''|*[!0-9]*) HPX_STD_NUM=0 ;;
  esac
  if [ "${HPX_STD_NUM}" -lt 14 ]; then
    echo CXX Standard must be 14 or higher for HPX to work.
    KOKKOS_CXX_STANDARD=14
  fi
fi

if [ "$KOKKOS_CXX_STANDARD" == "" ]; then
  STANDARD_CMD=
else
  STANDARD_CMD=-DKokkos_CXX_STANDARD=${KOKKOS_CXX_STANDARD}
fi

# clang needs to be told which gcc installation (and CUDA toolkit) to use.
if [[ ${COMPILER} == *clang* ]]; then
  if [ -n "${KOKKOS_GCC_TOOLCHAIN}" ]; then
    # Honour an explicit --gcc-toolchain=... request.
    gcc_path=${KOKKOS_GCC_TOOLCHAIN}
  else
    # Otherwise derive the toolchain root from the g++ found in PATH.
    gcc_path=$(which g++ | awk --field-separator='/bin/g++' '{printf $1}' )
  fi
  KOKKOS_CXXFLAGS="${KOKKOS_CXXFLAGS} --gcc-toolchain=${gcc_path}"

  if [ ! "${CUDA_PATH}" == "" ]; then
    KOKKOS_CXXFLAGS="${KOKKOS_CXXFLAGS} --cuda-path=${CUDA_PATH}"
  fi
fi

# Show the command, then run it (double quotes are stripped from the flag
# strings for the real invocation).
echo cmake $COMPILER_CMD -DCMAKE_CXX_FLAGS="${KOKKOS_CXXFLAGS}" -DCMAKE_EXE_LINKER_FLAGS="${KOKKOS_LDFLAGS}" -DCMAKE_INSTALL_PREFIX=${PREFIX} ${KOKKOS_DEVICE_CMD} ${KOKKOS_ARCH_CMD} -DKokkos_ENABLE_TESTS=ON ${KOKKOS_OPTION_CMD} ${KOKKOS_CUDA_OPTION_CMD} -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_CXX_EXTENSIONS=OFF ${STANDARD_CMD} ${KOKKOS_DEBUG_CMD} ${KOKKOS_PATH}
cmake $COMPILER_CMD -DCMAKE_CXX_FLAGS="${KOKKOS_CXXFLAGS//\"}" -DCMAKE_EXE_LINKER_FLAGS="${KOKKOS_LDFLAGS//\"}" -DCMAKE_INSTALL_PREFIX=${PREFIX} ${KOKKOS_DEVICE_CMD} ${KOKKOS_ARCH_CMD} -DKokkos_ENABLE_TESTS=ON ${KOKKOS_OPTION_CMD} ${KOKKOS_CUDA_OPTION_CMD} -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_CXX_EXTENSIONS=OFF ${STANDARD_CMD} ${KOKKOS_DEBUG_CMD} ${KOKKOS_PATH}

View File

@ -6,5 +6,4 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
Core core PS REQUIRED
Containers containers PS OPTIONAL
Algorithms algorithms PS OPTIONAL
Example example EX OPTIONAL
)

View File

@ -73,6 +73,9 @@ function(kokkos_check)
# use it to check that there are variables defined for all required
# arguments. Success or failure messages will be displayed but we are
# responsible for signaling failure and skip the build system generation.
if (KOKKOS_CHECK_RETURN_VALUE)
set(Kokkos_${arg}_FIND_QUIETLY ON)
endif()
find_package_handle_standard_args("Kokkos_${arg}" DEFAULT_MSG
${KOKKOS_CHECK_${arg}})
if(NOT Kokkos_${arg}_FOUND)

View File

@ -5,11 +5,19 @@
#define KOKKOS_CORE_CONFIG_H
#endif
// KOKKOS_VERSION % 100 is the patch level
// KOKKOS_VERSION / 100 % 100 is the minor version
// KOKKOS_VERSION / 10000 is the major version
#cmakedefine KOKKOS_VERSION @KOKKOS_VERSION@
/* Execution Spaces */
#cmakedefine KOKKOS_ENABLE_SERIAL
#cmakedefine KOKKOS_ENABLE_OPENMP
#cmakedefine KOKKOS_ENABLE_OPENMPTARGET
#cmakedefine KOKKOS_ENABLE_THREADS
#cmakedefine KOKKOS_ENABLE_CUDA
#cmakedefine KOKKOS_ENABLE_HIP
#cmakedefine KOKKOS_ENABLE_HPX
#cmakedefine KOKKOS_ENABLE_MEMKIND
#cmakedefine KOKKOS_ENABLE_LIBRT
@ -33,6 +41,7 @@
#cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA
#cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR
#cmakedefine KOKKOS_ENABLE_CUDA_LDG_INTRINSIC
#cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
#cmakedefine KOKKOS_ENABLE_HPX_ASYNC_DISPATCH
#cmakedefine KOKKOS_ENABLE_DEBUG
#cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK

View File

@ -0,0 +1,17 @@
# Define the imported interface target Kokkos::kokkos once; later inclusions
# of this file are no-ops.
if(NOT TARGET Kokkos::kokkos)
  # Compute the installation prefix relative to this file: start from the
  # directory containing it, then go up three more levels.
  get_filename_component(KOKKOS_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" DIRECTORY)
  get_filename_component(KOKKOS_IMPORT_PREFIX "${KOKKOS_IMPORT_PREFIX}" DIRECTORY)
  get_filename_component(KOKKOS_IMPORT_PREFIX "${KOKKOS_IMPORT_PREFIX}" DIRECTORY)
  get_filename_component(KOKKOS_IMPORT_PREFIX "${KOKKOS_IMPORT_PREFIX}" DIRECTORY)
  # A prefix of "/" is stored as empty so that "/include" below is not
  # written as "//include".
  if(KOKKOS_IMPORT_PREFIX STREQUAL "/")
    set(KOKKOS_IMPORT_PREFIX "")
  endif()

  add_library(Kokkos::kokkos INTERFACE IMPORTED)
  # The @...@ placeholders are substituted when this template is configured.
  set_target_properties(Kokkos::kokkos
    PROPERTIES
      INTERFACE_LINK_LIBRARIES "@Kokkos_LIBRARIES@;@KOKKOS_LINK_OPTIONS@"
      INTERFACE_COMPILE_FEATURES "@KOKKOS_CXX_STANDARD_FEATURE@"
      INTERFACE_COMPILE_OPTIONS "@KOKKOS_ALL_COMPILE_OPTIONS@"
      INTERFACE_INCLUDE_DIRECTORIES "${KOKKOS_IMPORT_PREFIX}/include"
  )
endif()

View File

@ -1,8 +1,12 @@
IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
# Note: "stubs" suffix allows CMake to find the dummy
# libcuda.so provided by the NVIDIA CUDA Toolkit for
# cross-compiling CUDA on a host without a GPU.
KOKKOS_FIND_IMPORTED(CUDA INTERFACE
LIBRARIES cudart cuda
LIBRARY_PATHS ENV LD_LIBRARY_PATH ENV CUDA_PATH
LIBRARY_PATHS ENV LD_LIBRARY_PATH ENV CUDA_PATH /usr/local/cuda
LIBRARY_SUFFIXES lib lib64 lib/stubs lib64/stubs
ALLOW_SYSTEM_PATH_FALLBACK
)
ELSE()

View File

@ -3,15 +3,18 @@ TRY_COMPILE(KOKKOS_HAS_PTHREAD_ARG
${KOKKOS_TOP_BUILD_DIR}/tpl_tests
${KOKKOS_SOURCE_DIR}/cmake/compile_tests/pthread.cpp
LINK_LIBRARIES -pthread
COMPILE_DEFINITIONS -pthread)
COMPILE_DEFINITIONS -pthread
)
# The test no longer requires C++11
# if we did need C++ standard support, then we should add the option
# ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION}
INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(PTHREAD DEFAULT_MSG KOKKOS_HAS_PTHREAD_ARG)
KOKKOS_CREATE_IMPORTED_TPL(PTHREAD
INTERFACE #this is not a real library with a real location
COMPILE_OPTIONS -pthread
LINK_OPTIONS -pthread)
#Only create the TPL if we succeed
IF (KOKKOS_HAS_PTHREAD_ARG)
KOKKOS_CREATE_IMPORTED_TPL(PTHREAD
INTERFACE #this is not a real library with a real location
COMPILE_OPTIONS -pthread
LINK_OPTIONS -pthread)
ENDIF()

View File

@ -67,7 +67,7 @@ Note that all of these use `PUBLIC`! Almost every Kokkos flag is not private to
### Compiler Features and Compiler Options
Compiler options are flags like `-fopenmp` that do not need to be "resolved."
Compiler options are flags like `-fopenmp` that do not need to be "resolved."
The flag is either on or off.
Compiler features are more fine-grained and require conflicting requests to be resolved.
Suppose I have
@ -145,11 +145,11 @@ If Kokkos depends on, e.g. `hwloc` the downstream project will also need to link
There are three stages in adding a new third-party library (TPL):
* Finding: find the desired library on the system and verify the installation is correct
* Importing: create a CMake target, if necessary, that is compatible with `target_link_libraries`. This is mostly relevant for TPLs not installed with CMake.
* Exporting: make the desired library visible to downstream projects
* Exporting: make the desired library visible to downstream projects
TPLs are somewhat complicated by whether the library was installed with CMake or some other build system.
If CMake, our lives are greatly simplified. We simply use `find_package` to locate the installed CMake project then call `target_link_libraries(kokkoscore PUBLIC/PRIVATE TPL)`. For libraries not installed with CMake, the process is a bit more complex.
It is up to the Kokkos developers to "convert" the library into a CMake target as if it had been installed as a valid modern CMake target with properties.
It is up to the Kokkos developers to "convert" the library into a CMake target as if it had been installed as a valid modern CMake target with properties.
There are helper functions for simplifying the process of importing TPLs in Kokkos, but we walk through the process in detail to clearly illustrate the steps involved.
#### TPL Search Order
@ -166,8 +166,9 @@ There are 3 possibilities that could be used:
The following is the search order that Kokkos follows. Note: This differs from the default search order used by CMake `find_library` and `find_path`. CMake prefers default system paths over user-provided paths.
For Kokkos (and package managers in general), it is better to prefer user-provided paths since this usually indicates a specific version we want.
1. `<NAME>_ROOT`
1. `Kokkos_<NAME>_DIR`
1. `<NAME>_ROOT` command line option
1. `<NAME>_ROOT` environment variable
1. `Kokkos_<NAME>_DIR` command line option
1. Paths added by Kokkos CMake logic
1. Default system paths (if allowed)

View File

@ -1,6 +1,6 @@
#include <omp.h>
int main(int argc, char** argv) {
int main(int, char**) {
int thr = omp_get_num_threads();
if (thr > 0)
return thr;

View File

@ -4,6 +4,10 @@ void* kokkos_test(void* args) { return args; }
int main(void) {
pthread_t thread;
/* Use NULL to avoid C++11. Some compilers
do not have C++11 by default. Forcing C++11
in the compile tests can be done, but is unnecessary
*/
pthread_create(&thread, NULL, kokkos_test, NULL);
pthread_join(thread, NULL);
return 0;

View File

@ -24,10 +24,6 @@ IF(NOT ${MACRO_DEFINE_NAME} STREQUAL "")
ENDIF()
ENDMACRO()
MACRO(GLOBAL_RESET VARNAME)
SET(${VARNAME} "" CACHE INTERNAL "" FORCE)
ENDMACRO()
MACRO(GLOBAL_OVERWRITE VARNAME VALUE TYPE)
SET(${VARNAME} ${VALUE} CACHE ${TYPE} "" FORCE)
ENDMACRO()
@ -88,13 +84,9 @@ MACRO(ADD_INTERFACE_LIBRARY LIB_NAME)
SET_TARGET_PROPERTIES(${LIB_NAME} PROPERTIES INTERFACE TRUE)
ENDMACRO()
IF(NOT TARGET check)
ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR})
ENDIF()
FUNCTION(KOKKOS_ADD_TEST)
if (KOKKOS_HAS_TRILINOS)
CMAKE_PARSE_ARGUMENTS(TEST
CMAKE_PARSE_ARGUMENTS(TEST
""
"EXE;NAME"
""
@ -108,22 +100,27 @@ FUNCTION(KOKKOS_ADD_TEST)
TRIBITS_ADD_TEST(
${EXE_ROOT}
NAME ${TEST_NAME}
${ARGN}
COMM serial mpi
NUM_MPI_PROCS 1
${TEST_UNPARSED_ARGUMENTS}
)
else()
CMAKE_PARSE_ARGUMENTS(TEST
CMAKE_PARSE_ARGUMENTS(TEST
"WILL_FAIL"
"FAIL_REGULAR_EXPRESSION;PASS_REGULAR_EXPRESSION;EXE;NAME"
"CATEGORIES;CMD_ARGS"
${ARGN})
# To match Tribits, we should always be receiving
# the root names of exes/libs
IF(TEST_EXE)
SET(EXE ${TEST_EXE})
SET(EXE_ROOT ${TEST_EXE})
ELSE()
SET(EXE ${TEST_NAME})
SET(EXE_ROOT ${TEST_NAME})
ENDIF()
# Prepend package name to the test name
# These should be the full target name
SET(TEST_NAME ${PACKAGE_NAME}_${TEST_NAME})
SET(EXE ${PACKAGE_NAME}_${EXE_ROOT})
IF(WIN32)
ADD_TEST(NAME ${TEST_NAME} WORKING_DIRECTORY ${LIBRARY_OUTPUT_PATH} COMMAND ${EXE}${CMAKE_EXECUTABLE_SUFFIX} ${TEST_CMD_ARGS})
ELSE()
@ -160,7 +157,7 @@ FUNCTION(KOKKOS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES TPL_NAME)
if (KOKKOS_HAS_TRILINOS)
TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES(${TPL_NAME} ${ARGN})
else()
CMAKE_PARSE_ARGUMENTS(PARSE
CMAKE_PARSE_ARGUMENTS(PARSE
""
""
"REQUIRED_HEADERS;REQUIRED_LIBS_NAMES"
@ -214,13 +211,13 @@ ENDFUNCTION()
FUNCTION(KOKKOS_TARGET_INCLUDE_DIRECTORIES TARGET)
IF(KOKKOS_HAS_TRILINOS)
KOKKOS_LIB_TYPE(${TARGET} INCTYPE)
KOKKOS_LIB_TYPE(${TARGET} INCTYPE)
#don't trust tribits to do this correctly - but need to add package name
TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} ${ARGN})
ELSEIF(TARGET ${TARGET})
ELSEIF(TARGET ${TARGET})
#the target actually exists - this means we are doing separate libs
#or this a test library
KOKKOS_LIB_TYPE(${TARGET} INCTYPE)
KOKKOS_LIB_TYPE(${TARGET} INCTYPE)
TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} ${ARGN})
ELSE()
GET_PROPERTY(LIBS GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES)
@ -239,7 +236,7 @@ ELSE()
SET(options INTERFACE)
SET(oneValueArgs)
SET(multiValueArgs)
CMAKE_PARSE_ARGUMENTS(PARSE
CMAKE_PARSE_ARGUMENTS(PARSE
"INTERFACE"
""
""
@ -264,7 +261,7 @@ ELSE()
SET(oneValueArgs)
SET(multiValueArgs HEADERS SOURCES)
CMAKE_PARSE_ARGUMENTS(PARSE
CMAKE_PARSE_ARGUMENTS(PARSE
"STATIC;SHARED"
""
"HEADERS;SOURCES"
@ -277,10 +274,6 @@ ELSE()
LIST(REMOVE_DUPLICATES PARSE_SOURCES)
ENDIF()
ADD_LIBRARY(${NAME} ${PARSE_SOURCES})
target_link_libraries(
${NAME}
PUBLIC kokkos
)
ENDIF()
ENDFUNCTION()

View File

@ -9,52 +9,6 @@ FUNCTION(KOKKOS_ARCH_OPTION SUFFIX DEV_TYPE DESCRIPTION)
SET(KOKKOS_ARCH_${SUFFIX} ${KOKKOS_ARCH_${SUFFIX}} PARENT_SCOPE)
ENDFUNCTION()
# ARCH_FLAGS([LINK_ONLY] [COMPILE_ONLY]
#            [<COMPILER_ID> <flag>...]...
#            [DEFAULT <flag>...])
#
# Picks the flag list that matches KOKKOS_CXX_COMPILER_ID and records it via
# GLOBAL_APPEND (defined elsewhere in the project).
#
#   <COMPILER_ID>  One of NVIDIA PGI XL Cray Intel Clang AppleClang GNU,
#                  followed by the flags to use for that compiler. The value
#                  NO-VALUE-SPECIFIED means "this compiler gets no flags"
#                  (it also suppresses the DEFAULT fallback).
#   DEFAULT        Flags used when the active compiler is in the list above
#                  but has no entry of its own in this call.
#   LINK_ONLY      Do not add the flags to the compile options.
#   COMPILE_ONLY   Do not add the flags to the link options.
#
# A compiler ID that is not in the known list receives no flags at all,
# not even DEFAULT.
FUNCTION(ARCH_FLAGS)
  SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang GNU)
  CMAKE_PARSE_ARGUMENTS(
    PARSE
    "LINK_ONLY;COMPILE_ONLY"
    ""
    "${COMPILERS}"
    ${ARGN})

  SET(COMPILER ${KOKKOS_CXX_COMPILER_ID})

  SET(FLAGS)
  FOREACH(COMP ${COMPILERS})
    IF (COMPILER STREQUAL "${COMP}")
      IF (PARSE_${COMPILER})
        IF (NOT "${PARSE_${COMPILER}}" STREQUAL "NO-VALUE-SPECIFIED")
          SET(FLAGS ${PARSE_${COMPILER}})
        ENDIF()
      ELSEIF(PARSE_DEFAULT)
        SET(FLAGS ${PARSE_DEFAULT})
      ENDIF()
    ENDIF()
  ENDFOREACH()

  # CMAKE_PARSE_ARGUMENTS stores the options with the PARSE_ prefix. Testing
  # the bare names LINK_ONLY / COMPILE_ONLY always saw an undefined variable,
  # so both options used to be silently ignored.
  IF (NOT PARSE_LINK_ONLY)
    # The funky logic here is for future handling of argument deduplication
    # If we naively pass multiple -Xcompiler flags to target_compile_options
    # -Xcompiler will get deduplicated and break the build
    IF ("-Xcompiler" IN_LIST FLAGS)
      LIST(REMOVE_ITEM FLAGS "-Xcompiler")
      GLOBAL_APPEND(KOKKOS_XCOMPILER_OPTIONS ${FLAGS})
    ELSE()
      GLOBAL_APPEND(KOKKOS_COMPILE_OPTIONS ${FLAGS})
    ENDIF()
  ENDIF()

  IF (NOT PARSE_COMPILE_ONLY)
    GLOBAL_APPEND(KOKKOS_LINK_OPTIONS ${FLAGS})
  ENDIF()
ENDFUNCTION()
# Make sure devices and compiler ID are done
KOKKOS_CFG_DEPENDS(ARCH COMPILER_ID)
@ -98,14 +52,15 @@ KOKKOS_ARCH_OPTION(VOLTA70 GPU "NVIDIA Volta generation CC 7.0")
KOKKOS_ARCH_OPTION(VOLTA72 GPU "NVIDIA Volta generation CC 7.2")
KOKKOS_ARCH_OPTION(TURING75 GPU "NVIDIA Turing generation CC 7.5")
KOKKOS_ARCH_OPTION(EPYC HOST "AMD Epyc architecture")
KOKKOS_ARCH_OPTION(VEGA900 GPU "AMD GPU MI25 GFX900")
KOKKOS_ARCH_OPTION(VEGA906 GPU "AMD GPU MI50/MI60 GFX906")
IF (KOKKOS_ENABLE_CUDA)
#Regardless of version, make sure we define the general architecture name
IF (KOKKOS_ARCH_KEPLER30 OR KOKKOS_ARCH_KEPLER32 OR KOKKOS_ARCH_KEPLER35 OR KOKKOS_ARCH_KEPLER37)
SET(KOKKOS_ARCH_KEPLER ON)
ENDIF()
#Regardless of version, make sure we define the general architecture name
IF (KOKKOS_ARCH_MAXWELL50 OR KOKKOS_ARCH_MAXWELL52 OR KOKKOS_ARCH_MAXWELL53)
SET(KOKKOS_ARCH_MAXWELL ON)
@ -126,13 +81,13 @@ ENDIF()
IF(KOKKOS_ENABLE_COMPILER_WARNINGS)
SET(COMMON_WARNINGS
"-Wall" "-Wshadow" "-pedantic"
"-Wall" "-Wunused-parameter" "-Wshadow" "-pedantic"
"-Wsign-compare" "-Wtype-limits" "-Wuninitialized")
SET(GNU_WARNINGS "-Wempty-body" "-Wclobbered" "-Wignored-qualifiers"
${COMMON_WARNINGS})
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
PGI NO-VALUE-SPECIFIED
GNU ${GNU_WARNINGS}
DEFAULT ${COMMON_WARNINGS}
@ -141,7 +96,8 @@ ENDIF()
#------------------------------- KOKKOS_CUDA_OPTIONS ---------------------------
GLOBAL_RESET(KOKKOS_CUDA_OPTIONS)
#clear anything that might be in the cache
GLOBAL_SET(KOKKOS_CUDA_OPTIONS)
# Construct the Makefile options
IF (KOKKOS_ENABLE_CUDA_LAMBDA)
IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
@ -157,6 +113,7 @@ ENDIF()
IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
SET(CUDA_ARCH_FLAG "--cuda-gpu-arch")
SET(AMDGPU_ARCH_FLAG "--amdgpu-target")
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -x cuda)
IF (KOKKOS_ENABLE_CUDA)
SET(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND ON CACHE BOOL "enable CUDA Clang workarounds" FORCE)
@ -171,27 +128,13 @@ IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -lineinfo)
ENDIF()
UNSET(_UPPERCASE_CMAKE_BUILD_TYPE)
IF (KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER 9.0 OR KOKKOS_CXX_COMPILER_VERSION VERSION_EQUAL 9.0)
GLOBAL_APPEND(KOKKOS_CUDAFE_OPTIONS --diag_suppress=esa_on_defaulted_function_ignored)
IF (KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 9.0 AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 10.0)
GLOBAL_APPEND(KOKKOS_CUDAFE_OPTIONS --diag_suppress=esa_on_defaulted_function_ignored)
ENDIF()
ENDIF()
IF(KOKKOS_ENABLE_OPENMP)
IF (KOKKOS_CXX_COMPILER_ID STREQUAL AppleClang)
MESSAGE(FATAL_ERROR "Apple Clang does not support OpenMP. Use native Clang instead")
ENDIF()
ARCH_FLAGS(
Clang -fopenmp=libomp
PGI -mp
NVIDIA -Xcompiler -fopenmp
Cray NO-VALUE-SPECIFIED
XL -qsmp=omp
DEFAULT -fopenmp
)
ENDIF()
IF (KOKKOS_ARCH_ARMV80)
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
Cray NO-VALUE-SPECIFIED
PGI NO-VALUE-SPECIFIED
DEFAULT -march=armv8-a
@ -199,7 +142,7 @@ IF (KOKKOS_ARCH_ARMV80)
ENDIF()
IF (KOKKOS_ARCH_ARMV81)
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
Cray NO-VALUE-SPECIFIED
PGI NO-VALUE-SPECIFIED
DEFAULT -march=armv8.1-a
@ -208,7 +151,7 @@ ENDIF()
IF (KOKKOS_ARCH_ARMV8_THUNDERX)
SET(KOKKOS_ARCH_ARMV80 ON) #Not a cache variable
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
Cray NO-VALUE-SPECIFIED
PGI NO-VALUE-SPECIFIED
DEFAULT -march=armv8-a -mtune=thunderx
@ -217,7 +160,7 @@ ENDIF()
IF (KOKKOS_ARCH_ARMV8_THUNDERX2)
SET(KOKKOS_ARCH_ARMV81 ON) #Not a cache variable
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
Cray NO-VALUE-SPECIFIED
PGI NO-VALUE-SPECIFIED
DEFAULT -mcpu=thunderx2t99 -mtune=thunderx2t99
@ -225,7 +168,7 @@ IF (KOKKOS_ARCH_ARMV8_THUNDERX2)
ENDIF()
IF (KOKKOS_ARCH_EPYC)
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
Intel -mavx2
DEFAULT -march=znver1 -mtune=znver1
)
@ -234,7 +177,7 @@ IF (KOKKOS_ARCH_EPYC)
ENDIF()
IF (KOKKOS_ARCH_WSM)
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
Intel -xSSE4.2
PGI -tp=nehalem
Cray NO-VALUE-SPECIFIED
@ -245,7 +188,7 @@ ENDIF()
IF (KOKKOS_ARCH_SNB OR KOKKOS_ARCH_AMDAVX)
SET(KOKKOS_ARCH_AVX ON)
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
Intel -mavx
PGI -tp=sandybridge
Cray NO-VALUE-SPECIFIED
@ -255,7 +198,7 @@ ENDIF()
IF (KOKKOS_ARCH_HSW)
SET(KOKKOS_ARCH_AVX2 ON)
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
Intel -xCORE-AVX2
PGI -tp=haswell
Cray NO-VALUE-SPECIFIED
@ -265,7 +208,7 @@ ENDIF()
IF (KOKKOS_ARCH_BDW)
SET(KOKKOS_ARCH_AVX2 ON)
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
Intel -xCORE-AVX2
PGI -tp=haswell
Cray NO-VALUE-SPECIFIED
@ -275,7 +218,7 @@ ENDIF()
IF (KOKKOS_ARCH_EPYC)
SET(KOKKOS_ARCH_AMD_AVX2 ON)
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
Intel -mavx2
DEFAULT -march=znver1 -mtune=znver1
)
@ -284,7 +227,7 @@ ENDIF()
IF (KOKKOS_ARCH_KNL)
#avx512-mic
SET(KOKKOS_ARCH_AVX512MIC ON) #not a cache variable
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
Intel -xMIC-AVX512
PGI NO-VALUE-SPECIFIED
Cray NO-VALUE-SPECIFIED
@ -294,7 +237,7 @@ ENDIF()
IF (KOKKOS_ARCH_KNC)
SET(KOKKOS_USE_ISA_KNC ON)
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
DEFAULT -mmic
)
ENDIF()
@ -302,7 +245,7 @@ ENDIF()
IF (KOKKOS_ARCH_SKX)
#avx512-xeon
SET(KOKKOS_ARCH_AVX512XEON ON)
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
Intel -xCORE-AVX512
PGI NO-VALUE-SPECIFIED
Cray NO-VALUE-SPECIFIED
@ -319,7 +262,7 @@ IF (KOKKOS_ARCH_BDW OR KOKKOS_ARCH_SKX)
ENDIF()
IF (KOKKOS_ARCH_POWER7)
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
PGI NO-VALUE-SPECIFIED
DEFAULT -mcpu=power7 -mtune=power7
)
@ -327,7 +270,7 @@ IF (KOKKOS_ARCH_POWER7)
ENDIF()
IF (KOKKOS_ARCH_POWER8)
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
PGI NO-VALUE-SPECIFIED
NVIDIA NO-VALUE-SPECIFIED
DEFAULT -mcpu=power8 -mtune=power8
@ -335,7 +278,7 @@ IF (KOKKOS_ARCH_POWER8)
ENDIF()
IF (KOKKOS_ARCH_POWER9)
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
PGI NO-VALUE-SPECIFIED
NVIDIA NO-VALUE-SPECIFIED
DEFAULT -mcpu=power9 -mtune=power9
@ -347,33 +290,50 @@ IF (KOKKOS_ARCH_POWER8 OR KOKKOS_ARCH_POWER9)
ENDIF()
IF (Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
ARCH_FLAGS(
COMPILER_SPECIFIC_FLAGS(
Clang -fcuda-rdc
NVIDIA --relocatable-device-code=true
)
ENDIF()
#Right now we cannot get the compiler ID when cross-compiling, so just check
#that HIP is enabled
IF (Kokkos_ENABLE_HIP)
IF (Kokkos_ENABLE_HIP_RELOCATABLE_DEVICE_CODE)
COMPILER_SPECIFIC_FLAGS(
DEFAULT -fgpu-rdc
)
ELSE()
COMPILER_SPECIFIC_FLAGS(
DEFAULT -fno-gpu-rdc
)
ENDIF()
ENDIF()
SET(CUDA_ARCH_ALREADY_SPECIFIED "")
FUNCTION(CHECK_CUDA_ARCH ARCH FLAG)
IF(KOKKOS_ARCH_${ARCH})
IF(CUDA_ARCH_ALREADY_SPECIFIED)
MESSAGE(FATAL_ERROR "Multiple GPU architectures given! Already have ${CUDA_ARCH_ALREADY_SPECIFIED}, but trying to add ${ARCH}. If you are re-running CMake, try clearing the cache and running again.")
ENDIF()
SET(CUDA_ARCH_ALREADY_SPECIFIED ${ARCH} PARENT_SCOPE)
IF (NOT KOKKOS_ENABLE_CUDA)
MESSAGE(WARNING "Given CUDA arch ${ARCH}, but Kokkos_ENABLE_CUDA is OFF. Option will be ignored.")
UNSET(KOKKOS_ARCH_${ARCH} PARENT_SCOPE)
ELSE()
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
IF(KOKKOS_ARCH_${ARCH})
IF(CUDA_ARCH_ALREADY_SPECIFIED)
MESSAGE(FATAL_ERROR "Multiple GPU architectures given! Already have ${CUDA_ARCH_ALREADY_SPECIFIED}, but trying to add ${ARCH}. If you are re-running CMake, try clearing the cache and running again.")
ENDIF()
SET(CUDA_ARCH_ALREADY_SPECIFIED ${ARCH} PARENT_SCOPE)
IF (NOT KOKKOS_ENABLE_CUDA AND NOT KOKKOS_ENABLE_OPENMPTARGET)
MESSAGE(WARNING "Given CUDA arch ${ARCH}, but Kokkos_ENABLE_CUDA and Kokkos_ENABLE_OPENMPTARGET are OFF. Option will be ignored.")
UNSET(KOKKOS_ARCH_${ARCH} PARENT_SCOPE)
ELSE()
SET(KOKKOS_CUDA_ARCH_FLAG ${FLAG} PARENT_SCOPE)
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
ENDIF()
ENDIF()
ENDIF()
ENDIF()
ENDFUNCTION()
#These will define KOKKOS_CUDA_ARCH_FLAG
#to the corresponding flag name if ON
CHECK_CUDA_ARCH(KEPLER30 sm_30)
CHECK_CUDA_ARCH(KEPLER32 sm_32)
CHECK_CUDA_ARCH(KEPLER35 sm_35)
@ -383,18 +343,76 @@ CHECK_CUDA_ARCH(MAXWELL52 sm_52)
CHECK_CUDA_ARCH(MAXWELL53 sm_53)
CHECK_CUDA_ARCH(PASCAL60 sm_60)
CHECK_CUDA_ARCH(PASCAL61 sm_61)
CHECK_CUDA_ARCH(VOLTA70 sm_70)
CHECK_CUDA_ARCH(VOLTA72 sm_72)
CHECK_CUDA_ARCH(VOLTA70 sm_70)
CHECK_CUDA_ARCH(VOLTA72 sm_72)
CHECK_CUDA_ARCH(TURING75 sm_75)
SET(AMDGPU_ARCH_ALREADY_SPECIFIED "")
# CHECK_AMDGPU_ARCH(<ARCH> <FLAG>)
#
# If KOKKOS_ARCH_<ARCH> is enabled, select <FLAG> as the AMD GPU target.
#
#   ARCH  Suffix of the KOKKOS_ARCH_<ARCH> option to test (e.g. VEGA900).
#   FLAG  Target name passed to the compiler (e.g. gfx900).
#
# Effects when the architecture is enabled:
#   * Fails the configure if another AMD GPU architecture was already
#     selected (tracked in the caller's AMDGPU_ARCH_ALREADY_SPECIFIED).
#   * Sets AMDGPU_ARCH_ALREADY_SPECIFIED to <ARCH> in the caller's scope.
#   * If neither HIP nor OpenMPTarget is enabled, warns and unsets
#     KOKKOS_ARCH_<ARCH> in the caller's scope.
#   * Otherwise sets KOKKOS_AMDGPU_ARCH_FLAG in the caller's scope, appends
#     "${AMDGPU_ARCH_FLAG}=<FLAG>" to KOKKOS_AMDGPU_OPTIONS and, for HIP
#     builds, to KOKKOS_LINK_OPTIONS as well.
FUNCTION(CHECK_AMDGPU_ARCH ARCH FLAG)
  IF(KOKKOS_ARCH_${ARCH})
    IF(AMDGPU_ARCH_ALREADY_SPECIFIED)
      MESSAGE(FATAL_ERROR "Multiple GPU architectures given! Already have ${AMDGPU_ARCH_ALREADY_SPECIFIED}, but trying to add ${ARCH}. If you are re-running CMake, try clearing the cache and running again.")
    ENDIF()
    SET(AMDGPU_ARCH_ALREADY_SPECIFIED ${ARCH} PARENT_SCOPE)
    IF (NOT KOKKOS_ENABLE_HIP AND NOT KOKKOS_ENABLE_OPENMPTARGET)
      # The message names the options that are actually tested above.
      MESSAGE(WARNING "Given HIP arch ${ARCH}, but Kokkos_ENABLE_HIP and Kokkos_ENABLE_OPENMPTARGET are OFF. Option will be ignored.")
      UNSET(KOKKOS_ARCH_${ARCH} PARENT_SCOPE)
    ELSE()
      SET(KOKKOS_AMDGPU_ARCH_FLAG ${FLAG} PARENT_SCOPE)
      GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}")
      IF(KOKKOS_ENABLE_HIP)
        GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}")
      ENDIF()
    ENDIF()
  ENDIF()
ENDFUNCTION()
#These will define KOKKOS_AMDGPU_ARCH_FLAG
#to the corresponding flag name if ON
CHECK_AMDGPU_ARCH(VEGA900 gfx900) # Radeon Instinct MI25
CHECK_AMDGPU_ARCH(VEGA906 gfx906) # Radeon Instinct MI50 and MI60
IF (KOKKOS_ENABLE_OPENMPTARGET)
SET(CLANG_CUDA_ARCH ${KOKKOS_CUDA_ARCH_FLAG})
IF (CLANG_CUDA_ARCH)
COMPILER_SPECIFIC_FLAGS(
Clang -Xopenmp-target -march=${CLANG_CUDA_ARCH} -fopenmp-targets=nvptx64-nvidia-cuda
XL -qtgtarch=${KOKKOS_CUDA_ARCH_FLAG}
)
ENDIF()
SET(CLANG_AMDGPU_ARCH ${KOKKOS_AMDGPU_ARCH_FLAG})
IF (CLANG_AMDGPU_ARCH)
COMPILER_SPECIFIC_FLAGS(
Clang -Xopenmp-target=amdgcn-amd-amdhsa -march=${CLANG_AMDGPU_ARCH} -fopenmp-targets=amdgcn-amd-amdhsa
)
ENDIF()
ENDIF()
# A CUDA build needs an NVIDIA GPU architecture; report an error (configure
# continues, generation is skipped) when none of the architecture checks
# has recorded one in CUDA_ARCH_ALREADY_SPECIFIED.
IF(KOKKOS_ENABLE_CUDA AND NOT CUDA_ARCH_ALREADY_SPECIFIED)
  MESSAGE(SEND_ERROR "CUDA enabled but no NVIDIA GPU architecture currently enabled. Please give one '-DKokkos_ARCH_{..}=ON' to enable an NVIDIA GPU architecture.")
ENDIF()
#CMake verbose is kind of pointless
#Let's just always print things
MESSAGE(STATUS "Execution Spaces:")
IF(KOKKOS_ENABLE_CUDA)
MESSAGE(STATUS " Device Parallel: CUDA")
ELSE()
MESSAGE(STATUS " Device Parallel: NONE")
FOREACH (_BACKEND CUDA OPENMPTARGET HIP)
IF(KOKKOS_ENABLE_${_BACKEND})
IF(_DEVICE_PARALLEL)
MESSAGE(FATAL_ERROR "Multiple device parallel execution spaces are not allowed! "
"Trying to enable execution space ${_BACKEND}, "
"but execution space ${_DEVICE_PARALLEL} is already enabled. "
"Remove the CMakeCache.txt file and re-configure.")
ENDIF()
SET(_DEVICE_PARALLEL ${_BACKEND})
ENDIF()
ENDFOREACH()
IF(NOT _DEVICE_PARALLEL)
SET(_DEVICE_PARALLEL "NONE")
ENDIF()
MESSAGE(STATUS " Device Parallel: ${_DEVICE_PARALLEL}")
UNSET(_DEVICE_PARALLEL)
FOREACH (_BACKEND OPENMP PTHREAD HPX)
IF(KOKKOS_ENABLE_${_BACKEND})

View File

@ -0,0 +1,12 @@
# Detect the Cray Programming Environment from the configure-time
# environment. A usable CRAYPE_VERSION value marks the build as CrayPE
# (KOKKOS_IS_CRAYPE); in that case warn unless CRAYPE_LINK_TYPE is "dynamic".
set(CRAYPE_VERSION $ENV{CRAYPE_VERSION})
if(CRAYPE_VERSION)
  set(KOKKOS_IS_CRAYPE TRUE)
  set(CRAYPE_LINK_TYPE $ENV{CRAYPE_LINK_TYPE})
  if(NOT CRAYPE_LINK_TYPE)
    # Empty (or false-like) value: treated as "not set".
    message(WARNING "CRAYPE_LINK_TYPE is not set. Linking is likely to fail unless this is set to 'dynamic'")
  elseif(NOT CRAYPE_LINK_TYPE STREQUAL "dynamic")
    message(WARNING "CRAYPE_LINK_TYPE is set to ${CRAYPE_LINK_TYPE}. Linking is likely to fail unless this is set to 'dynamic'")
  endif()
endif()

View File

@ -13,7 +13,7 @@ EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
STRING(REGEX REPLACE "^ +" ""
INTERNAL_HAVE_COMPILER_NVCC ${INTERNAL_HAVE_COMPILER_NVCC})
INTERNAL_HAVE_COMPILER_NVCC "${INTERNAL_HAVE_COMPILER_NVCC}")
IF(INTERNAL_HAVE_COMPILER_NVCC)
@ -31,16 +31,32 @@ IF(INTERNAL_HAVE_COMPILER_NVCC)
SET(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE)
ENDIF()
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Cray)
# SET nvcc's compiler version.
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
# The Cray compiler reports as Clang to most versions of CMake
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
COMMAND grep Cray
COMMAND wc -l
OUTPUT_VARIABLE INTERNAL_HAVE_CRAY_COMPILER
OUTPUT_STRIP_TRAILING_WHITESPACE)
IF (INTERNAL_HAVE_CRAY_COMPILER) #not actually Clang
SET(KOKKOS_CLANG_IS_CRAY TRUE)
ENDIF()
ENDIF()
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Cray OR KOKKOS_CLANG_IS_CRAY)
# SET Cray's compiler version.
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE)
STRING(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+$"
STRING(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+"
TEMP_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION})
SET(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE)
IF (KOKKOS_CLANG_IS_CRAY)
SET(KOKKOS_CLANG_CRAY_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION})
ELSE()
SET(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE)
ENDIF()
ENDIF()
# Enforce the minimum compilers supported by Kokkos.

View File

@ -1,4 +1,4 @@
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_ENABLE_OPENMP)
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_ENABLE_OPENMP AND NOT KOKKOS_CLANG_IS_CRAY)
# The clang "version" doesn't actually tell you what runtimes and tools
# were built into Clang. We should therefore make sure that libomp
# was actually built into Clang. Otherwise the user will get nonsensical
@ -11,7 +11,7 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_ENABLE_OPENMP)
#also - this is easier to use than CMakeCheckCXXSourceCompiles
TRY_COMPILE(CLANG_HAS_OMP
${KOKKOS_TOP_BUILD_DIR}/corner_cases
${KOKKOS_SOURCE_DIR}/cmake/compile_tests/clang_omp.cpp
${KOKKOS_SOURCE_DIR}/cmake/compile_tests/clang_omp.cpp
COMPILE_DEFINITIONS -fopenmp=libomp
LINK_LIBRARIES -fopenmp=libomp
)
@ -22,6 +22,30 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_ENABLE_OPENMP)
UNSET(CLANG_HAS_OMP CACHE) #make sure CMake always re-runs this
ENDIF()
# AppleClang + OpenMP sanity check.
#
# The compiler version alone does not say whether an OpenMP runtime is
# usable, so build a small OpenMP program up front and stop with a clear
# message if that fails, instead of letting the user hit confusing errors
# later in the build.
if(KOKKOS_CXX_COMPILER_ID STREQUAL AppleClang AND KOKKOS_ENABLE_OPENMP)
  # try_compile has no plain "compiler flags" / "linker flags" inputs here,
  # so the compile flags are passed through COMPILE_DEFINITIONS and the
  # link flag through LINK_LIBRARIES.
  try_compile(APPLECLANG_HAS_OMP
    ${KOKKOS_TOP_BUILD_DIR}/corner_cases
    ${KOKKOS_SOURCE_DIR}/cmake/compile_tests/clang_omp.cpp
    COMPILE_DEFINITIONS -Xpreprocessor -fopenmp
    LINK_LIBRARIES -lomp
  )
  # The result is dropped from the cache on both paths so that the check is
  # re-run on every configure.
  if(APPLECLANG_HAS_OMP)
    unset(APPLECLANG_HAS_OMP CACHE)
  else()
    unset(APPLECLANG_HAS_OMP CACHE)
    message(FATAL_ERROR "AppleClang failed OpenMP check. You have requested -DKokkos_ENABLE_OPENMP=ON, but the AppleClang compiler does not appear to have been built with OpenMP support")
  endif()
endif()
IF (KOKKOS_CXX_STANDARD STREQUAL 17)
IF (KOKKOS_CXX_COMPILER_ID STREQUAL GNU AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 7)

View File

@ -31,6 +31,41 @@ ELSE()
SET(OMP_DEFAULT OFF)
ENDIF()
KOKKOS_DEVICE_OPTION(OPENMP ${OMP_DEFAULT} HOST "Whether to build OpenMP backend")
IF(KOKKOS_ENABLE_OPENMP)
SET(ClangOpenMPFlag -fopenmp=libomp)
IF(KOKKOS_CLANG_IS_CRAY)
SET(ClangOpenMPFlag -fopenmp)
ENDIF()
COMPILER_SPECIFIC_FLAGS(
Clang ${ClangOpenMPFlag}
AppleClang -Xpreprocessor -fopenmp
PGI -mp
NVIDIA -Xcompiler -fopenmp
Cray NO-VALUE-SPECIFIED
XL -qsmp=omp
DEFAULT -fopenmp
)
COMPILER_SPECIFIC_LIBS(
AppleClang -lomp
)
ENDIF()
KOKKOS_DEVICE_OPTION(OPENMPTARGET OFF DEVICE "Whether to build the OpenMP target backend")
IF (KOKKOS_ENABLE_OPENMPTARGET)
COMPILER_SPECIFIC_FLAGS(
Clang -fopenmp -fopenmp=libomp
XL -qsmp=omp -qoffload -qnoeh
DEFAULT -fopenmp
)
COMPILER_SPECIFIC_DEFS(
XL KOKKOS_IBM_XL_OMP45_WORKAROUND
Clang KOKKOS_WORKAROUND_OPENMPTARGET_CLANG
)
# Are there compilers which identify as Clang and need this library?
# COMPILER_SPECIFIC_LIBS(
# Clang -lopenmptarget
# )
ENDIF()
IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA)
SET(CUDA_DEFAULT ON)
@ -59,3 +94,5 @@ ENDIF()
KOKKOS_DEVICE_OPTION(SERIAL ${SERIAL_DEFAULT} HOST "Whether to build serial backend")
KOKKOS_DEVICE_OPTION(HPX OFF HOST "Whether to build HPX backend (experimental)")
KOKKOS_DEVICE_OPTION(HIP OFF DEVICE "Whether to build HIP backend")

View File

@ -21,6 +21,7 @@ ENDFUNCTION()
# Certain defaults will depend on knowing the enabled devices
KOKKOS_CFG_DEPENDS(OPTIONS DEVICES)
KOKKOS_CFG_DEPENDS(OPTIONS COMPILER_ID)
# Put a check in just in case people are using this option
KOKKOS_DEPRECATED_LIST(OPTIONS ENABLE)
@ -28,8 +29,10 @@ KOKKOS_DEPRECATED_LIST(OPTIONS ENABLE)
KOKKOS_ENABLE_OPTION(CUDA_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for CUDA")
KOKKOS_ENABLE_OPTION(CUDA_UVM OFF "Whether to use unified memory (UM) for CUDA by default")
KOKKOS_ENABLE_OPTION(CUDA_LDG_INTRINSIC OFF "Whether to use CUDA LDG intrinsics")
KOKKOS_ENABLE_OPTION(HIP_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for HIP")
KOKKOS_ENABLE_OPTION(HPX_ASYNC_DISPATCH OFF "Whether HPX supports asynchronous dispatch")
KOKKOS_ENABLE_OPTION(TESTS OFF "Whether to build the unit tests")
KOKKOS_ENABLE_OPTION(EXAMPLES OFF "Whether to build the examples")
STRING(TOUPPER "${CMAKE_BUILD_TYPE}" UPPERCASE_CMAKE_BUILD_TYPE)
IF(UPPERCASE_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
KOKKOS_ENABLE_OPTION(DEBUG ON "Whether to activate extra debug features - may increase compile times")
@ -51,12 +54,14 @@ IF (KOKKOS_ENABLE_CUDA)
SET(KOKKOS_COMPILER_CUDA_VERSION "${KOKKOS_COMPILER_VERSION_MAJOR}${KOKKOS_COMPILER_VERSION_MINOR}")
ENDIF()
IF (Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA AND DEFINED KOKKOS_COMPILER_CUDA_VERSION AND KOKKOS_COMPILER_CUDA_VERSION GREATER 70)
SET(LAMBDA_DEFAULT ON)
IF (Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA)
SET(CUDA_LAMBDA_DEFAULT ON)
ELSEIF (KOKKOS_ENABLE_CUDA AND (KOKKOS_CXX_COMPILER_ID STREQUAL Clang))
SET(CUDA_LAMBDA_DEFAULT ON)
ELSE()
SET(LAMBDA_DEFAULT OFF)
SET(CUDA_LAMBDA_DEFAULT OFF)
ENDIF()
KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${LAMBDA_DEFAULT} "Whether to activate experimental lambda features")
KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${CUDA_LAMBDA_DEFAULT} "Whether to activate experimental lambda features")
IF (Trilinos_ENABLE_Kokkos)
SET(COMPLEX_ALIGN_DEFAULT OFF)
ELSE()
@ -64,7 +69,13 @@ ELSE()
ENDIF()
KOKKOS_ENABLE_OPTION(COMPLEX_ALIGN ${COMPLEX_ALIGN_DEFAULT} "Whether to align Kokkos::complex to 2*alignof(RealType)")
KOKKOS_ENABLE_OPTION(CUDA_CONSTEXPR OFF "Whether to activate experimental relaxed constexpr functions")
IF (KOKKOS_ENABLE_CUDA AND (KOKKOS_CXX_COMPILER_ID STREQUAL Clang))
SET(CUDA_CONSTEXPR_DEFAULT ON)
ELSE()
SET(CUDA_CONSTEXPR_DEFAULT OFF)
ENDIF()
KOKKOS_ENABLE_OPTION(CUDA_CONSTEXPR ${CUDA_CONSTEXPR_DEFAULT} "Whether to activate experimental relaxed constexpr functions")
FUNCTION(check_device_specific_options)
CMAKE_PARSE_ARGUMENTS(SOME "" "DEVICE" "OPTIONS" ${ARGN})
@ -84,9 +95,18 @@ FUNCTION(check_device_specific_options)
ENDFUNCTION()
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC)
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HIP OPTIONS HIP_RELOCATABLE_DEVICE_CODE)
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HPX OPTIONS HPX_ASYNC_DISPATCH)
# Needed due to change from deprecated name to new header define name
IF (KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION)
SET(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ON)
ENDIF()
# This is known to occur with Clang 9. We would need to use nvcc as the linker
# http://lists.llvm.org/pipermail/cfe-dev/2018-June/058296.html
# TODO: Through great effort we can use a different linker by hacking
# CMAKE_CXX_LINK_EXECUTABLE in a future release
IF (KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE AND KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
MESSAGE(FATAL_ERROR "Relocatable device code is currently not supported with Clang - must use nvcc_wrapper or turn off RDC")
ENDIF()

View File

@ -3,9 +3,9 @@
# kokkos_option
# Validate options are given with correct case and define an internal
# upper-case version for use within
# upper-case version for use within
#
#
#
# @FUNCTION: kokkos_deprecated_list
#
@ -62,7 +62,7 @@ FUNCTION(kokkos_option CAMEL_SUFFIX DEFAULT TYPE DOCSTRING)
UNSET(${opt} CACHE)
ELSE()
MESSAGE(FATAL_ERROR "Matching option found for ${CAMEL_NAME} with the wrong case ${opt}. Please delete your CMakeCache.txt and change option to -D${CAMEL_NAME}=${${opt}}. This is now enforced to avoid hard-to-debug CMake cache inconsistencies.")
ENDIF()
ENDIF()
ENDIF()
ENDIF()
ENDFOREACH()
@ -125,7 +125,7 @@ MACRO(kokkos_export_imported_tpl NAME)
KOKKOS_APPEND_CONFIG_LINE("IF(NOT TARGET ${NAME})")
KOKKOS_APPEND_CONFIG_LINE("ADD_LIBRARY(${NAME} UNKNOWN IMPORTED)")
KOKKOS_APPEND_CONFIG_LINE("SET_TARGET_PROPERTIES(${NAME} PROPERTIES")
GET_TARGET_PROPERTY(TPL_LIBRARY ${NAME} IMPORTED_LOCATION)
IF(TPL_LIBRARY)
KOKKOS_APPEND_CONFIG_LINE("IMPORTED_LOCATION ${TPL_LIBRARY}")
@ -198,7 +198,7 @@ MACRO(kokkos_import_tpl NAME)
# I have still been getting errors about ROOT variables being ignored
# I'm not sure if this is a scope issue - but make sure
# the policy is set before we do any find_package calls
IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.12.0")
IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.12.0")
CMAKE_POLICY(SET CMP0074 NEW)
ENDIF()
@ -341,11 +341,12 @@ ENDMACRO()
# default, custom paths are prioritized over system paths. The searched
# order is:
# 1. <NAME>_ROOT variable
# 2. Kokkos_<NAME>_DIR variable
# 3. Locations in the PATHS option
# 4. Default system paths, if allowed.
# 2. <NAME>_ROOT environment variable
# 3. Kokkos_<NAME>_DIR variable
# 4. Locations in the PATHS option
# 5. Default system paths, if allowed.
#
# Default system paths are allowed if none of options (1)-(3) are specified
# Default system paths are allowed if none of options (1)-(4) are specified
# or if default paths are specifically allowed via ALLOW_SYSTEM_PATH_FALLBACK
#
# Usage::
@ -387,33 +388,29 @@ MACRO(kokkos_find_header VAR_NAME HEADER TPL_NAME)
"PATHS"
${ARGN})
SET(${HEADER}_FOUND FALSE)
SET(${VAR_NAME} "${VARNAME}-NOTFOUND")
SET(HAVE_CUSTOM_PATHS FALSE)
IF(NOT ${HEADER}_FOUND AND DEFINED ${TPL_NAME}_ROOT)
#ONLY look in the root directory
FIND_PATH(${VAR_NAME} ${HEADER} PATHS ${${TPL_NAME}_ROOT}/include NO_DEFAULT_PATH)
IF(DEFINED ${TPL_NAME}_ROOT OR
DEFINED ENV{${TPL_NAME}_ROOT} OR
DEFINED KOKKOS_${TPL_NAME}_DIR OR
TPL_PATHS)
FIND_PATH(${VAR_NAME} ${HEADER}
PATHS
${${TPL_NAME}_ROOT}
$ENV{${TPL_NAME}_ROOT}
${KOKKOS_${TPL_NAME}_DIR}
${TPL_PATHS}
PATH_SUFFIXES include
NO_DEFAULT_PATH)
SET(HAVE_CUSTOM_PATHS TRUE)
ENDIF()
IF(NOT ${HEADER}_FOUND AND DEFINED KOKKOS_${TPL_NAME}_DIR)
#ONLY look in the root directory
FIND_PATH(${VAR_NAME} ${HEADER} PATHS ${KOKKOS_${TPL_NAME}_DIR}/include NO_DEFAULT_PATH)
SET(HAVE_CUSTOM_PATHS TRUE)
IF(NOT HAVE_CUSTOM_PATHS OR TPL_ALLOW_SYSTEM_PATH_FALLBACK)
#No-op if ${VAR_NAME} set by previous call
FIND_PATH(${VAR_NAME} ${HEADER})
ENDIF()
IF (NOT ${HEADER}_FOUND AND TPL_PATHS)
#we got custom paths
#ONLY look in these paths and nowhere else
FIND_PATH(${VAR_NAME} ${HEADER} PATHS ${TPL_PATHS} NO_DEFAULT_PATH)
SET(HAVE_CUSTOM_PATHS TRUE)
ENDIF()
IF (NOT HAVE_CUSTOM_PATHS OR TPL_ALLOW_SYSTEM_PATH_FALLBACK)
#Now go ahead and look in system paths
IF (NOT ${HEADER}_FOUND)
FIND_PATH(${VAR_NAME} ${HEADER})
ENDIF()
ENDIF()
ENDMACRO()
#
@ -424,9 +421,10 @@ ENDMACRO()
# default, custom paths are prioritized over system paths. The search
# order is:
# 1. <NAME>_ROOT variable
# 2. Kokkos_<NAME>_DIR variable
# 3. Locations in the PATHS option
# 4. Default system paths, if allowed.
# 2. <NAME>_ROOT environment variable
# 3. Kokkos_<NAME>_DIR variable
# 4. Locations in the PATHS option
# 5. Default system paths, if allowed.
#
# Default system paths are allowed if none of options (1)-(4) are specified
# or if default paths are specifically allowed via ALLOW_SYSTEM_PATH_FALLBACK
@ -439,6 +437,7 @@ ENDMACRO()
# <TPL_NAME>
# [ALLOW_SYSTEM_PATH_FALLBACK]
# [PATHS path1 [path2 ...]]
# [SUFFIXES suffix1 [suffix2 ...]]
# )
#
# ``<VAR_NAME>``
@ -463,39 +462,46 @@ ENDMACRO()
#
# Custom paths to search for the library
#
# ``SUFFIXES``
#
# Suffixes appended to PATHS when attempting to locate
# the library. Defaults to {lib, lib64}.
#
MACRO(kokkos_find_library VAR_NAME LIB TPL_NAME)
CMAKE_PARSE_ARGUMENTS(TPL
"ALLOW_SYSTEM_PATH_FALLBACK"
""
"PATHS"
"PATHS;SUFFIXES"
${ARGN})
SET(${LIB}_FOUND FALSE)
IF(NOT TPL_SUFFIXES)
SET(TPL_SUFFIXES lib lib64)
ENDIF()
SET(${VAR_NAME} "${VARNAME}-NOTFOUND")
SET(HAVE_CUSTOM_PATHS FALSE)
IF(NOT ${LIB}_FOUND AND DEFINED ${TPL_NAME}_ROOT)
FIND_LIBRARY(${VAR_NAME} ${LIB} PATHS ${${TPL_NAME}_ROOT}/lib ${${TPL_NAME}_ROOT}/lib64 NO_DEFAULT_PATH)
IF(DEFINED ${TPL_NAME}_ROOT OR
DEFINED ENV{${TPL_NAME}_ROOT} OR
DEFINED KOKKOS_${TPL_NAME}_DIR OR
TPL_PATHS)
FIND_LIBRARY(${VAR_NAME} ${LIB}
PATHS
${${TPL_NAME}_ROOT}
$ENV{${TPL_NAME}_ROOT}
${KOKKOS_${TPL_NAME}_DIR}
${TPL_PATHS}
PATH_SUFFIXES
${TPL_SUFFIXES}
NO_DEFAULT_PATH)
SET(HAVE_CUSTOM_PATHS TRUE)
ENDIF()
IF(NOT ${LIB}_FOUND AND DEFINED KOKKOS_${TPL_NAME}_DIR)
#we got root paths, only look in these paths and nowhere else
FIND_LIBRARY(${VAR_NAME} ${LIB} PATHS ${KOKKOS_${TPL_NAME}_DIR}/lib ${KOKKOS_${TPL_NAME}_DIR}/lib64 NO_DEFAULT_PATH)
SET(HAVE_CUSTOM_PATHS TRUE)
IF(NOT HAVE_CUSTOM_PATHS OR TPL_ALLOW_SYSTEM_PATH_FALLBACK)
#No-op if ${VAR_NAME} set by previous call
FIND_LIBRARY(${VAR_NAME} ${LIB} PATH_SUFFIXES ${TPL_SUFFIXES})
ENDIF()
IF (NOT ${LIB}_FOUND AND TPL_PATHS)
#we got custom paths, only look in these paths and nowhere else
FIND_LIBRARY(${VAR_NAME} ${LIB} PATHS ${TPL_PATHS} NO_DEFAULT_PATH)
SET(HAVE_CUSTOM_PATHS TRUE)
ENDIF()
IF (NOT HAVE_CUSTOM_PATHS OR TPL_ALLOW_SYSTEM_PATH_FALLBACK)
IF (NOT ${LIB}_FOUND)
#Now go ahead and look in system paths
FIND_LIBRARY(${VAR_NAME} ${LIB})
ENDIF()
ENDIF()
ENDMACRO()
#
@ -510,26 +516,28 @@ ENDMACRO()
# <NAME>
# INTERFACE
# ALLOW_SYSTEM_PATH_FALLBACK
# LIBRARY <path_to_librarY>
# LINK_LIBRARIES <lib1> <lib2> ...
# COMPILE_OPTIONS <opt1> <opt2> ...
# LINK_OPTIONS <opt1> <opt2> ...
# MODULE_NAME <name>
# IMPORTED_NAME <name>
# LIBRARY <name>
# LIBRARIES <name1> <name2> ...
# LIBRARY_PATHS <path1> <path2> ...
# LIBRARY_SUFFIXES <suffix1> <suffix2> ...
# HEADER <name>
# HEADERS <name1> <name2> ...
# HEADER_PATHS <path1> <path2> ...
# )
#
# ``INTERFACE``
#
# If specified, this TPL will build an INTERFACE library rather than an
# IMPORTED target
#
# ``ALLOW_SYSTEM_PATH_FALLBACK"
# ``ALLOW_SYSTEM_PATH_FALLBACK``
#
# If custom paths are given and the library is not found
# should we be allowed to search default system paths
# or error out if not found in given paths.
#
# ``LIBRARY <name>``
#
# If specified, this gives the name of the library to look for
#
# ``MODULE_NAME <name>``
#
# If specified, the name of the enclosing module passed to
@ -541,29 +549,42 @@ ENDMACRO()
# If specified, this gives the name of the target to build.
# Defaults to Kokkos::<NAME>
#
# ``LIBRARY <name>``
#
# If specified, this gives the name of the library to look for
#
# ``LIBRARIES <name1> <name2> ...``
#
# If specified, this gives a list of libraries to find for the package
#
# ``LIBRARY_PATHS <path1> <path2> ...``
#
# If specified, this gives a list of paths to search for the library
# If not given, <NAME>_ROOT/lib and <NAME>_ROOT/lib64 will be searched.
# If specified, this gives a list of paths to search for the library.
# If not given, <NAME>_ROOT will be searched.
#
# ``LIBRARY_SUFFIXES <suffix1> <suffix2> ...``
#
# Suffixes appended to LIBRARY_PATHS when attempting to locate
# libraries. If not given, defaults to {lib, lib64}.
#
# ``HEADER <name>``
#
# If specified, this gives the name of a header to look for
#
# ``HEADERS <name1> <name2> ...``
#
# If specified, this gives a list of headers to find for the package
#
# ``HEADER_PATHS <path1> <path2> ...``
#
# If specified, this gives a list of paths to search for the headers.
# If not given, <NAME>_ROOT/include will be searched.
#
# ``HEADERS <name1> <name2> ...``
#
# If specified, this gives a list of headers to find for the package
#
# ``LIBRARIES <name1> <name2> ...``
#
# If specified, this gives a list of libraries to find for the package
#
MACRO(kokkos_find_imported NAME)
CMAKE_PARSE_ARGUMENTS(TPL
"INTERFACE;ALLOW_SYSTEM_PATH_FALLBACK"
"HEADER;LIBRARY;IMPORTED_NAME;MODULE_NAME"
"HEADER_PATHS;LIBRARY_PATHS;HEADERS;LIBRARIES"
"IMPORTED_NAME;MODULE_NAME;LIBRARY;HEADER"
"LIBRARIES;LIBRARY_PATHS;LIBRARY_SUFFIXES;HEADERS;HEADER_PATHS"
${ARGN})
IF(NOT TPL_MODULE_NAME)
@ -584,6 +605,10 @@ MACRO(kokkos_find_imported NAME)
ENDIF()
ENDIF()
IF (NOT TPL_LIBRARY_SUFFIXES)
SET(TPL_LIBRARY_SUFFIXES lib lib64)
ENDIF()
SET(${NAME}_INCLUDE_DIRS)
IF (TPL_HEADER)
KOKKOS_FIND_HEADER(${NAME}_INCLUDE_DIRS ${TPL_HEADER} ${NAME} ${ALLOW_PATH_FALLBACK_OPT} PATHS ${TPL_HEADER_PATHS})
@ -598,16 +623,22 @@ MACRO(kokkos_find_imported NAME)
SET(${NAME}_LIBRARY)
IF(TPL_LIBRARY)
KOKKOS_FIND_LIBRARY(${NAME}_LIBRARY ${TPL_LIBRARY} ${NAME} ${ALLOW_PATH_FALLBACK_OPT} PATHS ${TPL_LIBRARY_PATHS})
KOKKOS_FIND_LIBRARY(${NAME}_LIBRARY ${TPL_LIBRARY} ${NAME}
${ALLOW_PATH_FALLBACK_OPT}
PATHS ${TPL_LIBRARY_PATHS}
SUFFIXES ${TPL_LIBRARY_SUFFIXES})
ENDIF()
SET(${NAME}_FOUND_LIBRARIES)
FOREACH(LIB ${TPL_LIBRARIES})
KOKKOS_FIND_LIBRARY(${LIB}_LOCATION ${LIB} ${NAME} ${ALLOW_PATH_FALLBACK_OPT} PATHS ${TPL_LIBRARY_PATHS})
KOKKOS_FIND_LIBRARY(${LIB}_LOCATION ${LIB} ${NAME}
${ALLOW_PATH_FALLBACK_OPT}
PATHS ${TPL_LIBRARY_PATHS}
SUFFIXES ${TPL_LIBRARY_SUFFIXES})
IF(${LIB}_LOCATION)
LIST(APPEND ${NAME}_FOUND_LIBRARIES ${${LIB}_LOCATION})
ELSE()
SET(${NAME}_FOUND_LIBRARIES ${${LIB}_LOCATION})
SET(${NAME}_FOUND_LIBRARIES ${${LIB}_LOCATION})
BREAK()
ENDIF()
ENDFOREACH()
@ -629,6 +660,13 @@ MACRO(kokkos_find_imported NAME)
MARK_AS_ADVANCED(${NAME}_INCLUDE_DIRS ${NAME}_FOUND_LIBRARIES ${NAME}_LIBRARY)
#this is so much fun on a Cray system
#/usr/include should never be added as a -isystem include
#this freaks out the compiler include search order
IF (KOKKOS_IS_CRAYPE)
LIST(REMOVE_ITEM ${NAME}_INCLUDE_DIRS "/usr/include")
ENDIF()
IF (${TPL_MODULE_NAME}_FOUND)
SET(IMPORT_TYPE)
IF (TPL_INTERFACE)
@ -698,3 +736,66 @@ FUNCTION(kokkos_link_tpl TARGET)
ENDIF()
ENDFUNCTION()
# Select the values that apply to the active compiler and append them to
# one or more global Kokkos option lists.
#
# Usage::
#
#   COMPILER_SPECIFIC_OPTIONS_HELPER(
#     [<CompilerID> <value>...]...
#     [DEFAULT <value>...]
#     [COMPILE_OPTIONS] [LINK_OPTIONS] [COMPILE_DEFINITIONS] [LINK_LIBRARIES]
#   )
#
# Recognized <CompilerID> keywords: NVIDIA PGI XL Cray Intel Clang AppleClang GNU.
#
# Selection rules:
#   * Nothing is selected when KOKKOS_CXX_COMPILER_ID is not one of the
#     recognized keywords (the DEFAULT values are not used in that case).
#   * When values were given for the active compiler they are selected,
#     unless they are exactly "NO-VALUE-SPECIFIED", which selects nothing.
#   * Otherwise the DEFAULT values, if any, are selected.
#
# The switches choose the destination list(s); each one is filled through
# GLOBAL_APPEND (defined elsewhere in the project).
FUNCTION(COMPILER_SPECIFIC_OPTIONS_HELPER)
  SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang GNU)
  CMAKE_PARSE_ARGUMENTS(
    PARSE
    "LINK_OPTIONS;COMPILE_OPTIONS;COMPILE_DEFINITIONS;LINK_LIBRARIES"
    ""
    "${COMPILERS}"
    ${ARGN})
  IF(PARSE_UNPARSED_ARGUMENTS)
    MESSAGE(SEND_ERROR "'${PARSE_UNPARSED_ARGUMENTS}' argument(s) not recognized when providing compiler specific options")
  ENDIF()

  SET(ACTIVE_ID ${KOKKOS_CXX_COMPILER_ID})
  SET(SELECTED_VALUES)
  # Only compilers named in COMPILERS take part in the selection.
  IF(ACTIVE_ID IN_LIST COMPILERS)
    IF(PARSE_${ACTIVE_ID})
      IF(NOT "${PARSE_${ACTIVE_ID}}" STREQUAL "NO-VALUE-SPECIFIED")
        SET(SELECTED_VALUES ${PARSE_${ACTIVE_ID}})
      ENDIF()
    ELSEIF(PARSE_DEFAULT)
      SET(SELECTED_VALUES ${PARSE_DEFAULT})
    ENDIF()
  ENDIF()

  IF(PARSE_COMPILE_OPTIONS)
    # The funky logic here is for future handling of argument deduplication
    # If we naively pass multiple -Xcompiler flags to target_compile_options
    # -Xcompiler will get deduplicated and break the build
    IF("-Xcompiler" IN_LIST SELECTED_VALUES)
      # Strip the marker itself; the remaining values go to the dedicated
      # -Xcompiler list instead of the plain compile options.
      LIST(REMOVE_ITEM SELECTED_VALUES "-Xcompiler")
      GLOBAL_APPEND(KOKKOS_XCOMPILER_OPTIONS ${SELECTED_VALUES})
    ELSE()
      GLOBAL_APPEND(KOKKOS_COMPILE_OPTIONS ${SELECTED_VALUES})
    ENDIF()
  ENDIF()

  IF(PARSE_LINK_OPTIONS)
    GLOBAL_APPEND(KOKKOS_LINK_OPTIONS ${SELECTED_VALUES})
  ENDIF()

  IF(PARSE_COMPILE_DEFINITIONS)
    GLOBAL_APPEND(KOKKOS_COMPILE_DEFINITIONS ${SELECTED_VALUES})
  ENDIF()

  IF(PARSE_LINK_LIBRARIES)
    GLOBAL_APPEND(KOKKOS_LINK_LIBRARIES ${SELECTED_VALUES})
  ENDIF()
ENDFUNCTION()
# Register compiler-specific flags as both compile options and link options.
# All arguments are forwarded to COMPILER_SPECIFIC_OPTIONS_HELPER with the
# COMPILE_OPTIONS and LINK_OPTIONS switches appended.
FUNCTION(COMPILER_SPECIFIC_FLAGS)
  COMPILER_SPECIFIC_OPTIONS_HELPER(
    ${ARGN}
    COMPILE_OPTIONS
    LINK_OPTIONS)
ENDFUNCTION()
# Register compiler-specific preprocessor definitions.
# All arguments are forwarded to COMPILER_SPECIFIC_OPTIONS_HELPER with the
# COMPILE_DEFINITIONS switch appended.
FUNCTION(COMPILER_SPECIFIC_DEFS)
  COMPILER_SPECIFIC_OPTIONS_HELPER(
    ${ARGN}
    COMPILE_DEFINITIONS)
ENDFUNCTION()
# Register compiler-specific link libraries.
# All arguments are forwarded to COMPILER_SPECIFIC_OPTIONS_HELPER with the
# LINK_LIBRARIES switch appended.
FUNCTION(COMPILER_SPECIFIC_LIBS)
  COMPILER_SPECIFIC_OPTIONS_HELPER(
    ${ARGN}
    LINK_LIBRARIES)
ENDFUNCTION()

View File

@ -31,12 +31,11 @@ IF (NOT KOKKOS_HAS_TRILINOS)
ELSE()
CONFIGURE_FILE(cmake/KokkosConfigCommon.cmake.in ${Kokkos_BINARY_DIR}/KokkosConfigCommon.cmake @ONLY)
file(READ ${Kokkos_BINARY_DIR}/KokkosConfigCommon.cmake KOKKOS_CONFIG_COMMON)
file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/KokkosConfig_install.cmake" ${KOKKOS_CONFIG_COMMON})
file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/KokkosConfig_install.cmake" "${KOKKOS_CONFIG_COMMON}")
CONFIGURE_FILE(cmake/KokkosTrilinosConfig.cmake.in ${Kokkos_BINARY_DIR}/KokkosTrilinosConfig.cmake @ONLY)
file(READ ${Kokkos_BINARY_DIR}/KokkosTrilinosConfig.cmake KOKKOS_TRILINOS_CONFIG)
file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/KokkosConfig_install.cmake" "${KOKKOS_TRILINOS_CONFIG}")
ENDIF()
# build and install pkgconfig file
CONFIGURE_FILE(core/src/kokkos.pc.in kokkos.pc @ONLY)
INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/kokkos.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h DESTINATION ${KOKKOS_HEADER_DIR})

View File

@ -14,12 +14,12 @@ FUNCTION(kokkos_set_cxx_standard_feature standard)
ENDIF()
ELSEIF(CMAKE_CXX_EXTENSIONS)
IF(KOKKOS_DONT_ALLOW_EXTENSIONS)
MESSAGE(FATAL_ERROR "The chosen configuration does not support CXX extensions flags: ${KOKKOS_DONT_ALLOW_EXTENSIONS}. Must set CMAKE_CXX_EXTENSIONS=OFF to continue")
MESSAGE(FATAL_ERROR "The chosen configuration does not support CXX extensions flags: ${KOKKOS_DONT_ALLOW_EXTENSIONS}. Must set CMAKE_CXX_EXTENSIONS=OFF to continue")
ELSE()
GLOBAL_SET(KOKKOS_USE_CXX_EXTENSIONS ON)
ENDIF()
ELSE()
#For trilinos, we need to make sure downstream projects
#For trilinos, we need to make sure downstream projects
GLOBAL_SET(KOKKOS_USE_CXX_EXTENSIONS OFF)
ENDIF()
@ -29,6 +29,10 @@ FUNCTION(kokkos_set_cxx_standard_feature standard)
ELSEIF(NOT KOKKOS_USE_CXX_EXTENSIONS AND ${STANDARD_NAME})
MESSAGE(STATUS "Using ${${STANDARD_NAME}} for C++${standard} standard as feature")
GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME})
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC")
#MSVC doesn't need a command line flag, that doesn't mean it has no support
MESSAGE(STATUS "Using no flag for C++${standard} standard as feature")
GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME})
ELSE()
#nope, we can't do anything here
MESSAGE(WARNING "C++${standard} is not supported as a compiler feature. We will choose custom flags for now, but this behavior has been deprecated. Please open an issue at https://github.com/kokkos/kokkos/issues reporting that ${KOKKOS_CXX_COMPILER_ID} ${KOKKOS_CXX_COMPILER_VERSION} failed for ${KOKKOS_CXX_STANDARD}, preferrably including your CMake command.")
@ -119,6 +123,9 @@ IF (NOT KOKKOS_CXX_STANDARD_FEATURE)
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/intel.cmake)
kokkos_set_intel_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC")
INCLUDE(${KOKKOS_SRC_PATH}/cmake/msvc.cmake)
kokkos_set_msvc_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
ELSE()
INCLUDE(${KOKKOS_SRC_PATH}/cmake/gnu.cmake)
kokkos_set_gnu_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
@ -128,9 +135,9 @@ IF (NOT KOKKOS_CXX_STANDARD_FEATURE)
IF (DEFINED CXX_STD_FLAGS_ACCEPTED)
UNSET(CXX_STD_FLAGS_ACCEPTED CACHE)
ENDIF()
CHECK_CXX_COMPILER_FLAG(${KOKKOS_CXX_STANDARD_FLAG} CXX_STD_FLAGS_ACCEPTED)
CHECK_CXX_COMPILER_FLAG("${KOKKOS_CXX_STANDARD_FLAG}" CXX_STD_FLAGS_ACCEPTED)
IF (NOT CXX_STD_FLAGS_ACCEPTED)
CHECK_CXX_COMPILER_FLAG(${KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG} CXX_INT_STD_FLAGS_ACCEPTED)
CHECK_CXX_COMPILER_FLAG("${KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG}" CXX_INT_STD_FLAGS_ACCEPTED)
IF (NOT CXX_INT_STD_FLAGS_ACCEPTED)
MESSAGE(FATAL_ERROR "${KOKKOS_CXX_COMPILER_ID} did not accept ${KOKKOS_CXX_STANDARD_FLAG} or ${KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG}. You likely need to reduce the level of the C++ standard from ${KOKKOS_CXX_STANDARD}")
ENDIF()

View File

@ -15,6 +15,10 @@ KOKKOS_TPL_OPTION(CUDA Off)
KOKKOS_TPL_OPTION(LIBRT Off)
KOKKOS_TPL_OPTION(LIBDL On)
IF(KOKKOS_ENABLE_PROFILING AND NOT KOKKOS_ENABLE_LIBDL)
MESSAGE(SEND_ERROR "Kokkos_ENABLE_PROFILING requires Kokkos_ENABLE_LIBDL=ON")
ENDIF()
IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_HPX)
SET(HPX_DEFAULT ON)
ELSE()

View File

@ -43,6 +43,8 @@ MACRO(KOKKOS_SUBPACKAGE NAME)
SET(PACKAGE_NAME ${PACKAGE_NAME}${NAME})
STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC)
SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
#ADD_INTERFACE_LIBRARY(PACKAGE_${PACKAGE_NAME})
#GLOBAL_SET(${PACKAGE_NAME}_LIBS "")
endif()
ENDMACRO()
@ -114,57 +116,63 @@ MACRO(KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL LIBRARY_NAME)
VERIFY_EMPTY(KOKKOS_ADD_LIBRARY ${PARSE_UNPARSED_ARGUMENTS})
ENDMACRO()
FUNCTION(KOKKOS_ADD_EXECUTABLE EXE_NAME)
FUNCTION(KOKKOS_ADD_EXECUTABLE ROOT_NAME)
if (KOKKOS_HAS_TRILINOS)
TRIBITS_ADD_EXECUTABLE(${EXE_NAME} ${ARGN})
TRIBITS_ADD_EXECUTABLE(${ROOT_NAME} ${ARGN})
else()
CMAKE_PARSE_ARGUMENTS(PARSE
CMAKE_PARSE_ARGUMENTS(PARSE
"TESTONLY"
""
"SOURCES;TESTONLYLIBS"
${ARGN})
SET(EXE_NAME ${PACKAGE_NAME}_${ROOT_NAME})
ADD_EXECUTABLE(${EXE_NAME} ${PARSE_SOURCES})
IF (PARSE_TESTONLYLIBS)
TARGET_LINK_LIBRARIES(${EXE_NAME} ${PARSE_TESTONLYLIBS})
TARGET_LINK_LIBRARIES(${EXE_NAME} PRIVATE ${PARSE_TESTONLYLIBS})
ENDIF()
VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE ${PARSE_UNPARSED_ARGUMENTS})
#All executables must link to all the kokkos targets
#This is just private linkage because exe is final
TARGET_LINK_LIBRARIES(${EXE_NAME} PRIVATE kokkos)
endif()
ENDFUNCTION()
IF(NOT TARGET check)
ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR})
ENDIF()
FUNCTION(KOKKOS_ADD_EXECUTABLE_AND_TEST ROOT_NAME)
IF (KOKKOS_HAS_TRILINOS)
TRIBITS_ADD_EXECUTABLE_AND_TEST(
${ROOT_NAME}
TESTONLYLIBS kokkos_gtest
${ROOT_NAME}
TESTONLYLIBS kokkos_gtest
${ARGN}
NUM_MPI_PROCS 1
COMM serial mpi
FAIL_REGULAR_EXPRESSION " FAILED "
)
ELSE()
CMAKE_PARSE_ARGUMENTS(PARSE
CMAKE_PARSE_ARGUMENTS(PARSE
""
""
"SOURCES;CATEGORIES"
${ARGN})
VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE_AND_TEST ${PARSE_UNPARSED_ARGUMENTS})
SET(EXE_NAME ${PACKAGE_NAME}_${ROOT_NAME})
KOKKOS_ADD_TEST_EXECUTABLE(${EXE_NAME}
KOKKOS_ADD_TEST_EXECUTABLE(${ROOT_NAME}
SOURCES ${PARSE_SOURCES}
)
KOKKOS_ADD_TEST(NAME ${ROOT_NAME}
EXE ${EXE_NAME}
KOKKOS_ADD_TEST(NAME ${ROOT_NAME}
EXE ${ROOT_NAME}
FAIL_REGULAR_EXPRESSION " FAILED "
)
ENDIF()
ENDFUNCTION()
# Set a property on the executable target that belongs to ROOT_NAME.
#
# Usage::
#
#   KOKKOS_SET_EXE_PROPERTY(<ROOT_NAME> <property> [<value>...])
#
# ``<ROOT_NAME>``
#
#   Un-prefixed executable name. The target that is modified is
#   ${PACKAGE_NAME}_<ROOT_NAME>.
#
# ``<property> [<value>...]``
#
#   Forwarded verbatim to SET_PROPERTY(TARGET ... PROPERTY ...).
#
# Reports a SEND_ERROR and does nothing else if the target does not exist.
FUNCTION(KOKKOS_SET_EXE_PROPERTY ROOT_NAME)
  SET(TARGET_NAME ${PACKAGE_NAME}_${ROOT_NAME})
  IF (NOT TARGET ${TARGET_NAME})
    MESSAGE(SEND_ERROR "No target ${TARGET_NAME} exists - cannot set target properties")
    # SEND_ERROR keeps processing; bail out so SET_PROPERTY does not raise a
    # second, redundant error for the same missing target.
    RETURN()
  ENDIF()
  # Must name the computed target. The previous ${TARGET_PROPERTY} was never
  # defined, so the TARGET list was empty and no property was ever set.
  SET_PROPERTY(TARGET ${TARGET_NAME} PROPERTY ${ARGN})
ENDFUNCTION()
MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_compiler_id.cmake)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_enable_devices.cmake)
@ -178,20 +186,17 @@ MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_corner_cases.cmake)
ENDMACRO()
MACRO(KOKKOS_ADD_TEST_EXECUTABLE EXE_NAME)
CMAKE_PARSE_ARGUMENTS(PARSE
MACRO(KOKKOS_ADD_TEST_EXECUTABLE ROOT_NAME)
CMAKE_PARSE_ARGUMENTS(PARSE
""
""
"SOURCES"
${ARGN})
KOKKOS_ADD_EXECUTABLE(${EXE_NAME}
KOKKOS_ADD_EXECUTABLE(${ROOT_NAME}
SOURCES ${PARSE_SOURCES}
${PARSE_UNPARSED_ARGUMENTS}
TESTONLYLIBS kokkos_gtest
)
IF (NOT KOKKOS_HAS_TRILINOS)
ADD_DEPENDENCIES(check ${EXE_NAME})
ENDIF()
ENDMACRO()
MACRO(KOKKOS_PACKAGE_POSTPROCESS)
@ -201,7 +206,7 @@ MACRO(KOKKOS_PACKAGE_POSTPROCESS)
ENDMACRO()
FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME)
CMAKE_PARSE_ARGUMENTS(PARSE
CMAKE_PARSE_ARGUMENTS(PARSE
"PLAIN_STYLE"
""
""
@ -230,6 +235,15 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME)
$<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_COMPILE_OPTIONS}>
)
TARGET_COMPILE_DEFINITIONS(
${LIBRARY_NAME} PUBLIC
$<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_COMPILE_DEFINITIONS}>
)
TARGET_LINK_LIBRARIES(
${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_LIBRARIES}
)
IF (KOKKOS_ENABLE_CUDA)
TARGET_COMPILE_OPTIONS(
${LIBRARY_NAME}
@ -240,11 +254,18 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME)
LIST(APPEND NODEDUP_CUDAFE_OPTIONS -Xcudafe ${OPT})
ENDFOREACH()
TARGET_COMPILE_OPTIONS(
${LIBRARY_NAME}
${LIBRARY_NAME}
PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${NODEDUP_CUDAFE_OPTIONS}>
)
ENDIF()
IF (KOKKOS_ENABLE_HIP)
TARGET_COMPILE_OPTIONS(
${LIBRARY_NAME}
PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_AMDGPU_OPTIONS}>
)
ENDIF()
LIST(LENGTH KOKKOS_XCOMPILER_OPTIONS XOPT_LENGTH)
IF (XOPT_LENGTH GREATER 1)
MESSAGE(FATAL_ERROR "CMake deduplication does not allow multiple -Xcompiler flags (${KOKKOS_XCOMPILER_OPTIONS}): will require Kokkos to upgrade to minimum 3.12")
@ -253,12 +274,12 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME)
SET(NODEDUP_XCOMPILER_OPTIONS)
FOREACH(OPT ${KOKKOS_XCOMPILER_OPTIONS})
#I have to do this for now because we can't guarantee 3.12 support
#I really should do this with the shell option
LIST(APPEND NODEDUP_XCOMPILER_OPTIONS -Xcompiler)
LIST(APPEND NODEDUP_XCOMPILER_OPTIONS ${OPT})
#I really should do this with the shell option
LIST(APPEND NODEDUP_XCOMPILER_OPTIONS -Xcompiler)
LIST(APPEND NODEDUP_XCOMPILER_OPTIONS ${OPT})
ENDFOREACH()
TARGET_COMPILE_OPTIONS(
${LIBRARY_NAME}
${LIBRARY_NAME}
PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${NODEDUP_XCOMPILER_OPTIONS}>
)
ENDIF()
@ -276,7 +297,7 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME)
ENDFUNCTION()
FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME)
CMAKE_PARSE_ARGUMENTS(PARSE
CMAKE_PARSE_ARGUMENTS(PARSE
"STATIC;SHARED"
""
"HEADERS;SOURCES"
@ -362,7 +383,7 @@ FUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES TARGET)
#ignore the target, tribits doesn't do anything directly with targets
TRIBITS_INCLUDE_DIRECTORIES(${ARGN})
ELSE() #append to a list for later
KOKKOS_LIB_TYPE(${TARGET} INCTYPE)
KOKKOS_LIB_TYPE(${TARGET} INCTYPE)
FOREACH(DIR ${ARGN})
TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $<BUILD_INTERFACE:${DIR}>)
ENDFOREACH()
@ -390,3 +411,15 @@ MACRO(KOKKOS_ADD_TEST_DIRECTORIES)
ENDIF()
ENDIF()
ENDMACRO()
# Add example directories to the build.
#
# Usage::
#
#   KOKKOS_ADD_EXAMPLE_DIRECTORIES(<dir1> [<dir2> ...])
#
# When KOKKOS_HAS_TRILINOS is set, the directories are handed to
# TRIBITS_ADD_EXAMPLE_DIRECTORIES. Otherwise each directory is added with
# ADD_SUBDIRECTORY, but only if KOKKOS_ENABLE_EXAMPLES is true.
MACRO(KOKKOS_ADD_EXAMPLE_DIRECTORIES)
  IF(KOKKOS_HAS_TRILINOS)
    TRIBITS_ADD_EXAMPLE_DIRECTORIES(${ARGN})
  ELSEIF(KOKKOS_ENABLE_EXAMPLES)
    FOREACH(EXAMPLE_DIR ${ARGN})
      ADD_SUBDIRECTORY(${EXAMPLE_DIR})
    ENDFOREACH()
  ENDIF()
ENDMACRO()

View File

@ -0,0 +1,11 @@
# Publish the C++ standard flags for MSVC to the caller's scope.
#
# ``full_standard`` / ``int_standard``
#
#   Accepted but not used by this function.
#
# Both KOKKOS_CXX_STANDARD_FLAG and KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG are
# set to the empty string in the parent scope. The result is the same whether
# or not CMAKE_CXX_EXTENSIONS is enabled.
FUNCTION(kokkos_set_msvc_flags full_standard int_standard)
  FOREACH(FLAG_VAR KOKKOS_CXX_STANDARD_FLAG KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG)
    SET(${FLAG_VAR} "" PARENT_SCOPE)
  ENDFOREACH()
ENDFUNCTION()

View File

@ -55,19 +55,9 @@
# Check for CUDA support
IF (NOT TPL_ENABLE_CUDA OR CUDA_VERSION VERSION_LESS "4.1")
MESSAGE(FATAL_ERROR "\nCUSPARSE: did not find acceptable version of CUDA libraries (4.1 or greater)")
IF (NOT TPL_ENABLE_CUDA)
MESSAGE(FATAL_ERROR "\nCUSPARSE requires CUDA")
ELSE()
IF(CMAKE_VERSION VERSION_LESS "2.8.8")
# FindCUDA before CMake 2.8.8 does not find cusparse library; therefore, we must
find_library(CUDA_cusparse_LIBRARY
cusparse
HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib
)
IF(CUDA_cusparse_LIBRARY STREQUAL "CUDA_cusparse_LIBRARY-NOTFOUND")
MESSAGE(FATAL_ERROR "\nCUSPARSE: could not find cuspasre library.")
ENDIF()
ENDIF()
GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY})

View File

@ -76,19 +76,18 @@ CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial"
CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial"
CUDA_IBM_BUILD_LIST="Cuda_OpenMP,Cuda_Serial"
GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized"
IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
#CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized"
GCC_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized"
IBM_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CLANG_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
INTEL_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
#CUDA_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CUDA_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized"
PGI_WARNING_FLAGS=""
# Default. Machine specific can override.
DEBUG=False
ARGS=""
CUSTOM_BUILD_LIST=""
QTHREADS_PATH=""
DRYRUN=False
BUILD_ONLY=False
declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=1
@ -114,9 +113,6 @@ do
--kokkos-path*)
KOKKOS_PATH="${key#*=}"
;;
--qthreads-path*)
QTHREADS_PATH="${key#*=}"
;;
--build-list*)
CUSTOM_BUILD_LIST="${key#*=}"
;;
@ -417,8 +413,8 @@ if [ "$PRINT_HELP" = "True" ]; then
echo "--build-list=BUILD,BUILD,BUILD..."
echo " Provide a comma-separated list of builds instead of running all builds"
echo " Valid items:"
echo " OpenMP, Pthread, Qthreads, Serial, OpenMP_Serial, Pthread_Serial"
echo " Qthreads_Serial, Cuda_OpenMP, Cuda_Pthread, Cuda_Serial"
echo " OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial"
echo " Cuda_OpenMP, Cuda_Pthread, Cuda_Serial"
echo ""
echo "ARGS: list of expressions matching compilers to test"
@ -483,33 +479,6 @@ for ARG in $ARGS; do
done
done
# Check if Qthreads build requested.
HAVE_QTHREADS_BUILD="False"
if [ -n "$CUSTOM_BUILD_LIST" ]; then
if [[ "$CUSTOM_BUILD_LIST" = *Qthreads* ]]; then
HAVE_QTHREADS_BUILD="True"
fi
else
for COMPILER_DATA in "${COMPILERS[@]}"; do
ARR=($COMPILER_DATA)
BUILD_LIST=${ARR[2]}
if [[ "$BUILD_LIST" = *Qthreads* ]]; then
HAVE_QTHREADS_BUILD="True"
fi
done
fi
# Ensure Qthreads path is set if Qthreads build is requested.
if [ "$HAVE_QTHREADS_BUILD" = "True" ]; then
if [ -z "$QTHREADS_PATH" ]; then
echo "Need to supply Qthreads path (--qthreads-path) when testing Qthreads backend." >&2
exit 1
else
# Strip trailing slashes from path.
QTHREADS_PATH=$(echo $QTHREADS_PATH | sed 's/\/*$//')
fi
fi
#
# Functions.
#
@ -627,14 +596,6 @@ single_build_and_test() {
local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info)))
fi
if [[ "$build" = *Qthreads* ]]; then
if [[ "$build_type" = hwloc* ]]; then
local extra_args="$extra_args --qthreads-path=${QTHREADS_PATH}_hwloc"
else
local extra_args="$extra_args --qthreads-path=$QTHREADS_PATH"
fi
fi
if [[ "$OPT_FLAG" = "" ]]; then
OPT_FLAG="-O3"
fi

View File

@ -5,58 +5,42 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
IF(Kokkos_ENABLE_CUDA)
SET(SOURCES
TestMain.cpp
TestMain.cpp
TestCuda.cpp
)
KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec_Cuda
KOKKOS_ADD_EXECUTABLE_AND_TEST( PerformanceTest_Cuda
SOURCES ${SOURCES}
)
KOKKOS_ADD_TEST( NAME PerformanceTest_Cuda
EXE PerfTestExec_Cuda
)
ENDIF()
IF(Kokkos_ENABLE_PTHREAD)
SET(SOURCES
TestMain.cpp
TestMain.cpp
TestThreads.cpp
)
KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec_Threads
KOKKOS_ADD_EXECUTABLE_AND_TEST( PerformanceTest_Threads
SOURCES ${SOURCES}
)
KOKKOS_ADD_TEST( NAME PerformanceTest_Threads
EXE PerfTestExec_Threads
)
ENDIF()
IF(Kokkos_ENABLE_OPENMP)
SET(SOURCES
TestMain.cpp
TestMain.cpp
TestOpenMP.cpp
)
KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec_OpenMP
KOKKOS_ADD_EXECUTABLE_AND_TEST( PerformanceTest_OpenMP
SOURCES ${SOURCES}
)
KOKKOS_ADD_TEST( NAME PerformanceTest_OpenMP
EXE PerfTestExec_OpenMP
)
ENDIF()
IF(Kokkos_ENABLE_HPX)
SET(SOURCES
TestMain.cpp
TestMain.cpp
TestHPX.cpp
)
KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec_HPX
KOKKOS_ADD_EXECUTABLE_AND_TEST( PerformanceTest_HPX
SOURCES ${SOURCES}
)
KOKKOS_ADD_TEST( NAME PerformanceTest_HPX
EXE PerfTestExec_HPX
)
ENDIF()

View File

@ -103,19 +103,19 @@ class Bitset {
}
}
KOKKOS_INLINE_FUNCTION
KOKKOS_DEFAULTED_FUNCTION
Bitset(const Bitset<Device>&) = default;
KOKKOS_INLINE_FUNCTION
KOKKOS_DEFAULTED_FUNCTION
Bitset& operator=(const Bitset<Device>&) = default;
KOKKOS_INLINE_FUNCTION
KOKKOS_DEFAULTED_FUNCTION
Bitset(Bitset<Device>&&) = default;
KOKKOS_INLINE_FUNCTION
KOKKOS_DEFAULTED_FUNCTION
Bitset& operator=(Bitset<Device>&&) = default;
KOKKOS_INLINE_FUNCTION
KOKKOS_DEFAULTED_FUNCTION
~Bitset() = default;
/// number of bits in the set

View File

@ -238,6 +238,53 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
#endif
}
/// \brief Constructor that allocates View objects on both host and device.
///
/// This constructor works like the analogous constructor of View.
/// The first argument is a ViewCtorProp bundle, which allows passing
/// a label, a request to skip initialization, and any of the other
/// properties that can be wrapped up in a ViewCtorProp.
/// The arguments that follow are the dimensions of the
/// View objects. For example, if the View has three dimensions,
/// the first three integer arguments will be nonzero, and you may
/// omit the integer arguments that follow.
///
/// This overload is only enabled when ViewCtorProp<P...>::has_pointer
/// is false (see the enable_if on the type of n0).
///
/// \param arg_prop Construction properties forwarded to the device View.
/// \param n0..n7   Extents forwarded to the device View; each defaults to
///                 KOKKOS_IMPL_CTOR_DEFAULT_ARG.
template <class... P>
DualView(const Impl::ViewCtorProp<P...>& arg_prop,
typename std::enable_if<!Impl::ViewCtorProp<P...>::has_pointer,
size_t>::type const n0 =
KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG)
// The device view is built first; the host view is then derived from it.
: d_view(arg_prop, n0, n1, n2, n3, n4, n5, n6, n7),
h_view(create_mirror_view(d_view)) // without UVM, host View mirrors
,
// Fresh flags view, allocated with the label "DualView::modified_flags".
modified_flags(t_modified_flags("DualView::modified_flags")) {
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
// Deprecated members, each built from modified_flags with index 0
// (modified_host) and index 1 (modified_device).
modified_host = t_modified_flag(modified_flags, 0);
modified_device = t_modified_flag(modified_flags, 1);
#endif
}
/// \brief Constructor taking a ViewAllocateWithoutInitializing argument.
///
/// Delegates to the ViewCtorProp-based constructor, passing a property
/// bundle built from arg_prop.label and Kokkos::WithoutInitializing,
/// followed by the extents unchanged.
///
/// \param arg_prop        Supplies the label (arg_prop.label).
/// \param arg_N0..arg_N7  Extents forwarded to the delegated constructor;
///                        each defaults to KOKKOS_IMPL_CTOR_DEFAULT_ARG.
explicit inline DualView(const ViewAllocateWithoutInitializing& arg_prop,
const size_t arg_N0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t arg_N1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t arg_N2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t arg_N3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t arg_N4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t arg_N5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t arg_N6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t arg_N7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG)
: DualView(Impl::ViewCtorProp<std::string,
Kokkos::Impl::WithoutInitializing_t>(
arg_prop.label, Kokkos::WithoutInitializing),
arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6,
arg_N7) {}
//! Copy constructor (shallow copy)
template <class SS, class LS, class DS, class MS>
DualView(const DualView<SS, LS, DS, MS>& src)
@ -470,23 +517,43 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
/// as modified, by calling the modify() method with the
/// appropriate template parameter.
template <class Device>
void sync(const typename Impl::enable_if<
void sync(const typename std::enable_if<
(std::is_same<typename traits::data_type,
typename traits::non_const_data_type>::value) ||
(std::is_same<Device, int>::value),
int>::type& = 0) {
if (modified_flags.data() == NULL) return;
if (modified_flags.data() == nullptr) return;
int dev = get_device_side<Device>();
if (dev == 1) { // if Device is the same as DualView's device type
if ((modified_flags(0) > 0) && (modified_flags(0) >= modified_flags(1))) {
#ifdef KOKKOS_ENABLE_CUDA
if (std::is_same<typename t_dev::memory_space,
Kokkos::CudaUVMSpace>::value) {
if (d_view.data() == h_view.data())
Kokkos::Impl::cuda_prefetch_pointer(
Kokkos::Cuda(), d_view.data(),
sizeof(typename t_dev::value_type) * d_view.span(), true);
}
#endif
deep_copy(d_view, h_view);
modified_flags(0) = modified_flags(1) = 0;
}
}
if (dev == 0) { // hopefully Device is the same as DualView's host type
if ((modified_flags(1) > 0) && (modified_flags(1) >= modified_flags(0))) {
#ifdef KOKKOS_ENABLE_CUDA
if (std::is_same<typename t_dev::memory_space,
Kokkos::CudaUVMSpace>::value) {
if (d_view.data() == h_view.data())
Kokkos::Impl::cuda_prefetch_pointer(
Kokkos::Cuda(), d_view.data(),
sizeof(typename t_dev::value_type) * d_view.span(), false);
}
#endif
deep_copy(h_view, d_view);
modified_flags(0) = modified_flags(1) = 0;
}
@ -499,12 +566,12 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
}
template <class Device>
void sync(const typename Impl::enable_if<
void sync(const typename std::enable_if<
(!std::is_same<typename traits::data_type,
typename traits::non_const_data_type>::value) ||
(std::is_same<Device, int>::value),
int>::type& = 0) {
if (modified_flags.data() == NULL) return;
if (modified_flags.data() == nullptr) return;
int dev = get_device_side<Device>();
@ -527,8 +594,18 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
typename traits::non_const_data_type>::value)
Impl::throw_runtime_exception(
"Calling sync_host on a DualView with a const datatype.");
if (modified_flags.data() == NULL) return;
if (modified_flags.data() == nullptr) return;
if (modified_flags(1) > modified_flags(0)) {
#ifdef KOKKOS_ENABLE_CUDA
if (std::is_same<typename t_dev::memory_space,
Kokkos::CudaUVMSpace>::value) {
if (d_view.data() == h_view.data())
Kokkos::Impl::cuda_prefetch_pointer(
Kokkos::Cuda(), d_view.data(),
sizeof(typename t_dev::value_type) * d_view.span(), false);
}
#endif
deep_copy(h_view, d_view);
modified_flags(1) = modified_flags(0) = 0;
}
@ -539,8 +616,18 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
typename traits::non_const_data_type>::value)
Impl::throw_runtime_exception(
"Calling sync_device on a DualView with a const datatype.");
if (modified_flags.data() == NULL) return;
if (modified_flags.data() == nullptr) return;
if (modified_flags(0) > modified_flags(1)) {
#ifdef KOKKOS_ENABLE_CUDA
if (std::is_same<typename t_dev::memory_space,
Kokkos::CudaUVMSpace>::value) {
if (d_view.data() == h_view.data())
Kokkos::Impl::cuda_prefetch_pointer(
Kokkos::Cuda(), d_view.data(),
sizeof(typename t_dev::value_type) * d_view.span(), true);
}
#endif
deep_copy(d_view, h_view);
modified_flags(1) = modified_flags(0) = 0;
}
@ -548,7 +635,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
template <class Device>
bool need_sync() const {
if (modified_flags.data() == NULL) return false;
if (modified_flags.data() == nullptr) return false;
int dev = get_device_side<Device>();
if (dev == 1) { // if Device is the same as DualView's device type
@ -565,12 +652,12 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
}
inline bool need_sync_host() const {
if (modified_flags.data() == NULL) return false;
if (modified_flags.data() == nullptr) return false;
return modified_flags(0) < modified_flags(1);
}
inline bool need_sync_device() const {
if (modified_flags.data() == NULL) return false;
if (modified_flags.data() == nullptr) return false;
return modified_flags(1) < modified_flags(0);
}
@ -581,7 +668,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
/// data as modified.
template <class Device>
void modify() {
if (modified_flags.data() == NULL) return;
if (modified_flags.data() == nullptr) return;
int dev = get_device_side<Device>();
if (dev == 1) { // if Device is the same as DualView's device type
@ -612,7 +699,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
}
inline void modify_host() {
if (modified_flags.data() != NULL) {
if (modified_flags.data() != nullptr) {
modified_flags(0) =
(modified_flags(1) > modified_flags(0) ? modified_flags(1)
: modified_flags(0)) +
@ -631,7 +718,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
}
inline void modify_device() {
if (modified_flags.data() != NULL) {
if (modified_flags.data() != nullptr) {
modified_flags(1) =
(modified_flags(1) > modified_flags(0) ? modified_flags(1)
: modified_flags(0)) +
@ -650,7 +737,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
}
inline void clear_sync_state() {
if (modified_flags.data() != NULL)
if (modified_flags.data() != nullptr)
modified_flags(1) = modified_flags(0) = 0;
}
@ -675,7 +762,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
h_view = create_mirror_view(d_view);
/* Reset dirty flags */
if (modified_flags.data() == NULL) {
if (modified_flags.data() == nullptr) {
modified_flags = t_modified_flags("DualView::modified_flags");
} else
modified_flags(1) = modified_flags(0) = 0;
@ -693,7 +780,7 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
if (modified_flags.data() == NULL) {
if (modified_flags.data() == nullptr) {
modified_flags = t_modified_flags("DualView::modified_flags");
}
if (modified_flags(1) >= modified_flags(0)) {
@ -866,4 +953,27 @@ void deep_copy(
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
//
// Non-member resize and realloc
//
// Free-function form of DualView::resize. All arguments after `dv` are
// perfectly forwarded to dv.resize(...). The noexcept-specification is
// conditional: this wrapper is noexcept exactly when the forwarded member
// call is, so the operand of the inner noexcept() must stay identical to the
// expression in the body.
template <class... Properties, class... Args>
void resize(DualView<Properties...>& dv, Args&&... args) noexcept(
noexcept(dv.resize(std::forward<Args>(args)...))) {
dv.resize(std::forward<Args>(args)...);
}
// Free-function form of DualView::realloc. Same forwarding and conditional
// noexcept scheme as resize() above, but dispatching to dv.realloc(...).
template <class... Properties, class... Args>
void realloc(DualView<Properties...>& dv, Args&&... args) noexcept(
noexcept(dv.realloc(std::forward<Args>(args)...))) {
dv.realloc(std::forward<Args>(args)...);
}
} // end namespace Kokkos
#endif

View File

@ -293,6 +293,7 @@ KOKKOS_INLINE_FUNCTION void dyn_rank_view_verify_operator_bounds(
dyn_rank_view_error_operator_bounds<0>(buffer + n, LEN - n, map, args...);
Kokkos::Impl::throw_runtime_exception(std::string(buffer));
#else
(void)tracker;
Kokkos::abort("DynRankView bounds error");
#endif
}
@ -1065,8 +1066,8 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
//----------------------------------------
// Standard constructor, destructor, and assignment operators...
KOKKOS_INLINE_FUNCTION
~DynRankView() {}
KOKKOS_DEFAULTED_FUNCTION
~DynRankView() = default;
KOKKOS_INLINE_FUNCTION
DynRankView() : m_track(), m_map(), m_rank() {} // Default ctor
@ -1773,7 +1774,7 @@ struct DynRankViewRemap {
const Kokkos::Impl::ParallelFor<DynRankViewRemap, Policy> closure(
*this, Policy(0, n0));
closure.execute();
// Kokkos::fence(); // ??
// ExecSpace().fence(); // ??
}
KOKKOS_INLINE_FUNCTION
@ -1806,7 +1807,8 @@ inline void deep_copy(
const DynRankView<DT, DP...>& dst,
typename ViewTraits<DT, DP...>::const_value_type& value,
typename std::enable_if<std::is_same<
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = 0) {
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
nullptr) {
static_assert(
std::is_same<typename ViewTraits<DT, DP...>::non_const_value_type,
typename ViewTraits<DT, DP...>::value_type>::value,
@ -1843,7 +1845,7 @@ inline void deep_copy(
(std::is_same<typename DstType::traits::specialize, void>::value &&
std::is_same<typename SrcType::traits::specialize, void>::value &&
(Kokkos::is_dyn_rank_view<DstType>::value ||
Kokkos::is_dyn_rank_view<SrcType>::value))>::type* = 0) {
Kokkos::is_dyn_rank_view<SrcType>::value))>::type* = nullptr) {
static_assert(
std::is_same<typename DstType::traits::value_type,
typename DstType::traits::non_const_value_type>::value,
@ -2009,7 +2011,7 @@ inline typename DynRankView<T, P...>::HostMirror create_mirror(
typename std::enable_if<
std::is_same<typename ViewTraits<T, P...>::specialize, void>::value &&
!std::is_same<typename Kokkos::ViewTraits<T, P...>::array_layout,
Kokkos::LayoutStride>::value>::type* = 0) {
Kokkos::LayoutStride>::value>::type* = nullptr) {
typedef DynRankView<T, P...> src_type;
typedef typename src_type::HostMirror dst_type;
@ -2036,7 +2038,8 @@ template <class Space, class T, class... P>
typename Impl::MirrorDRVType<Space, T, P...>::view_type create_mirror(
const Space&, const Kokkos::DynRankView<T, P...>& src,
typename std::enable_if<std::is_same<
typename ViewTraits<T, P...>::specialize, void>::value>::type* = 0) {
typename ViewTraits<T, P...>::specialize, void>::value>::type* =
nullptr) {
return typename Impl::MirrorDRVType<Space, T, P...>::view_type(
src.label(), Impl::reconstructLayout(src.layout(), src.rank()));
}
@ -2050,7 +2053,7 @@ inline typename DynRankView<T, P...>::HostMirror create_mirror_view(
typename DynRankView<T, P...>::HostMirror::memory_space>::value &&
std::is_same<typename DynRankView<T, P...>::data_type,
typename DynRankView<T, P...>::HostMirror::data_type>::
value)>::type* = 0) {
value)>::type* = nullptr) {
return src;
}
@ -2072,7 +2075,8 @@ template <class Space, class T, class... P>
typename Impl::MirrorDRViewType<Space, T, P...>::view_type create_mirror_view(
const Space&, const Kokkos::DynRankView<T, P...>& src,
typename std::enable_if<
Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* = 0) {
Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* =
nullptr) {
return src;
}
@ -2094,7 +2098,8 @@ create_mirror_view_and_copy(
const Space&, const Kokkos::DynRankView<T, P...>& src,
std::string const& name = "",
typename std::enable_if<
Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* = 0) {
Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>::type* =
nullptr) {
(void)name;
return src;
}
@ -2139,7 +2144,7 @@ inline void resize(DynRankView<T, P...>& v,
static_assert(Kokkos::ViewTraits<T, P...>::is_managed,
"Can only resize managed views");
drview_type v_resized(v.label(), n0, n1, n2, n3, n4, n5, n6);
drview_type v_resized(v.label(), n0, n1, n2, n3, n4, n5, n6, n7);
Kokkos::Impl::DynRankViewRemap<drview_type, drview_type>(v_resized, v);
@ -2166,7 +2171,7 @@ inline void realloc(DynRankView<T, P...>& v,
const std::string label = v.label();
v = drview_type(); // Deallocate first, if the only view to allocation
v = drview_type(label, n0, n1, n2, n3, n4, n5, n6);
v = drview_type(label, n0, n1, n2, n3, n4, n5, n6, n7);
}
} // namespace Kokkos

View File

@ -70,10 +70,10 @@ struct ChunkArraySpace<Kokkos::CudaSpace> {
using memory_space = typename Kokkos::CudaUVMSpace;
};
#endif
#ifdef KOKKOS_ENABLE_ROCM
#ifdef KOKKOS_ENABLE_HIP
template <>
struct ChunkArraySpace<Kokkos::Experimental::ROCmSpace> {
using memory_space = typename Kokkos::Experimental::ROCmHostPinnedSpace;
struct ChunkArraySpace<Kokkos::Experimental::HIPSpace> {
using memory_space = typename Kokkos::Experimental::HIPHostPinnedSpace;
};
#endif
} // end namespace Impl
@ -248,8 +248,8 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
//----------------------------------------
template <typename I0, class... Args>
KOKKOS_INLINE_FUNCTION reference_type operator()(const I0& i0,
const Args&... args) const {
KOKKOS_INLINE_FUNCTION reference_type
operator()(const I0& i0, const Args&... /*args*/) const {
static_assert(Kokkos::Impl::are_integral<I0, Args...>::value,
"Indices must be integral type");
@ -265,7 +265,7 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
// If not bounds checking then we assume a non-zero pointer is valid.
#if !defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK)
if (0 == *ch)
if (nullptr == *ch)
#endif
{
// Verify that allocation of the requested chunk is in progress.
@ -280,7 +280,7 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
// Allocation of this chunk is in progress
// so wait for allocation to complete.
while (0 == *ch)
while (nullptr == *ch)
;
}
@ -325,7 +325,7 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
--*pc;
typename traits::memory_space().deallocate(
m_chunks[*pc], sizeof(local_value_type) << m_chunk_shift);
m_chunks[*pc] = 0;
m_chunks[*pc] = nullptr;
}
}
// *m_chunks[m_chunk_max+1] stores the 'extent' requested by resize
@ -366,10 +366,10 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
// Initialize or destroy array of chunk pointers.
// Two entries beyond the max chunks are allocation counters.
inline void operator()(unsigned i) const {
if (m_destroy && i < m_chunk_max && 0 != m_chunks[i]) {
if (m_destroy && i < m_chunk_max && nullptr != m_chunks[i]) {
typename traits::memory_space().deallocate(m_chunks[i], m_chunk_size);
}
m_chunks[i] = 0;
m_chunks[i] = nullptr;
}
void execute(bool arg_destroy) {
@ -419,7 +419,7 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
const unsigned min_chunk_size,
const unsigned max_extent)
: m_track(),
m_chunks(0)
m_chunks(nullptr)
// The chunk size is guaranteed to be a power of two
,
m_chunk_shift(Kokkos::Impl::integral_power_of_two_that_contains(
@ -528,7 +528,7 @@ struct CommonSubview<Kokkos::Experimental::DynamicView<DP...>,
typedef SrcType src_subview_type;
dst_subview_type dst_sub;
src_subview_type src_sub;
CommonSubview(const DstType& dst, const SrcType& src, const Arg0& arg0)
CommonSubview(const DstType& dst, const SrcType& src, const Arg0& /*arg0*/)
: dst_sub(dst), src_sub(src) {}
};

View File

@ -187,7 +187,7 @@ template <typename ReportType, typename DeviceType>
void ErrorReporter<ReportType, DeviceType>::resize(const size_t new_size) {
m_reports.resize(new_size);
m_reporters.resize(new_size);
Kokkos::fence();
typename DeviceType::execution_space().fence();
}
} // namespace Experimental

View File

@ -362,19 +362,18 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
//----------------------------------------
private:
enum {
is_layout_left =
std::is_same<typename traits::array_layout, Kokkos::LayoutLeft>::value,
static constexpr bool is_layout_left =
std::is_same<typename traits::array_layout, Kokkos::LayoutLeft>::value;
is_layout_right =
std::is_same<typename traits::array_layout, Kokkos::LayoutRight>::value,
static constexpr bool is_layout_right =
std::is_same<typename traits::array_layout, Kokkos::LayoutRight>::value;
is_layout_stride = std::is_same<typename traits::array_layout,
Kokkos::LayoutStride>::value,
static constexpr bool is_layout_stride =
std::is_same<typename traits::array_layout, Kokkos::LayoutStride>::value;
is_default_map = std::is_same<typename traits::specialize, void>::value &&
(is_layout_left || is_layout_right || is_layout_stride)
};
static constexpr bool is_default_map =
std::is_same<typename traits::specialize, void>::value &&
(is_layout_left || is_layout_right || is_layout_stride);
template <class Space, bool = Kokkos::Impl::MemorySpaceAccess<
Space, typename traits::memory_space>::accessible>
@ -804,8 +803,8 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
//----------------------------------------
// Standard destructor, constructors, and assignment operators
KOKKOS_INLINE_FUNCTION
~OffsetView() {}
KOKKOS_DEFAULTED_FUNCTION
~OffsetView() = default;
KOKKOS_INLINE_FUNCTION
OffsetView() : m_track(), m_map() {
@ -1317,7 +1316,7 @@ KOKKOS_INLINE_FUNCTION
KOKKOS_INLINE_FUNCTION
Kokkos::Impl::ALL_t shift_input(const Kokkos::Impl::ALL_t arg,
const int64_t offset) {
const int64_t /*offset*/) {
return arg;
}
@ -1347,9 +1346,9 @@ KOKKOS_INLINE_FUNCTION void map_arg_to_new_begin(
template <size_t N, class Arg, class A>
KOKKOS_INLINE_FUNCTION void map_arg_to_new_begin(
const size_t i, Kokkos::Array<int64_t, N>& subviewBegins,
typename std::enable_if<N == 0, const Arg>::type shiftedArg, const Arg arg,
const A viewBegins, size_t& counter) {}
const size_t /*i*/, Kokkos::Array<int64_t, N>& /*subviewBegins*/,
typename std::enable_if<N == 0, const Arg>::type /*shiftedArg*/,
const Arg /*arg*/, const A /*viewBegins*/, size_t& /*counter*/) {}
template <class D, class... P, class T>
KOKKOS_INLINE_FUNCTION
@ -1832,7 +1831,8 @@ inline void deep_copy(
const OffsetView<DT, DP...>& dst,
typename ViewTraits<DT, DP...>::const_value_type& value,
typename std::enable_if<std::is_same<
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = 0) {
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
nullptr) {
static_assert(
std::is_same<typename ViewTraits<DT, DP...>::non_const_value_type,
typename ViewTraits<DT, DP...>::value_type>::value,
@ -1846,7 +1846,8 @@ template <class DT, class... DP, class ST, class... SP>
inline void deep_copy(
const OffsetView<DT, DP...>& dst, const OffsetView<ST, SP...>& value,
typename std::enable_if<std::is_same<
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = 0) {
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
nullptr) {
static_assert(
std::is_same<typename ViewTraits<DT, DP...>::value_type,
typename ViewTraits<ST, SP...>::non_const_value_type>::value,
@ -1859,7 +1860,8 @@ template <class DT, class... DP, class ST, class... SP>
inline void deep_copy(
const OffsetView<DT, DP...>& dst, const View<ST, SP...>& value,
typename std::enable_if<std::is_same<
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = 0) {
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
nullptr) {
static_assert(
std::is_same<typename ViewTraits<DT, DP...>::value_type,
typename ViewTraits<ST, SP...>::non_const_value_type>::value,
@ -1873,7 +1875,8 @@ template <class DT, class... DP, class ST, class... SP>
inline void deep_copy(
const View<DT, DP...>& dst, const OffsetView<ST, SP...>& value,
typename std::enable_if<std::is_same<
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* = 0) {
typename ViewTraits<DT, DP...>::specialize, void>::value>::type* =
nullptr) {
static_assert(
std::is_same<typename ViewTraits<DT, DP...>::value_type,
typename ViewTraits<ST, SP...>::non_const_value_type>::value,
@ -2011,7 +2014,7 @@ create_mirror_view(
std::is_same<
typename Kokkos::Experimental::OffsetView<T, P...>::data_type,
typename Kokkos::Experimental::OffsetView<
T, P...>::HostMirror::data_type>::value)>::type* = 0) {
T, P...>::HostMirror::data_type>::value)>::type* = nullptr) {
return src;
}

View File

@ -171,24 +171,41 @@ struct DefaultContribution<Kokkos::Cuda,
};
#endif
#ifdef KOKKOS_ENABLE_HIP
// Defaults for the HIP execution space; only compiled when
// KOKKOS_ENABLE_HIP is defined.

// Default duplication strategy for HIP: ScatterNonDuplicated.
template <>
struct DefaultDuplication<Kokkos::Experimental::HIP> {
enum : int { value = Kokkos::Experimental::ScatterNonDuplicated };
};
// Default contribution for HIP with ScatterNonDuplicated: ScatterAtomic.
template <>
struct DefaultContribution<Kokkos::Experimental::HIP,
Kokkos::Experimental::ScatterNonDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterAtomic };
};
// Default contribution for HIP with ScatterDuplicated: also ScatterAtomic.
template <>
struct DefaultContribution<Kokkos::Experimental::HIP,
Kokkos::Experimental::ScatterDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterAtomic };
};
#endif
/* ScatterValue <Op=ScatterSum, contribution=ScatterNonAtomic> is the object
returned by the access operator() of ScatterAccess. This class inherits from
the Sum<> reducer and wraps join(dest, src) with convenient operator+=,
etc. Note the addition of update(ValueType const& rhs) and reset() so that
all reducers can have common functions. See ReduceDuplicates and
ResetDuplicates. */
template <typename ValueType, int Op, int contribution>
template <typename ValueType, int Op, typename DeviceType, int contribution>
struct ScatterValue;
template <typename ValueType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum,
template <typename ValueType, typename DeviceType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, DeviceType,
Kokkos::Experimental::ScatterNonAtomic>
: Sum<ValueType, Kokkos::DefaultExecutionSpace> {
: Sum<ValueType, DeviceType> {
public:
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
: Sum<ValueType, Kokkos::DefaultExecutionSpace>(value_in) {}
: Sum<ValueType, DeviceType>(value_in) {}
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
: Sum<ValueType, Kokkos::DefaultExecutionSpace>(other.reference()) {}
: Sum<ValueType, DeviceType>(other.reference()) {}
KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) {
this->join(this->reference(), rhs);
}
@ -206,13 +223,13 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum,
* of ScatterAccess, similar to that returned by an Atomic View, it wraps
Kokkos::atomic_add with convenient operator+=, etc. This version also has the
update(rhs) and reset() functions. */
template <typename ValueType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum,
template <typename ValueType, typename DeviceType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum, DeviceType,
Kokkos::Experimental::ScatterAtomic>
: Sum<ValueType, Kokkos::DefaultExecutionSpace> {
: Sum<ValueType, DeviceType> {
public:
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
: Sum<ValueType, Kokkos::DefaultExecutionSpace>(value_in) {}
: Sum<ValueType, DeviceType>(value_in) {}
KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) {
this->join(this->reference(), rhs);
@ -244,15 +261,15 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterSum,
etc. Note the addition of update(ValueType const& rhs) and reset() so that
all reducers can have common functions See ReduceDuplicates and
ResetDuplicates ) */
template <typename ValueType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd,
template <typename ValueType, typename DeviceType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, DeviceType,
Kokkos::Experimental::ScatterNonAtomic>
: Prod<ValueType, Kokkos::DefaultExecutionSpace> {
: Prod<ValueType, DeviceType> {
public:
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
: Prod<ValueType, Kokkos::DefaultExecutionSpace>(value_in) {}
: Prod<ValueType, DeviceType>(value_in) {}
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
: Prod<ValueType, Kokkos::DefaultExecutionSpace>(other.reference()) {}
: Prod<ValueType, DeviceType>(other.reference()) {}
KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) {
this->join(this->reference(), rhs);
}
@ -271,13 +288,13 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd,
atomic_prod with convenient operator*=, etc. atomic_prod uses the
atomic_compare_exchange. This version also has the update(rhs) and reset()
functions. */
template <typename ValueType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd,
template <typename ValueType, typename DeviceType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, DeviceType,
Kokkos::Experimental::ScatterAtomic>
: Prod<ValueType, Kokkos::DefaultExecutionSpace> {
: Prod<ValueType, DeviceType> {
public:
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
: Prod<ValueType, Kokkos::DefaultExecutionSpace>(value_in) {}
: Prod<ValueType, DeviceType>(value_in) {}
KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) {
this->join(this->reference(), rhs);
@ -320,15 +337,15 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd,
Note the addition of update(ValueType const& rhs) and reset() are so that all
reducers can have a common update function See ReduceDuplicates and
ResetDuplicates ) */
template <typename ValueType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin,
template <typename ValueType, typename DeviceType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, DeviceType,
Kokkos::Experimental::ScatterNonAtomic>
: Min<ValueType, Kokkos::DefaultExecutionSpace> {
: Min<ValueType, DeviceType> {
public:
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
: Min<ValueType, Kokkos::DefaultExecutionSpace>(value_in) {}
: Min<ValueType, DeviceType>(value_in) {}
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
: Min<ValueType, Kokkos::DefaultExecutionSpace>(other.reference()) {}
: Min<ValueType, DeviceType>(other.reference()) {}
KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
this->join(this->reference(), rhs);
}
@ -340,13 +357,13 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin,
* of ScatterAccess, similar to that returned by an Atomic View, it wraps and
atomic_min with the update(rhs) function. atomic_min uses the
atomic_compare_exchange. This version also has the reset() function */
template <typename ValueType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin,
template <typename ValueType, typename DeviceType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, DeviceType,
Kokkos::Experimental::ScatterAtomic>
: Min<ValueType, Kokkos::DefaultExecutionSpace> {
: Min<ValueType, DeviceType> {
public:
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
: Min<ValueType, Kokkos::DefaultExecutionSpace>(value_in) {}
: Min<ValueType, DeviceType>(value_in) {}
KOKKOS_FORCEINLINE_FUNCTION
void atomic_min(ValueType& dest, const ValueType& src) const {
@ -382,15 +399,15 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin,
Note the addition of update(ValueType const& rhs) and reset() are so that all
reducers can have a common update function See ReduceDuplicates and
ResetDuplicates ) */
template <typename ValueType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax,
template <typename ValueType, typename DeviceType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, DeviceType,
Kokkos::Experimental::ScatterNonAtomic>
: Max<ValueType, Kokkos::DefaultExecutionSpace> {
: Max<ValueType, DeviceType> {
public:
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
: Max<ValueType, Kokkos::DefaultExecutionSpace>(value_in) {}
: Max<ValueType, DeviceType>(value_in) {}
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
: Max<ValueType, Kokkos::DefaultExecutionSpace>(other.reference()) {}
: Max<ValueType, DeviceType>(other.reference()) {}
KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
this->join(this->reference(), rhs);
}
@ -402,13 +419,13 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax,
* of ScatterAccess, similar to that returned by an Atomic View, it wraps and
atomic_max with the update(rhs) function. atomic_max uses the
atomic_compare_exchange. This version also has the reset() function */
template <typename ValueType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax,
template <typename ValueType, typename DeviceType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, DeviceType,
Kokkos::Experimental::ScatterAtomic>
: Max<ValueType, Kokkos::DefaultExecutionSpace> {
: Max<ValueType, DeviceType> {
public:
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in)
: Max<ValueType, Kokkos::DefaultExecutionSpace>(value_in) {}
: Max<ValueType, DeviceType>(value_in) {}
KOKKOS_FORCEINLINE_FUNCTION
void atomic_max(ValueType& dest, const ValueType& src) const {
@ -558,6 +575,8 @@ struct ReduceDuplicatesBase {
Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0,
&kpID);
}
#else
(void)name;
#endif
typedef RangePolicy<ExecSpace, size_t> policy_type;
typedef Kokkos::Impl::ParallelFor<Derived, policy_type> closure_type;
@ -584,8 +603,9 @@ struct ReduceDuplicates
: Base(src_in, dst_in, stride_in, start_in, n_in, name) {}
KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
for (size_t j = Base::start; j < Base::n; ++j) {
ScatterValue<ValueType, Op, Kokkos::Experimental::ScatterNonAtomic> sv(
Base::dst[i]);
ScatterValue<ValueType, Op, ExecSpace,
Kokkos::Experimental::ScatterNonAtomic>
sv(Base::dst[i]);
sv.update(Base::src[i + Base::stride * j]);
}
}
@ -607,6 +627,8 @@ struct ResetDuplicatesBase {
Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0,
&kpID);
}
#else
(void)name;
#endif
typedef RangePolicy<ExecSpace, size_t> policy_type;
typedef Kokkos::Impl::ParallelFor<Derived, policy_type> closure_type;
@ -630,8 +652,9 @@ struct ResetDuplicates : public ResetDuplicatesBase<ExecSpace, ValueType, Op> {
ResetDuplicates(ValueType* data_in, size_t size_in, std::string const& name)
: Base(data_in, size_in, name) {}
KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
ScatterValue<ValueType, Op, Kokkos::Experimental::ScatterNonAtomic> sv(
Base::data[i]);
ScatterValue<ValueType, Op, ExecSpace,
Kokkos::Experimental::ScatterNonAtomic>
sv(Base::data[i]);
sv.reset();
}
};
@ -768,8 +791,8 @@ class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
contribution>
view_type;
typedef typename view_type::original_value_type original_value_type;
typedef Kokkos::Impl::Experimental::ScatterValue<original_value_type, Op,
override_contribution>
typedef Kokkos::Impl::Experimental::ScatterValue<
original_value_type, Op, DeviceType, override_contribution>
value_type;
KOKKOS_INLINE_FUNCTION
@ -777,13 +800,8 @@ class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterNonDuplicated,
KOKKOS_INLINE_FUNCTION
ScatterAccess(view_type const& view_in) : view(view_in) {}
// KOKKOS_DEFAULTED_FUNCTION
// ~ScatterAccess() = default;
KOKKOS_INLINE_FUNCTION
~ScatterAccess()
{
}
KOKKOS_DEFAULTED_FUNCTION
~ScatterAccess() = default;
template <typename... Args>
KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args... args) const {
@ -1190,8 +1208,8 @@ class ScatterAccess<DataType, Op, DeviceType, Layout, ScatterDuplicated,
contribution>
view_type;
typedef typename view_type::original_value_type original_value_type;
typedef Kokkos::Impl::Experimental::ScatterValue<original_value_type, Op,
override_contribution>
typedef Kokkos::Impl::Experimental::ScatterValue<
original_value_type, Op, DeviceType, override_contribution>
value_type;
KOKKOS_FORCEINLINE_FUNCTION

View File

@ -112,7 +112,7 @@ struct StaticCrsGraphBalancerFunctor {
}
} else {
if ((count >= (current_block + 1) * cost_per_workset) ||
(iRow + 2 == row_offsets.extent(0))) {
(iRow + 2 == int_type(row_offsets.extent(0)))) {
if (end_block > current_block + 1) {
int_type num_block = end_block - current_block;
row_block_offsets(current_block + 1) = iRow;
@ -358,8 +358,8 @@ class StaticCrsGraph {
/** \brief Destroy this view of the array.
* If the last view then allocated memory is deallocated.
*/
KOKKOS_INLINE_FUNCTION
~StaticCrsGraph() {}
KOKKOS_DEFAULTED_FUNCTION
~StaticCrsGraph() = default;
/** \brief Return number of rows in the graph
*/
@ -396,7 +396,7 @@ class StaticCrsGraph {
const data_type count = static_cast<data_type>(row_map(i + 1) - start);
if (count == 0) {
return GraphRowViewConst<StaticCrsGraph>(NULL, 1, 0);
return GraphRowViewConst<StaticCrsGraph>(nullptr, 1, 0);
} else {
return GraphRowViewConst<StaticCrsGraph>(entries, 1, count, start);
}
@ -414,9 +414,10 @@ class StaticCrsGraph {
row_map_type, View<size_type*, array_layout, device_type> >
partitioner(row_map, block_offsets, fix_cost_per_row, num_blocks);
Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, numRows()),
Kokkos::parallel_for("Kokkos::StaticCrsGraph::create_block_partitioning",
Kokkos::RangePolicy<execution_space>(0, numRows()),
partitioner);
Kokkos::fence();
typename device_type::execution_space().fence();
row_block_offsets = block_offsets;
}
@ -522,7 +523,8 @@ DataType maximum_entry(const StaticCrsGraph<DataType, Arg1Type, Arg2Type,
typedef Impl::StaticCrsGraphMaximumEntry<GraphType> FunctorType;
DataType result = 0;
Kokkos::parallel_reduce(graph.entries.extent(0), FunctorType(graph), result);
Kokkos::parallel_reduce("Kokkos::maximum_entry", graph.entries.extent(0),
FunctorType(graph), result);
return result;
}

View File

@ -201,9 +201,9 @@ class UnorderedMapInsertResult {
///
template <typename Key, typename Value,
typename Device = Kokkos::DefaultExecutionSpace,
typename Hasher = pod_hash<typename Impl::remove_const<Key>::type>,
typename Hasher = pod_hash<typename std::remove_const<Key>::type>,
typename EqualTo =
pod_equal_to<typename Impl::remove_const<Key>::type> >
pod_equal_to<typename std::remove_const<Key>::type> >
class UnorderedMap {
private:
typedef typename ViewTraits<Key, Device, void, void>::host_mirror_space
@ -215,13 +215,13 @@ class UnorderedMap {
// key_types
typedef Key declared_key_type;
typedef typename Impl::remove_const<declared_key_type>::type key_type;
typedef typename Impl::add_const<key_type>::type const_key_type;
typedef typename std::remove_const<declared_key_type>::type key_type;
typedef typename std::add_const<key_type>::type const_key_type;
// value_types
typedef Value declared_value_type;
typedef typename Impl::remove_const<declared_value_type>::type value_type;
typedef typename Impl::add_const<value_type>::type const_value_type;
typedef typename std::remove_const<declared_value_type>::type value_type;
typedef typename std::add_const<value_type>::type const_value_type;
typedef Device device_type;
typedef typename Device::execution_space execution_space;
@ -296,25 +296,13 @@ class UnorderedMap {
//! \name Public member functions
//@{
UnorderedMap()
: m_bounded_insert(),
m_hasher(),
m_equal_to(),
m_size(),
m_available_indexes(),
m_hash_lists(),
m_next_index(),
m_keys(),
m_values(),
m_scalars() {}
/// \brief Constructor
///
/// \param capacity_hint [in] Initial guess of how many unique keys will be
///   inserted into the map.
/// \param hasher [in] Hasher function for \c Key instances. The default
///   value usually suffices.
/// \param equal_to [in] Equality comparison function for \c Key instances.
UnorderedMap(size_type capacity_hint, hasher_type hasher = hasher_type(),
UnorderedMap(size_type capacity_hint = 0, hasher_type hasher = hasher_type(),
equal_to_type equal_to = equal_to_type())
: m_bounded_insert(true),
m_hasher(hasher),
@ -689,7 +677,7 @@ class UnorderedMap {
template <typename SKey, typename SValue>
UnorderedMap(
UnorderedMap<SKey, SValue, Device, Hasher, EqualTo> const &src,
typename Impl::enable_if<
typename std::enable_if<
Impl::UnorderedMapCanAssign<declared_key_type, declared_value_type,
SKey, SValue>::value,
int>::type = 0)
@ -705,7 +693,7 @@ class UnorderedMap {
m_scalars(src.m_scalars) {}
template <typename SKey, typename SValue>
typename Impl::enable_if<
typename std::enable_if<
Impl::UnorderedMapCanAssign<declared_key_type, declared_value_type, SKey,
SValue>::value,
declared_map_type &>::type
@ -724,9 +712,9 @@ class UnorderedMap {
}
template <typename SKey, typename SValue, typename SDevice>
typename Impl::enable_if<
std::is_same<typename Impl::remove_const<SKey>::type, key_type>::value &&
std::is_same<typename Impl::remove_const<SValue>::type,
typename std::enable_if<
std::is_same<typename std::remove_const<SKey>::type, key_type>::value &&
std::is_same<typename std::remove_const<SValue>::type,
value_type>::value>::type
create_copy_view(
UnorderedMap<SKey, SValue, SDevice, Hasher, EqualTo> const &src) {

View File

@ -118,12 +118,12 @@ class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> {
if (DV::template need_sync<typename DV::t_dev::device_type>()) {
set_functor_host f(DV::h_view, val);
parallel_for(n, f);
parallel_for("Kokkos::vector::assign", n, f);
typename DV::t_host::execution_space().fence();
DV::template modify<typename DV::t_host::device_type>();
} else {
set_functor f(DV::d_view, val);
parallel_for(n, f);
parallel_for("Kokkos::vector::assign", n, f);
typename DV::t_dev::execution_space().fence();
DV::template modify<typename DV::t_dev::device_type>();
}

View File

@ -77,7 +77,8 @@ struct BitsetCount {
size_type apply() const {
size_type count = 0u;
parallel_reduce(m_bitset.m_blocks.extent(0), *this, count);
parallel_reduce("Kokkos::Impl::BitsetCount::apply",
m_bitset.m_blocks.extent(0), *this, count);
return count;
}

View File

@ -58,8 +58,8 @@ inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
create_mirror_view(
const StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType, Arg3Type>&
view,
typename Impl::enable_if<ViewTraits<DataType, Arg1Type, Arg2Type,
Arg3Type>::is_hostspace>::type* = 0) {
typename std::enable_if<ViewTraits<DataType, Arg1Type, Arg2Type,
Arg3Type>::is_hostspace>::type* = 0) {
return view;
}
#else
@ -70,8 +70,8 @@ inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
create_mirror_view(
const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>&
view,
typename Impl::enable_if<ViewTraits<DataType, Arg1Type, Arg2Type,
Arg3Type>::is_hostspace>::type* = 0) {
typename std::enable_if<ViewTraits<DataType, Arg1Type, Arg2Type,
Arg3Type>::is_hostspace>::type* = 0) {
return view;
}
#endif
@ -128,8 +128,8 @@ inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType,
create_mirror_view(
const StaticCrsGraph<DataType, Arg1Type, Arg2Type, SizeType, Arg3Type>&
view,
typename Impl::enable_if<!ViewTraits<DataType, Arg1Type, Arg2Type,
Arg3Type>::is_hostspace>::type* = 0)
typename std::enable_if<!ViewTraits<DataType, Arg1Type, Arg2Type,
Arg3Type>::is_hostspace>::type* = 0)
#else
template <class DataType, class Arg1Type, class Arg2Type, class Arg3Type,
typename SizeType>
@ -138,8 +138,8 @@ inline typename StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type,
create_mirror_view(
const StaticCrsGraph<DataType, Arg1Type, Arg2Type, Arg3Type, SizeType>&
view,
typename Impl::enable_if<!ViewTraits<DataType, Arg1Type, Arg2Type,
Arg3Type>::is_hostspace>::type* = 0)
typename std::enable_if<!ViewTraits<DataType, Arg1Type, Arg2Type,
Arg3Type>::is_hostspace>::type* = 0)
#endif
{
return create_mirror(view);

View File

@ -71,7 +71,10 @@ struct UnorderedMapRehash {
UnorderedMapRehash(map_type const& dst, const_map_type const& src)
: m_dst(dst), m_src(src) {}
void apply() const { parallel_for(m_src.capacity(), *this); }
void apply() const {
parallel_for("Kokkos::Impl::UnorderedMapRehash::apply", m_src.capacity(),
*this);
}
KOKKOS_INLINE_FUNCTION
void operator()(size_type i) const {
@ -91,7 +94,10 @@ struct UnorderedMapErase {
UnorderedMapErase(map_type const& map) : m_map(map) {}
void apply() const { parallel_for(m_map.m_hash_lists.extent(0), *this); }
void apply() const {
parallel_for("Kokkos::Impl::UnorderedMapErase::apply",
m_map.m_hash_lists.extent(0), *this);
}
KOKKOS_INLINE_FUNCTION
void operator()(size_type i) const {
@ -152,7 +158,10 @@ struct UnorderedMapHistogram {
m_distance("UnorderedMap Histogram"),
m_block_distance("UnorderedMap Histogram") {}
void calculate() { parallel_for(m_map.m_hash_lists.extent(0), *this); }
void calculate() {
parallel_for("Kokkos::Impl::UnorderedMapHistogram::calculate",
m_map.m_hash_lists.extent(0), *this);
}
void clear() {
Kokkos::deep_copy(m_length, 0);
@ -229,7 +238,10 @@ struct UnorderedMapPrint {
UnorderedMapPrint(map_type const& map) : m_map(map) {}
void apply() { parallel_for(m_map.m_hash_lists.extent(0), *this); }
void apply() {
parallel_for("Kokkos::Impl::UnorderedMapPrint::apply",
m_map.m_hash_lists.extent(0), *this);
}
KOKKOS_INLINE_FUNCTION
void operator()(size_type i) const {
@ -245,21 +257,22 @@ struct UnorderedMapPrint {
};
template <typename DKey, typename DValue, typename SKey, typename SValue>
struct UnorderedMapCanAssign : public false_ {};
struct UnorderedMapCanAssign : public std::false_type {};
template <typename Key, typename Value>
struct UnorderedMapCanAssign<Key, Value, Key, Value> : public true_ {};
struct UnorderedMapCanAssign<Key, Value, Key, Value> : public std::true_type {};
template <typename Key, typename Value>
struct UnorderedMapCanAssign<const Key, Value, Key, Value> : public true_ {};
struct UnorderedMapCanAssign<const Key, Value, Key, Value>
: public std::true_type {};
template <typename Key, typename Value>
struct UnorderedMapCanAssign<const Key, const Value, Key, Value>
: public true_ {};
: public std::true_type {};
template <typename Key, typename Value>
struct UnorderedMapCanAssign<const Key, const Value, const Key, Value>
: public true_ {};
: public std::true_type {};
} // namespace Impl
} // namespace Kokkos

View File

@ -3,7 +3,7 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
foreach(Tag Threads;Serial;OpenMP;HPX;Cuda)
foreach(Tag Threads;Serial;OpenMP;HPX;Cuda;HIP)
# Because there is always an exception to the rule
if(Tag STREQUAL "Threads")
set(DEVICE "PTHREAD")
@ -13,23 +13,31 @@ foreach(Tag Threads;Serial;OpenMP;HPX;Cuda)
string(TOLOWER ${Tag} dir)
# Add test for that backend if it is enabled
if(Kokkos_ENABLE_${DEVICE})
KOKKOS_ADD_EXECUTABLE_AND_TEST(
UnitTest_${Tag}
SOURCES
UnitTestMain.cpp
${dir}/Test${Tag}_BitSet.cpp
${dir}/Test${Tag}_DualView.cpp
${dir}/Test${Tag}_DynamicView.cpp
${dir}/Test${Tag}_DynRankViewAPI_generic.cpp
${dir}/Test${Tag}_DynRankViewAPI_rank12345.cpp
${dir}/Test${Tag}_DynRankViewAPI_rank67.cpp
${dir}/Test${Tag}_ErrorReporter.cpp
${dir}/Test${Tag}_OffsetView.cpp
${dir}/Test${Tag}_ScatterView.cpp
${dir}/Test${Tag}_StaticCrsGraph.cpp
${dir}/Test${Tag}_UnorderedMap.cpp
${dir}/Test${Tag}_Vector.cpp
${dir}/Test${Tag}_ViewCtorPropEmbeddedDim.cpp
set(UnitTestSources UnitTestMain.cpp)
set(dir ${CMAKE_CURRENT_BINARY_DIR}/${dir})
file(MAKE_DIRECTORY ${dir})
foreach(Name
Bitset
DualView
DynamicView
DynViewAPI_generic
DynViewAPI_rank12345
DynViewAPI_rank67
ErrorReporter
OffsetView
ScatterView
StaticCrsGraph
UnorderedMap
Vector
ViewCtorPropEmbeddedDim
)
set(file ${dir}/Test${Tag}_${Name}.cpp)
file(WRITE ${file}
"#include <Test${Tag}_Category.hpp>\n"
"#include <Test${Name}.hpp>\n"
)
list(APPEND UnitTestSources ${file})
endforeach()
KOKKOS_ADD_EXECUTABLE_AND_TEST(UnitTest_${Tag} SOURCES ${UnitTestSources})
endif()
endforeach()

View File

@ -9,7 +9,7 @@ vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/serial
vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/threads
vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/rocm
vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/cuda
vpath %.cpp ${CURDIR}
default: build_all
echo "End Build"
@ -31,14 +31,24 @@ KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/unit_tests
TEST_TARGETS =
TARGETS =
# Names of the container unit tests; each one is combined with every device
# name below to form a Test<device>_<test>.cpp translation unit.
TESTS = Bitset DualView DynamicView DynViewAPI_generic DynViewAPI_rank12345 DynViewAPI_rank67 ErrorReporter OffsetView ScatterView StaticCrsGraph UnorderedMap Vector ViewCtorPropEmbeddedDim
# Generate the per-device test sources. For every device in KOKKOS_DEVICELIST
# and every test in TESTS, check (via ls) whether Test<device>_<test>.cpp is
# already present in the current directory; if it is not, write a two-line stub
# that includes Test<device>_Category.hpp followed by Test<test>.hpp.
# The result is assigned with := so the expansion (and therefore the shell
# commands) is carried out immediately rather than deferred.
tmp := $(foreach device, $(KOKKOS_DEVICELIST), \
tmp2 := $(foreach test, $(TESTS), \
$(if $(filter Test$(device)_$(test).cpp, $(shell ls Test$(device)_$(test).cpp 2>/dev/null)),,\
$(shell echo "\#include<Test"$(device)"_Category.hpp>" > Test$(device)_$(test).cpp); \
$(shell echo "\#include<Test"$(test)".hpp>" >> Test$(device)_$(test).cpp); \
)\
) \
)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
OBJ_CUDA = UnitTestMain.o gtest-all.o
OBJ_CUDA += TestCuda_BitSet.o
OBJ_CUDA += TestCuda_Bitset.o
OBJ_CUDA += TestCuda_DualView.o
OBJ_CUDA += TestCuda_DynamicView.o
OBJ_CUDA += TestCuda_DynRankViewAPI_generic.o
OBJ_CUDA += TestCuda_DynRankViewAPI_rank12345.o
OBJ_CUDA += TestCuda_DynRankViewAPI_rank67.o
OBJ_CUDA += TestCuda_DynViewAPI_generic.o
OBJ_CUDA += TestCuda_DynViewAPI_rank12345.o
OBJ_CUDA += TestCuda_DynViewAPI_rank67.o
OBJ_CUDA += TestCuda_ErrorReporter.o
OBJ_CUDA += TestCuda_OffsetView.o
OBJ_CUDA += TestCuda_ScatterView.o
@ -50,33 +60,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
TEST_TARGETS += test-cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
OBJ_ROCM = UnitTestMain.o gtest-all.o
OBJ_ROCM += TestROCm_BitSet.o
OBJ_ROCM += TestROCm_DualView.o
OBJ_ROCM += TestROCm_DynamicView.o
OBJ_ROCM += TestROCm_DynRankViewAPI_generic.o
OBJ_ROCM += TestROCm_DynRankViewAPI_rank12345.o
OBJ_ROCM += TestROCm_DynRankViewAPI_rank67.o
OBJ_ROCM += TestROCm_ErrorReporter.o
OBJ_ROCM += TestROCm_OffsetView.o
OBJ_ROCM += TestROCm_ScatterView.o
OBJ_ROCM += TestROCm_StaticCrsGraph.o
OBJ_ROCM += TestROCm_UnorderedMap.o
OBJ_ROCM += TestROCm_Vector.o
OBJ_ROCM += TestROCm_ViewCtorPropEmbeddedDim.o
TARGETS += KokkosContainers_UnitTest_ROCm
TEST_TARGETS += test-rocm
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
OBJ_THREADS = UnitTestMain.o gtest-all.o
OBJ_THREADS += TestThreads_BitSet.o
OBJ_THREADS += TestThreads_Bitset.o
OBJ_THREADS += TestThreads_DualView.o
OBJ_THREADS += TestThreads_DynamicView.o
OBJ_THREADS += TestThreads_DynRankViewAPI_generic.o
OBJ_THREADS += TestThreads_DynRankViewAPI_rank12345.o
OBJ_THREADS += TestThreads_DynRankViewAPI_rank67.o
OBJ_THREADS += TestThreads_DynViewAPI_generic.o
OBJ_THREADS += TestThreads_DynViewAPI_rank12345.o
OBJ_THREADS += TestThreads_DynViewAPI_rank67.o
OBJ_THREADS += TestThreads_ErrorReporter.o
OBJ_THREADS += TestThreads_OffsetView.o
OBJ_THREADS += TestThreads_ScatterView.o
@ -90,12 +81,12 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_OPENMP = UnitTestMain.o gtest-all.o
OBJ_OPENMP += TestOpenMP_BitSet.o
OBJ_OPENMP += TestOpenMP_Bitset.o
OBJ_OPENMP += TestOpenMP_DualView.o
OBJ_OPENMP += TestOpenMP_DynamicView.o
OBJ_OPENMP += TestOpenMP_DynRankViewAPI_generic.o
OBJ_OPENMP += TestOpenMP_DynRankViewAPI_rank12345.o
OBJ_OPENMP += TestOpenMP_DynRankViewAPI_rank67.o
OBJ_OPENMP += TestOpenMP_DynViewAPI_generic.o
OBJ_OPENMP += TestOpenMP_DynViewAPI_rank12345.o
OBJ_OPENMP += TestOpenMP_DynViewAPI_rank67.o
OBJ_OPENMP += TestOpenMP_ErrorReporter.o
OBJ_OPENMP += TestOpenMP_OffsetView.o
OBJ_OPENMP += TestOpenMP_ScatterView.o
@ -109,12 +100,12 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
OBJ_HPX = UnitTestMain.o gtest-all.o
OBJ_HPX += TestHPX_BitSet.o
OBJ_HPX += TestHPX_Bitset.o
OBJ_HPX += TestHPX_DualView.o
OBJ_HPX += TestHPX_DynamicView.o
OBJ_HPX += TestHPX_DynRankViewAPI_generic.o
OBJ_HPX += TestHPX_DynRankViewAPI_rank12345.o
OBJ_HPX += TestHPX_DynRankViewAPI_rank67.o
OBJ_HPX += TestHPX_DynViewAPI_generic.o
OBJ_HPX += TestHPX_DynViewAPI_rank12345.o
OBJ_HPX += TestHPX_DynViewAPI_rank67.o
OBJ_HPX += TestHPX_ErrorReporter.o
OBJ_HPX += TestHPX_OffsetView.o
OBJ_HPX += TestHPX_ScatterView.o
@ -128,12 +119,12 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
OBJ_SERIAL = UnitTestMain.o gtest-all.o
OBJ_SERIAL += TestSerial_BitSet.o
OBJ_SERIAL += TestSerial_Bitset.o
OBJ_SERIAL += TestSerial_DualView.o
OBJ_SERIAL += TestSerial_DynamicView.o
OBJ_SERIAL += TestSerial_DynRankViewAPI_generic.o
OBJ_SERIAL += TestSerial_DynRankViewAPI_rank12345.o
OBJ_SERIAL += TestSerial_DynRankViewAPI_rank67.o
OBJ_SERIAL += TestSerial_DynViewAPI_generic.o
OBJ_SERIAL += TestSerial_DynViewAPI_rank12345.o
OBJ_SERIAL += TestSerial_DynViewAPI_rank67.o
OBJ_SERIAL += TestSerial_ErrorReporter.o
OBJ_SERIAL += TestSerial_OffsetView.o
OBJ_SERIAL += TestSerial_ScatterView.o
@ -148,9 +139,6 @@ endif
KokkosContainers_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_Cuda
KokkosContainers_UnitTest_ROCm: $(OBJ_ROCM) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(EXTRA_PATH) $(OBJ_ROCM) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_ROCm
KokkosContainers_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_Threads
@ -166,9 +154,6 @@ KokkosContainers_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
test-cuda: KokkosContainers_UnitTest_Cuda
./KokkosContainers_UnitTest_Cuda
test-rocm: KokkosContainers_UnitTest_ROCm
./KokkosContainers_UnitTest_ROCm
test-threads: KokkosContainers_UnitTest_Threads
./KokkosContainers_UnitTest_Threads
@ -186,7 +171,7 @@ build_all: $(TARGETS)
test: $(TEST_TARGETS)
clean: kokkos-clean
rm -f *.o $(TARGETS)
rm -f *.o $(TARGETS) *.cpp
# Compilation rules

View File

@ -253,8 +253,10 @@ void test_bitset() {
}
}
// FIXME_HIP deadlock
#ifndef KOKKOS_ENABLE_HIP
TEST(TEST_CATEGORY, bitset) { test_bitset<TEST_EXECSPACE>(); }
#endif
} // namespace Test
#endif // KOKKOS_TEST_BITSET_HPP

View File

@ -67,11 +67,17 @@ struct test_dualview_combinations {
Scalar result;
template <typename ViewType>
Scalar run_me(unsigned int n, unsigned int m) {
Scalar run_me(unsigned int n, unsigned int m, bool with_init) {
if (n < 10) n = 10;
if (m < 3) m = 3;
ViewType a("A", n, m);
ViewType a;
if (with_init) {
a = ViewType("A", n, m);
} else {
a = ViewType(Kokkos::ViewAllocateWithoutInitializing("A"), n, m);
}
Kokkos::deep_copy(a.d_view, 1);
a.template modify<typename ViewType::execution_space>();
@ -96,9 +102,9 @@ struct test_dualview_combinations {
return count - a.d_view.extent(0) * a.d_view.extent(1) - 2 - 4 - 3 * 2;
}
test_dualview_combinations(unsigned int size) {
test_dualview_combinations(unsigned int size, bool with_init) {
result = run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(
size, 3);
size, 3, with_init);
}
};
@ -124,18 +130,25 @@ struct test_dual_view_deep_copy {
typedef Device execution_space;
template <typename ViewType>
void run_me() {
const unsigned int n = 10;
const unsigned int m = 5;
const unsigned int sum_total = n * m;
ViewType a("A", n, m);
ViewType b("B", n, m);
void run_me(int n, const int m, const bool use_templ_sync) {
ViewType a, b;
if (n >= 0) {
a = ViewType("A", n, m);
b = ViewType("B", n, m);
} else {
n = 0;
}
const scalar_type sum_total = scalar_type(n * m);
Kokkos::deep_copy(a.d_view, 1);
a.template modify<typename ViewType::execution_space>();
a.template sync<typename ViewType::host_mirror_space>();
if (use_templ_sync) {
a.template modify<typename ViewType::execution_space>();
a.template sync<typename ViewType::host_mirror_space>();
} else {
a.modify_device();
a.sync_host();
}
// Check device view is initialized as expected
scalar_type a_d_sum = 0;
@ -159,7 +172,11 @@ struct test_dual_view_deep_copy {
// Test deep_copy
Kokkos::deep_copy(b, a);
b.template sync<typename ViewType::host_mirror_space>();
if (use_templ_sync) {
b.template sync<typename ViewType::host_mirror_space>();
} else {
b.sync_host();
}
// Perform same checks on b as done on a
// Check device view is initialized as expected
@ -183,6 +200,145 @@ struct test_dual_view_deep_copy {
} // end run_me
test_dual_view_deep_copy() {
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(10, 5,
true);
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(10, 5,
false);
// Test zero length but allocated (a.d_view.data!=nullptr but
// a.d_view.span()==0)
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(0, 5, true);
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(0, 5,
false);
// Test default constructed view
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(-1, 5,
true);
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >(-1, 5,
false);
}
};
template <typename Scalar, class Device>
struct test_dualview_resize {
typedef Scalar scalar_type;
typedef Device execution_space;
template <typename ViewType>
void run_me() {
const unsigned int n = 10;
const unsigned int m = 5;
const unsigned int factor = 2;
ViewType a("A", n, m);
Kokkos::deep_copy(a.d_view, 1);
/* Covers case "Resize on Device" */
a.modify_device();
Kokkos::resize(a, factor * n, factor * m);
ASSERT_EQ(a.extent(0), n * factor);
ASSERT_EQ(a.extent(1), m * factor);
Kokkos::deep_copy(a.d_view, 1);
a.sync_host();
// Check device view is initialized as expected
scalar_type a_d_sum = 0;
// Execute on the execution_space associated with t_dev's memory space
typedef typename ViewType::t_dev::memory_space::execution_space
t_dev_exec_space;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<t_dev_exec_space>(0, a.d_view.extent(0)),
SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view),
a_d_sum);
// Check host view is synced as expected
scalar_type a_h_sum = 0;
for (size_t i = 0; i < a.h_view.extent(0); ++i)
for (size_t j = 0; j < a.h_view.extent(1); ++j) {
a_h_sum += a.h_view(i, j);
}
// Check
ASSERT_EQ(a_h_sum, a_d_sum);
ASSERT_EQ(a_h_sum, a.extent(0) * a.extent(1));
/* Covers case "Resize on Host" */
a.modify_host();
Kokkos::resize(a, n / factor, m / factor);
ASSERT_EQ(a.extent(0), n / factor);
ASSERT_EQ(a.extent(1), m / factor);
a.sync_device();
// Check device view is initialized as expected
a_d_sum = 0;
// Execute on the execution_space associated with t_dev's memory space
typedef typename ViewType::t_dev::memory_space::execution_space
t_dev_exec_space;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<t_dev_exec_space>(0, a.d_view.extent(0)),
SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view),
a_d_sum);
// Check host view is synced as expected
a_h_sum = 0;
for (size_t i = 0; i < a.h_view.extent(0); ++i)
for (size_t j = 0; j < a.h_view.extent(1); ++j) {
a_h_sum += a.h_view(i, j);
}
// Check
ASSERT_EQ(a_h_sum, a.extent(0) * a.extent(1));
ASSERT_EQ(a_h_sum, a_d_sum);
} // end run_me
test_dualview_resize() {
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >();
}
};
template <typename Scalar, class Device>
struct test_dualview_realloc {
typedef Scalar scalar_type;
typedef Device execution_space;
template <typename ViewType>
void run_me() {
const unsigned int n = 10;
const unsigned int m = 5;
ViewType a("A", n, m);
Kokkos::realloc(a, n, m);
Kokkos::deep_copy(a.d_view, 1);
a.modify_device();
a.sync_host();
// Check device view is initialized as expected
scalar_type a_d_sum = 0;
// Execute on the execution_space associated with t_dev's memory space
typedef typename ViewType::t_dev::memory_space::execution_space
t_dev_exec_space;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<t_dev_exec_space>(0, a.d_view.extent(0)),
SumViewEntriesFunctor<scalar_type, typename ViewType::t_dev>(a.d_view),
a_d_sum);
// Check host view is synced as expected
scalar_type a_h_sum = 0;
for (size_t i = 0; i < a.h_view.extent(0); ++i)
for (size_t j = 0; j < a.h_view.extent(1); ++j) {
a_h_sum += a.h_view(i, j);
}
// Check
ASSERT_EQ(a_h_sum, a.extent(0) * a.extent(1));
ASSERT_EQ(a_h_sum, a_d_sum);
} // end run_me
test_dualview_realloc() {
run_me<Kokkos::DualView<Scalar**, Kokkos::LayoutLeft, Device> >();
}
};
@ -190,8 +346,8 @@ struct test_dual_view_deep_copy {
} // namespace Impl
template <typename Scalar, typename Device>
void test_dualview_combinations(unsigned int size) {
Impl::test_dualview_combinations<Scalar, Device> test(size);
void test_dualview_combinations(unsigned int size, bool with_init) {
Impl::test_dualview_combinations<Scalar, Device> test(size, with_init);
ASSERT_EQ(test.result, 0);
}
@ -200,8 +356,22 @@ void test_dualview_deep_copy() {
Impl::test_dual_view_deep_copy<Scalar, Device>();
}
// Entry point for the DualView realloc test: constructs a temporary
// Impl::test_dualview_realloc<Scalar, Device>, whose constructor performs the
// checks.
template <typename Scalar, typename Device>
void test_dualview_realloc() {
Impl::test_dualview_realloc<Scalar, Device>();
}
// Entry point for the DualView resize test: constructs a temporary
// Impl::test_dualview_resize<Scalar, Device>, whose constructor performs the
// checks.
template <typename Scalar, typename Device>
void test_dualview_resize() {
Impl::test_dualview_resize<Scalar, Device>();
}
TEST(TEST_CATEGORY, dualview_combination) {
test_dualview_combinations<int, TEST_EXECSPACE>(10);
test_dualview_combinations<int, TEST_EXECSPACE>(10, true);
}
TEST(TEST_CATEGORY, dualview_combinations_without_init) {
test_dualview_combinations<int, TEST_EXECSPACE>(10, false);
}
TEST(TEST_CATEGORY, dualview_deep_copy) {
@ -209,6 +379,14 @@ TEST(TEST_CATEGORY, dualview_deep_copy) {
test_dualview_deep_copy<double, TEST_EXECSPACE>();
}
// Runs the DualView realloc test with int entries on TEST_EXECSPACE.
TEST(TEST_CATEGORY, dualview_realloc) {
test_dualview_realloc<int, TEST_EXECSPACE>();
}
// Runs the DualView resize test with int entries on TEST_EXECSPACE.
TEST(TEST_CATEGORY, dualview_resize) {
test_dualview_resize<int, TEST_EXECSPACE>();
}
} // namespace Test
#endif // KOKKOS_TEST_UNORDERED_MAP_HPP
#endif // KOKKOS_TEST_DUALVIEW_HPP

View File

@ -706,8 +706,6 @@ class TestDynViewAPI {
typedef typename View0::host_mirror_space host_view_space;
TestDynViewAPI() {}
static void run_tests() {
run_test_resize_realloc();
run_test_mirror();
@ -1078,12 +1076,12 @@ class TestDynViewAPI {
ASSERT_TRUE(Kokkos::is_dyn_rank_view<dView0>::value);
ASSERT_FALSE(Kokkos::is_dyn_rank_view<Kokkos::View<double> >::value);
ASSERT_TRUE(dx.data() == 0); // Okay with UVM
ASSERT_TRUE(dy.data() == 0); // Okay with UVM
ASSERT_TRUE(dz.data() == 0); // Okay with UVM
ASSERT_TRUE(hx.data() == 0);
ASSERT_TRUE(hy.data() == 0);
ASSERT_TRUE(hz.data() == 0);
ASSERT_TRUE(dx.data() == nullptr); // Okay with UVM
ASSERT_TRUE(dy.data() == nullptr); // Okay with UVM
ASSERT_TRUE(dz.data() == nullptr); // Okay with UVM
ASSERT_TRUE(hx.data() == nullptr);
ASSERT_TRUE(hy.data() == nullptr);
ASSERT_TRUE(hz.data() == nullptr);
ASSERT_EQ(dx.extent(0), 0u); // Okay with UVM
ASSERT_EQ(dy.extent(0), 0u); // Okay with UVM
ASSERT_EQ(dz.extent(0), 0u); // Okay with UVM
@ -1154,11 +1152,11 @@ class TestDynViewAPI {
ASSERT_EQ(dx.use_count(), size_t(2));
ASSERT_FALSE(dx.data() == 0);
ASSERT_FALSE(const_dx.data() == 0);
ASSERT_FALSE(unmanaged_dx.data() == 0);
ASSERT_FALSE(unmanaged_from_ptr_dx.data() == 0);
ASSERT_FALSE(dy.data() == 0);
ASSERT_FALSE(dx.data() == nullptr);
ASSERT_FALSE(const_dx.data() == nullptr);
ASSERT_FALSE(unmanaged_dx.data() == nullptr);
ASSERT_FALSE(unmanaged_from_ptr_dx.data() == nullptr);
ASSERT_FALSE(dy.data() == nullptr);
ASSERT_NE(dx, dy);
ASSERT_EQ(dx.extent(0), unsigned(N0));
@ -1318,17 +1316,17 @@ class TestDynViewAPI {
ASSERT_NE(dx, dz);
dx = dView0();
ASSERT_TRUE(dx.data() == 0);
ASSERT_FALSE(dy.data() == 0);
ASSERT_FALSE(dz.data() == 0);
ASSERT_TRUE(dx.data() == nullptr);
ASSERT_FALSE(dy.data() == nullptr);
ASSERT_FALSE(dz.data() == nullptr);
dy = dView0();
ASSERT_TRUE(dx.data() == 0);
ASSERT_TRUE(dy.data() == 0);
ASSERT_FALSE(dz.data() == 0);
ASSERT_TRUE(dx.data() == nullptr);
ASSERT_TRUE(dy.data() == nullptr);
ASSERT_FALSE(dz.data() == nullptr);
dz = dView0();
ASSERT_TRUE(dx.data() == 0);
ASSERT_TRUE(dy.data() == 0);
ASSERT_TRUE(dz.data() == 0);
ASSERT_TRUE(dx.data() == nullptr);
ASSERT_TRUE(dy.data() == nullptr);
ASSERT_TRUE(dz.data() == nullptr);
// View - DynRankView Interoperability tests
// deep_copy from view to dynrankview

View File

@ -44,7 +44,10 @@
#include <TestDynViewAPI.hpp>
namespace Test {
// FIXME_HIP attempt to access inaccessible memory space
#ifndef KOKKOS_ENABLE_HIP
TEST(TEST_CATEGORY, dyn_rank_view_api_generic) {
TestDynViewAPI<double, TEST_EXECSPACE>::run_tests();
}
#endif
} // namespace Test

View File

@ -45,7 +45,10 @@
#include <TestDynViewAPI.hpp>
namespace Test {
// FIXME_HIP failing with wrong value
#ifndef KOKKOS_ENABLE_HIP
TEST(TEST_CATEGORY, dyn_rank_view_api_operator_rank12345) {
TestDynViewAPI<double, TEST_EXECSPACE>::run_operator_test_rank12345();
}
#endif
} // namespace Test

View File

@ -79,7 +79,6 @@ struct TestDynamicView {
ASSERT_EQ(da.size(), da_size);
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION)
Kokkos::parallel_for(
Kokkos::RangePolicy<execution_space>(0, da_size),
KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); });
@ -93,7 +92,6 @@ struct TestDynamicView {
result_sum);
ASSERT_EQ(result_sum, (value_type)(da_size * (da_size - 1) / 2));
#endif
#endif
// add 3x more entries i.e. 4x larger than previous size
@ -103,7 +101,6 @@ struct TestDynamicView {
ASSERT_EQ(da.size(), da_resize);
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION)
Kokkos::parallel_for(
Kokkos::RangePolicy<execution_space>(da_size, da_resize),
KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); });
@ -118,7 +115,6 @@ struct TestDynamicView {
ASSERT_EQ(new_result_sum + result_sum,
(value_type)(da_resize * (da_resize - 1) / 2));
#endif
#endif
} // end scope
@ -135,7 +131,6 @@ struct TestDynamicView {
ASSERT_EQ(da.size(), da_size);
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION)
Kokkos::parallel_for(
Kokkos::RangePolicy<execution_space>(0, da_size),
KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); });
@ -149,7 +144,6 @@ struct TestDynamicView {
result_sum);
ASSERT_EQ(result_sum, (value_type)(da_size * (da_size - 1) / 2));
#endif
#endif
// add 3x more entries i.e. 4x larger than previous size
@ -159,7 +153,6 @@ struct TestDynamicView {
ASSERT_EQ(da.size(), da_resize);
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION)
Kokkos::parallel_for(
Kokkos::RangePolicy<execution_space>(da_size, da_resize),
KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); });
@ -174,7 +167,6 @@ struct TestDynamicView {
ASSERT_EQ(new_result_sum + result_sum,
(value_type)(da_resize * (da_resize - 1) / 2));
#endif
#endif
} // end scope
@ -191,7 +183,6 @@ struct TestDynamicView {
ASSERT_EQ(da.size(), da_size);
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION)
Kokkos::parallel_for(
Kokkos::RangePolicy<execution_space>(0, da_size),
KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); });
@ -205,7 +196,6 @@ struct TestDynamicView {
result_sum);
ASSERT_EQ(result_sum, (value_type)(da_size * (da_size - 1) / 2));
#endif
#endif
// remove the final 3/4 entries i.e. first 1/4 remain
@ -214,7 +204,6 @@ struct TestDynamicView {
ASSERT_EQ(da.size(), da_resize);
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION)
Kokkos::parallel_for(
Kokkos::RangePolicy<execution_space>(0, da_resize),
KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); });
@ -228,7 +217,6 @@ struct TestDynamicView {
new_result_sum);
ASSERT_EQ(new_result_sum, (value_type)(da_resize * (da_resize - 1) / 2));
#endif
#endif
} // end scope
}

View File

@ -50,9 +50,13 @@
#include <Kokkos_Core.hpp>
#include <Kokkos_ErrorReporter.hpp>
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
namespace Test {
// Just save the data in the report. Informative text goies in the
// Just save the data in the report. Informative text goes in the
// operator<<(..).
template <typename DataType1, typename DataType2, typename DataType3>
struct ThreeValReport {
@ -85,7 +89,7 @@ struct ErrorReporterDriverBase {
error_reporter_type;
error_reporter_type m_errorReporter;
ErrorReporterDriverBase(int reporter_capacity, int test_size)
ErrorReporterDriverBase(int reporter_capacity, int /*test_size*/)
: m_errorReporter(reporter_capacity) {}
KOKKOS_INLINE_FUNCTION bool error_condition(const int work_idx) const {
@ -176,7 +180,8 @@ struct ErrorReporterDriver : public ErrorReporterDriverBase<DeviceType> {
}
};
#if defined(KOKKOS_CLASS_LAMBDA)
#if defined(KOKKOS_CLASS_LAMBDA) && \
(!defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_LAMBDA))
template <typename DeviceType>
struct ErrorReporterDriverUseLambda
: public ErrorReporterDriverBase<DeviceType> {
@ -225,7 +230,8 @@ struct ErrorReporterDriverNativeOpenMP
};
#endif
#if defined(KOKKOS_CLASS_LAMBDA)
#if defined(KOKKOS_CLASS_LAMBDA) && \
(!defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_LAMBDA))
TEST(TEST_CATEGORY, ErrorReporterViaLambda) {
TestErrorReporter<ErrorReporterDriverUseLambda<TEST_EXECSPACE>>();
}

View File

@ -42,10 +42,10 @@
//@HEADER
*/
#ifndef KOKKOS_TEST_ROCM_HPP
#define KOKKOS_TEST_ROCM_HPP
#ifndef KOKKOS_TEST_HIP_HPP
#define KOKKOS_TEST_HIP_HPP
#define TEST_CATEGORY rocm
#define TEST_EXECSPACE Kokkos::Experimental::ROCm
#define TEST_CATEGORY hip
#define TEST_EXECSPACE Kokkos::Experimental::HIP
#endif

View File

@ -60,7 +60,7 @@ using std::endl;
namespace Test {
template <typename Scalar, typename Device>
void test_offsetview_construction(unsigned int size) {
void test_offsetview_construction() {
typedef Kokkos::Experimental::OffsetView<Scalar**, Device> offset_view_type;
typedef Kokkos::View<Scalar**, Device> view_type;
@ -185,15 +185,17 @@ void test_offsetview_construction(unsigned int size) {
Kokkos::deep_copy(view3D, 1);
Kokkos::Array<int64_t, 3> begins = {{-10, -20, -30}};
Kokkos::Experimental::OffsetView<Scalar***, Device> offsetView3D(view3D,
begins);
typedef Kokkos::MDRangePolicy<Device, Kokkos::Rank<3>,
Kokkos::IndexType<int64_t> >
range3_type;
typedef typename range3_type::point_type point3_type;
typename point3_type::value_type begins0 = -10, begins1 = -20,
begins2 = -30;
Kokkos::Array<int64_t, 3> begins = {{begins0, begins1, begins2}};
Kokkos::Experimental::OffsetView<Scalar***, Device> offsetView3D(view3D,
begins);
range3_type rangePolicy3DZero(point3_type{{0, 0, 0}},
point3_type{{extent0, extent1, extent2}});
@ -207,9 +209,8 @@ void test_offsetview_construction(unsigned int size) {
view3DSum);
range3_type rangePolicy3D(
point3_type{{begins[0], begins[1], begins[2]}},
point3_type{
{begins[0] + extent0, begins[1] + extent1, begins[2] + extent2}});
point3_type{{begins0, begins1, begins2}},
point3_type{{begins0 + extent0, begins1 + extent1, begins2 + extent2}});
int offsetView3DSum = 0;
Kokkos::parallel_reduce(
@ -388,7 +389,7 @@ void test_offsetview_unmanaged_construction() {
}
template <typename Scalar, typename Device>
void test_offsetview_subview(unsigned int size) {
void test_offsetview_subview() {
{ // test subview 1
Kokkos::Experimental::OffsetView<Scalar*, Device> sliceMe("offsetToSlice",
{-10, 20});
@ -675,7 +676,7 @@ void test_offsetview_offsets_rank3() {
#endif
TEST(TEST_CATEGORY, offsetview_construction) {
test_offsetview_construction<int, TEST_EXECSPACE>(10);
test_offsetview_construction<int, TEST_EXECSPACE>();
}
TEST(TEST_CATEGORY, offsetview_unmanaged_construction) {
@ -683,7 +684,7 @@ TEST(TEST_CATEGORY, offsetview_unmanaged_construction) {
}
TEST(TEST_CATEGORY, offsetview_subview) {
test_offsetview_subview<int, TEST_EXECSPACE>(10);
test_offsetview_subview<int, TEST_EXECSPACE>();
}
#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)

View File

@ -50,21 +50,21 @@
namespace Test {
template <typename ExecSpace, typename Layout, int duplication,
template <typename DeviceType, typename Layout, int duplication,
int contribution, int op>
struct test_scatter_view_impl_cls;
template <typename ExecSpace, typename Layout, int duplication,
template <typename DeviceType, typename Layout, int duplication,
int contribution>
struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
struct test_scatter_view_impl_cls<DeviceType, Layout, duplication, contribution,
Kokkos::Experimental::ScatterSum> {
public:
typedef Kokkos::Experimental::ScatterView<double * [3], Layout, ExecSpace,
typedef Kokkos::Experimental::ScatterView<double * [3], Layout, DeviceType,
Kokkos::Experimental::ScatterSum,
duplication, contribution>
scatter_view_type;
typedef Kokkos::View<double * [3], Layout, ExecSpace> orig_view_type;
typedef Kokkos::View<double * [3], Layout, DeviceType> orig_view_type;
scatter_view_type scatter_view;
int scatterSize;
@ -90,7 +90,8 @@ struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
void run_parallel(int n) {
scatterSize = n;
auto policy = Kokkos::RangePolicy<ExecSpace, int>(0, n);
auto policy =
Kokkos::RangePolicy<typename DeviceType::execution_space, int>(0, n);
Kokkos::parallel_for(policy, *this, "scatter_view_test: Sum");
}
@ -123,17 +124,17 @@ struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
}
};
template <typename ExecSpace, typename Layout, int duplication,
template <typename DeviceType, typename Layout, int duplication,
int contribution>
struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
struct test_scatter_view_impl_cls<DeviceType, Layout, duplication, contribution,
Kokkos::Experimental::ScatterProd> {
public:
typedef Kokkos::Experimental::ScatterView<double * [3], Layout, ExecSpace,
typedef Kokkos::Experimental::ScatterView<double * [3], Layout, DeviceType,
Kokkos::Experimental::ScatterProd,
duplication, contribution>
scatter_view_type;
typedef Kokkos::View<double * [3], Layout, ExecSpace> orig_view_type;
typedef Kokkos::View<double * [3], Layout, DeviceType> orig_view_type;
scatter_view_type scatter_view;
int scatterSize;
@ -159,7 +160,8 @@ struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
void run_parallel(int n) {
scatterSize = n;
auto policy = Kokkos::RangePolicy<ExecSpace, int>(0, n);
auto policy =
Kokkos::RangePolicy<typename DeviceType::execution_space, int>(0, n);
Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod");
}
@ -192,17 +194,17 @@ struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
}
};
template <typename ExecSpace, typename Layout, int duplication,
template <typename DeviceType, typename Layout, int duplication,
int contribution>
struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
struct test_scatter_view_impl_cls<DeviceType, Layout, duplication, contribution,
Kokkos::Experimental::ScatterMin> {
public:
typedef Kokkos::Experimental::ScatterView<double * [3], Layout, ExecSpace,
typedef Kokkos::Experimental::ScatterView<double * [3], Layout, DeviceType,
Kokkos::Experimental::ScatterMin,
duplication, contribution>
scatter_view_type;
typedef Kokkos::View<double * [3], Layout, ExecSpace> orig_view_type;
typedef Kokkos::View<double * [3], Layout, DeviceType> orig_view_type;
scatter_view_type scatter_view;
int scatterSize;
@ -228,7 +230,8 @@ struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
// Stores n in scatterSize, then launches this object as the body of a
// Kokkos::parallel_for over the index range [0, n).
void run_parallel(int n) {
scatterSize = n;
// NOTE(review): this listing carries two declarations of `policy`: the
// ExecSpace-based one and the DeviceType::execution_space-based one. They look
// like the before/after forms of the same statement; only the second matches
// the DeviceType-based typedefs of this struct.
auto policy = Kokkos::RangePolicy<ExecSpace, int>(0, n);
auto policy =
Kokkos::RangePolicy<typename DeviceType::execution_space, int>(0, n);
// The kernel label names the reduction of this specialization (ScatterMin);
// it previously read "Prod", copied from the ScatterProd specialization.
Kokkos::parallel_for(policy, *this, "scatter_view_test: Min");
}
@ -261,17 +264,17 @@ struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
}
};
template <typename ExecSpace, typename Layout, int duplication,
template <typename DeviceType, typename Layout, int duplication,
int contribution>
struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
struct test_scatter_view_impl_cls<DeviceType, Layout, duplication, contribution,
Kokkos::Experimental::ScatterMax> {
public:
typedef Kokkos::Experimental::ScatterView<double * [3], Layout, ExecSpace,
typedef Kokkos::Experimental::ScatterView<double * [3], Layout, DeviceType,
Kokkos::Experimental::ScatterMax,
duplication, contribution>
scatter_view_type;
typedef Kokkos::View<double * [3], Layout, ExecSpace> orig_view_type;
typedef Kokkos::View<double * [3], Layout, DeviceType> orig_view_type;
scatter_view_type scatter_view;
int scatterSize;
@ -297,7 +300,7 @@ struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
// Stores n in scatterSize, then launches this object as the body of a
// Kokkos::parallel_for over the index range [0, n).
void run_parallel(int n) {
scatterSize = n;
// NOTE(review): this listing carries two declarations of `policy`: the
// ExecSpace-based one and the DeviceType::execution_space-based one. They look
// like the before/after forms of the same statement; only the second matches
// the DeviceType-based typedefs of this struct.
auto policy = Kokkos::RangePolicy<ExecSpace, int>(0, n);
Kokkos::RangePolicy<typename DeviceType::execution_space, int> policy(0, n);
// The kernel label names the reduction of this specialization (ScatterMax);
// it previously read "Prod", copied from the ScatterProd specialization.
Kokkos::parallel_for(policy, *this, "scatter_view_test: Max");
}
@ -330,20 +333,18 @@ struct test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
}
};
template <typename ExecSpace, typename Layout, int duplication,
template <typename DeviceType, typename Layout, int duplication,
int contribution, int op>
struct test_scatter_view_config {
public:
typedef
typename test_scatter_view_impl_cls<ExecSpace, Layout, duplication,
typename test_scatter_view_impl_cls<DeviceType, Layout, duplication,
contribution, op>::scatter_view_type
scatter_view_def;
typedef typename test_scatter_view_impl_cls<ExecSpace, Layout, duplication,
typedef typename test_scatter_view_impl_cls<DeviceType, Layout, duplication,
contribution, op>::orig_view_type
orig_view_def;
test_scatter_view_config() {}
void run_test(int n) {
// Test creation via create_scatter_view
{
@ -351,7 +352,7 @@ struct test_scatter_view_config {
scatter_view_def scatter_view = Kokkos::Experimental::create_scatter_view<
op, duplication, contribution>(original_view);
test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
test_scatter_view_impl_cls<DeviceType, Layout, duplication, contribution,
op>
scatter_view_test_impl(scatter_view);
scatter_view_test_impl.initialize(original_view);
@ -379,7 +380,7 @@ struct test_scatter_view_config {
orig_view_def original_view("original_view", n);
scatter_view_def scatter_view(original_view);
test_scatter_view_impl_cls<ExecSpace, Layout, duplication, contribution,
test_scatter_view_impl_cls<DeviceType, Layout, duplication, contribution,
op>
scatter_view_test_impl(scatter_view);
scatter_view_test_impl.initialize(original_view);
@ -405,17 +406,18 @@ struct test_scatter_view_config {
}
};
template <typename ExecSpace, int ScatterType>
template <typename DeviceType, int ScatterType>
struct TestDuplicatedScatterView {
TestDuplicatedScatterView(int n) {
// ScatterSum test
test_scatter_view_config<
ExecSpace, Kokkos::LayoutRight, Kokkos::Experimental::ScatterDuplicated,
Kokkos::Experimental::ScatterNonAtomic, ScatterType>
test_scatter_view_config<DeviceType, Kokkos::LayoutRight,
Kokkos::Experimental::ScatterDuplicated,
Kokkos::Experimental::ScatterNonAtomic,
ScatterType>
test_sv_right_config;
test_sv_right_config.run_test(n);
test_scatter_view_config<
ExecSpace, Kokkos::LayoutLeft, Kokkos::Experimental::ScatterDuplicated,
DeviceType, Kokkos::LayoutLeft, Kokkos::Experimental::ScatterDuplicated,
Kokkos::Experimental::ScatterNonAtomic, ScatterType>
test_sv_left_config;
test_sv_left_config.run_test(n);
@ -429,6 +431,16 @@ template <int ScatterType>
struct TestDuplicatedScatterView<Kokkos::Cuda, ScatterType> {
TestDuplicatedScatterView(int) {}
};
// Specialization for Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>: the
// constructor ignores its argument and has an empty body, so constructing it
// runs none of the duplicated ScatterView configurations for this device type.
template <int ScatterType>
struct TestDuplicatedScatterView<
Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>, ScatterType> {
TestDuplicatedScatterView(int) {}
};
// Specialization for Kokkos::Device<Kokkos::Cuda, Kokkos::CudaUVMSpace>: the
// constructor ignores its argument and has an empty body, so constructing it
// runs none of the duplicated ScatterView configurations for this device type.
template <int ScatterType>
struct TestDuplicatedScatterView<
Kokkos::Device<Kokkos::Cuda, Kokkos::CudaUVMSpace>, ScatterType> {
TestDuplicatedScatterView(int) {}
};
#endif
#ifdef KOKKOS_ENABLE_ROCM
@ -440,17 +452,15 @@ struct TestDuplicatedScatterView<Kokkos::Experimental::ROCm, ScatterType> {
};
#endif
template <typename ExecSpace, int ScatterType>
template <typename DeviceType, int ScatterType>
void test_scatter_view(int n) {
// all of these configurations should compile okay, but only some of them are
// correct and/or sensible in terms of memory use
Kokkos::Experimental::UniqueToken<ExecSpace> unique_token{ExecSpace()};
using execution_space = typename DeviceType::execution_space;
// no atomics or duplication is only sensible if the execution space
// is running essentially in serial (doesn't have to be Serial though,
// we also test OpenMP with one thread: LAMMPS cares about that)
if (unique_token.size() == 1) {
test_scatter_view_config<ExecSpace, Kokkos::LayoutRight,
if (execution_space().concurrency() == 1) {
test_scatter_view_config<DeviceType, Kokkos::LayoutRight,
Kokkos::Experimental::ScatterNonDuplicated,
Kokkos::Experimental::ScatterNonAtomic,
ScatterType>
@ -458,9 +468,9 @@ void test_scatter_view(int n) {
test_sv_config.run_test(n);
}
#ifdef KOKKOS_ENABLE_SERIAL
if (!std::is_same<ExecSpace, Kokkos::Serial>::value) {
if (!std::is_same<DeviceType, Kokkos::Serial>::value) {
#endif
test_scatter_view_config<ExecSpace, Kokkos::LayoutRight,
test_scatter_view_config<DeviceType, Kokkos::LayoutRight,
Kokkos::Experimental::ScatterNonDuplicated,
Kokkos::Experimental::ScatterAtomic, ScatterType>
test_sv_config;
@ -473,16 +483,18 @@ void test_scatter_view(int n) {
constexpr std::size_t maximum_allowed_total_bytes =
8ull * 1024ull * 1024ull * 1024ull;
std::size_t const maximum_allowed_copy_bytes =
maximum_allowed_total_bytes / std::size_t(unique_token.size());
maximum_allowed_total_bytes /
std::size_t(execution_space().concurrency());
constexpr std::size_t bytes_per_value = sizeof(double) * 3;
std::size_t const maximum_allowed_copy_values =
maximum_allowed_copy_bytes / bytes_per_value;
n = std::min(n, int(maximum_allowed_copy_values));
TestDuplicatedScatterView<ExecSpace, ScatterType> duptest(n);
TestDuplicatedScatterView<DeviceType, ScatterType> duptest(n);
}
// FIXME_HIP ScatterView requires UniqueToken
#ifndef KOKKOS_ENABLE_HIP
TEST(TEST_CATEGORY, scatterview) {
#ifndef KOKKOS_ENABLE_ROCM
test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterSum>(10);
test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterProd>(10);
test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterMin>(10);
@ -504,8 +516,38 @@ TEST(TEST_CATEGORY, scatterview) {
test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterProd>(big_n);
test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterMin>(big_n);
test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterMax>(big_n);
}
// Runs test_scatter_view with explicit Kokkos::Device<...> types instead of a
// bare execution space, once per scatter operation (Sum, Prod, Min, Max).
TEST(TEST_CATEGORY, scatterview_devicetype) {
  const int small_n = 10;

  // Device formed from the test execution space and its own memory_space.
  typedef Kokkos::Device<TEST_EXECSPACE, typename TEST_EXECSPACE::memory_space>
      default_device;

  test_scatter_view<default_device, Kokkos::Experimental::ScatterSum>(small_n);
  test_scatter_view<default_device, Kokkos::Experimental::ScatterProd>(small_n);
  test_scatter_view<default_device, Kokkos::Experimental::ScatterMin>(small_n);
  test_scatter_view<default_device, Kokkos::Experimental::ScatterMax>(small_n);

#ifdef KOKKOS_ENABLE_CUDA
  // When the test execution space is Cuda, additionally cover both the
  // CudaSpace and the CudaUVMSpace device pairings.
  if (std::is_same<TEST_EXECSPACE, Kokkos::Cuda>::value) {
    typedef Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace> cuda_plain_device;
    typedef Kokkos::Device<Kokkos::Cuda, Kokkos::CudaUVMSpace> cuda_uvm_device;

    test_scatter_view<cuda_plain_device, Kokkos::Experimental::ScatterSum>(
        small_n);
    test_scatter_view<cuda_plain_device, Kokkos::Experimental::ScatterProd>(
        small_n);
    test_scatter_view<cuda_plain_device, Kokkos::Experimental::ScatterMin>(
        small_n);
    test_scatter_view<cuda_plain_device, Kokkos::Experimental::ScatterMax>(
        small_n);

    test_scatter_view<cuda_uvm_device, Kokkos::Experimental::ScatterSum>(
        small_n);
    test_scatter_view<cuda_uvm_device, Kokkos::Experimental::ScatterProd>(
        small_n);
    test_scatter_view<cuda_uvm_device, Kokkos::Experimental::ScatterMin>(
        small_n);
    test_scatter_view<cuda_uvm_device, Kokkos::Experimental::ScatterMax>(
        small_n);
  }
#endif
}
#endif
} // namespace Test

View File

@ -174,6 +174,9 @@ struct TestFind {
} // namespace Impl
// MSVC reports a syntax error for this test.
// WORKAROUND MSVC
#ifndef _WIN32
template <typename Device>
void test_insert(uint32_t num_nodes, uint32_t num_inserts,
uint32_t num_duplicates, bool near) {
@ -225,6 +228,7 @@ void test_insert(uint32_t num_nodes, uint32_t num_inserts,
EXPECT_EQ(0u, map.size());
}
}
#endif
template <typename Device>
void test_failed_insert(uint32_t num_nodes) {
@ -291,12 +295,17 @@ void test_deep_copy(uint32_t num_nodes) {
}
}
// FIXME_HIP deadlock
#ifndef KOKKOS_ENABLE_HIP
// WORKAROUND MSVC
#ifndef _WIN32
// Repeats the insert test 500 times; every round runs test_insert once with
// near == true and once with near == false, using the same sizes.
TEST(TEST_CATEGORY, UnorderedMap_insert) {
  const uint32_t node_count      = 100000;
  const uint32_t insert_count    = 90000;
  const uint32_t duplicate_count = 100;

  int round = 0;
  while (round < 500) {
    test_insert<TEST_EXECSPACE>(node_count, insert_count, duplicate_count,
                                true);
    test_insert<TEST_EXECSPACE>(node_count, insert_count, duplicate_count,
                                false);
    ++round;
  }
}
#endif
TEST(TEST_CATEGORY, UnorderedMap_failed_insert) {
for (int i = 0; i < 1000; ++i) test_failed_insert<TEST_EXECSPACE>(10000);
@ -305,6 +314,19 @@ TEST(TEST_CATEGORY, UnorderedMap_failed_insert) {
// Runs test_deep_copy twice on the test execution space with 10000 nodes.
TEST(TEST_CATEGORY, UnorderedMap_deep_copy) {
  const uint32_t node_count = 10000;
  for (int pass = 0; pass != 2; ++pass) {
    test_deep_copy<TEST_EXECSPACE>(node_count);
  }
}
#endif
// Exercises assignment, rehash and deep_copy on default-constructed maps.
// The test contains no explicit assertions; it only requires that these
// operations complete.
TEST(TEST_CATEGORY, UnorderedMap_valid_empty) {
  typedef Kokkos::UnorderedMap<int, int, TEST_EXECSPACE> map_type;

  map_type source{};
  map_type target{};

  // Rebuild the target from the source's capacity, rehash it to that same
  // capacity, then copy the source into it.
  target = map_type{source.capacity()};
  target.rehash(source.capacity());
  Kokkos::deep_copy(target, source);
}
} // namespace Test

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cuda/TestCuda_Category.hpp>
#include <TestDynViewAPI_rank67.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cuda/TestCuda_Category.hpp>
#include <TestScatterView.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cuda/TestCuda_Category.hpp>
#include <TestStaticCrsGraph.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cuda/TestCuda_Category.hpp>
#include <TestUnorderedMap.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cuda/TestCuda_Category.hpp>
#include <TestViewCtorPropEmbeddedDim.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestDynViewAPI_generic.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestDynViewAPI_rank12345.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestDynViewAPI_rank67.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestDynamicView.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestErrorReporter.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestOffsetView.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestScatterView.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestStaticCrsGraph.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestUnorderedMap.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hpx/TestHPX_Category.hpp>
#include <TestViewCtorPropEmbeddedDim.hpp>

View File

@ -1,47 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <openmp/TestOpenMP_Category.hpp>
#include <TestDynViewAPI_generic.hpp>

Some files were not shown because too many files have changed in this diff Show More