Merge branch 'master' into kk_copycpu

This commit is contained in:
Axel Kohlmeyer 2020-04-28 11:56:22 -04:00
commit 21cb0d394f
No known key found for this signature in database
GPG Key ID: D9B44E93BF0C375A
679 changed files with 26945 additions and 15386 deletions

2
.github/CODEOWNERS vendored
View File

@ -45,7 +45,7 @@ src/GPU/pair_vashishta_gpu.* @andeplane
src/KOKKOS/pair_vashishta_kokkos.* @andeplane
src/MANYBODY/pair_vashishta_table.* @andeplane
src/MANYBODY/pair_atm.* @sergeylishchuk
src/USER-MISC/fix_bond_react.* @jrgissing
src/USER-REACTION/fix_bond_react.* @jrgissing
src/USER-MISC/*_grem.* @dstelter92
src/USER-MISC/compute_stress_mop*.* @RomainVermorel

View File

@ -135,23 +135,20 @@ if(PKG_USER-ADIOS)
target_link_libraries(lammps PRIVATE adios2::adios2)
endif()
if (CMAKE_SYSTEM_NAME STREQUAL Windows)
option(BUILD_MPI "Build MPI version" OFF)
else()
# do MPI detection after language activation,
# in case MPI for these languages is required
if(NOT CMAKE_CROSSCOMPILING)
set(MPI_CXX_SKIP_MPICXX TRUE)
find_package(MPI QUIET)
option(BUILD_MPI "Build MPI version" ${MPI_FOUND})
else()
option(BUILD_MPI "Build MPI version" OFF)
endif()
if(BUILD_MPI)
# We use a non-standard procedure to compile with MPI on windows
if (CMAKE_SYSTEM_NAME STREQUAL Windows)
# We use a non-standard procedure to cross-compile with MPI on Windows
if((CMAKE_SYSTEM_NAME STREQUAL Windows) AND CMAKE_CROSSCOMPILING)
include(MPI4WIN)
target_link_libraries(lammps PUBLIC MPI::MPI_CXX)
else()
set(MPI_CXX_SKIP_MPICXX ON)
find_package(MPI REQUIRED)
target_link_libraries(lammps PUBLIC MPI::MPI_CXX)
option(LAMMPS_LONGLONG_TO_LONG "Workaround if your system or MPI version does not recognize 'long long' data types" OFF)
@ -332,8 +329,9 @@ set(CMAKE_TUNE_FLAGS "${CMAKE_TUNE_DEFAULT}" CACHE STRING "Compiler specific opt
separate_arguments(CMAKE_TUNE_FLAGS)
include(CheckCXXCompilerFlag)
foreach(_FLAG ${CMAKE_TUNE_FLAGS})
check_cxx_compiler_flag("${_FLAG}" COMPILER_SUPPORTS${_FLAG})
if(COMPILER_SUPPORTS${_FLAG})
string(REGEX REPLACE "[=\"]" "" _FLAGX ${_FLAG})
check_cxx_compiler_flag("${_FLAG}" COMPILER_SUPPORTS${_FLAGX})
if(COMPILER_SUPPORTS${_FLAGX})
target_compile_options(lammps PRIVATE ${_FLAG})
else()
message(WARNING "${_FLAG} found in CMAKE_TUNE_FLAGS, but not supported by the compiler, skipping")
@ -705,6 +703,7 @@ else()
endif()
if(BUILD_MPI)
message(STATUS "<<< MPI flags >>>
-- MPI_defines: ${MPI_CXX_COMPILE_DEFINITIONS}
-- MPI includes: ${MPI_CXX_INCLUDE_PATH}
-- MPI libraries: ${MPI_CXX_LIBRARIES};${MPI_Fortran_LIBRARIES}")
endif()

View File

@ -23,3 +23,8 @@ set_target_properties(MPI::MPI_CXX PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${SOURCE_DIR}/include"
INTERFACE_COMPILE_DEFINITIONS "MPICH_SKIP_MPICXX")
add_dependencies(MPI::MPI_CXX mpi4win_build)
# set variables for status reporting at the end of CMake run
set(MPI_CXX_INCLUDE_PATH "${SOURCE_DIR}/include")
set(MPI_CXX_COMPILE_DEFINITIONS "MPICH_SKIP_MPICXX")
set(MPI_CXX_LIBRARIES "${SOURCE_DIR}/lib/libmpi.a")

View File

@ -330,7 +330,7 @@ elseif(GPU_API STREQUAL "HIP")
if(HIP_PLATFORM STREQUAL "nvcc")
target_compile_definitions(gpu PRIVATE -D__HIP_PLATFORM_NVCC__)
target_include_directories(gpu PRIVATE ${HIP_ROOT_DIR}/include)
target_include_directories(gpu PRIVATE ${HIP_ROOT_DIR}/../include)
target_include_directories(gpu PRIVATE ${CUDA_INCLUDE_DIRS})
target_link_libraries(gpu PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})
@ -338,6 +338,12 @@ elseif(GPU_API STREQUAL "HIP")
target_include_directories(hip_get_devices PRIVATE ${HIP_ROOT_DIR}/include)
target_include_directories(hip_get_devices PRIVATE ${CUDA_INCLUDE_DIRS})
target_link_libraries(hip_get_devices PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})
elseif(HIP_PLATFORM STREQUAL "hcc")
target_compile_definitions(gpu PRIVATE -D__HIP_PLATFORM_HCC__)
target_include_directories(gpu PRIVATE ${HIP_ROOT_DIR}/../include)
target_compile_definitions(hip_get_devices PRIVATE -D__HIP_PLATFORM_HCC__)
target_include_directories(hip_get_devices PRIVATE ${HIP_ROOT_DIR}/../include)
endif()
target_link_libraries(lammps PRIVATE gpu)
@ -353,7 +359,12 @@ RegisterStylesExt(${GPU_SOURCES_DIR} gpu GPU_SOURCES)
get_property(GPU_SOURCES GLOBAL PROPERTY GPU_SOURCES)
target_link_libraries(gpu PRIVATE MPI::MPI_CXX)
if(NOT BUILD_MPI)
# mpistubs is aliased to MPI::MPI_CXX, but older versions of cmake won't work forward the include path
target_link_libraries(gpu PRIVATE mpi_stubs)
else()
target_link_libraries(gpu PRIVATE MPI::MPI_CXX)
endif()
if(NOT BUILD_SHARED_LIBS)
install(TARGETS gpu EXPORT LAMMPS_Targets LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()

View File

@ -1,7 +1,12 @@
set(KIM-API_MIN_VERSION 2.1.3)
find_package(CURL)
if(CURL_FOUND)
target_link_libraries(lammps PRIVATE CURL::libcurl)
if(CMAKE_VERSION VERSION_LESS 3.12)
target_include_directories(lammps PRIVATE ${CURL_INCLUDE_DIRS})
target_link_libraries(lammps PRIVATE ${CURL_LIBRARIES})
else()
target_link_libraries(lammps PRIVATE CURL::libcurl)
endif()
target_compile_definitions(lammps PRIVATE -DLMP_KIM_CURL)
set(LMP_DEBUG_CURL OFF CACHE STRING "Set libcurl verbose mode on/off. If on, it displays a lot of verbose information about its operations.")
mark_as_advanced(LMP_DEBUG_CURL)

View File

@ -14,16 +14,30 @@ endif()
option(EXTERNAL_KOKKOS "Build against external kokkos library" OFF)
option(DOWNLOAD_KOKKOS "Download the KOKKOS library instead of using the bundled one" OFF)
if(DOWNLOAD_KOKKOS)
# extract Kokkos-related variables and values so we can forward them to the Kokkos library build
get_cmake_property(_VARS VARIABLES)
list(FILTER _VARS INCLUDE REGEX ^Kokkos_)
foreach(_VAR IN LISTS _VARS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-D${_VAR}=${${_VAR}}")
endforeach()
message(STATUS "KOKKOS download requested - we will build our own")
file(DOWNLOAD https://github.com/kokkos/kokkos/compare/3.0.00...stanmoore1:lammps.diff ${CMAKE_CURRENT_BINARY_DIR}/kokkos-lammps.patch)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>")
if(CMAKE_REQUEST_PIC)
list(APPEND KOKKOS_LIB_BUILD_ARGS ${CMAKE_REQUEST_PIC})
endif()
# append other CMake variables that need to be forwarded to CMAKE_ARGS
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_INSTALL_LIBDIR=lib")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject)
ExternalProject_Add(kokkos_build
URL https://github.com/kokkos/kokkos/archive/3.0.00.tar.gz
URL_MD5 281c7093aa3a603276e93abdf4be23b9
PATCH_COMMAND patch -p1 < ${CMAKE_CURRENT_BINARY_DIR}/kokkos-lammps.patch
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> ${CMAKE_REQUEST_PIC}
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_LIBDIR=lib
-DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
URL https://github.com/kokkos/kokkos/archive/3.1.00.tar.gz
URL_MD5 f638a6c786f748a602b26faa0e96ebab
CMAKE_ARGS ${KOKKOS_LIB_BUILD_ARGS}
BUILD_BYPRODUCTS <INSTALL_DIR>/lib/libkokkoscore.a
)
ExternalProject_get_property(kokkos_build INSTALL_DIR)
@ -39,9 +53,9 @@ if(DOWNLOAD_KOKKOS)
install(CODE "MESSAGE(FATAL_ERROR \"Installing liblammps with downloaded libraries is currently not supported.\")")
endif()
elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 3)
find_package(Kokkos 3.1)
if(NOT Kokkos_FOUND)
message(FATAL_ERROR "KOKKOS library not found, help CMake to find it by setting KOKKOS_LIBRARY, or set DOWNLOAD_KOKKOS=ON to download it")
message(FATAL_ERROR "KOKKOS library version 3.1 or later not found, help CMake to find it by setting KOKKOS_LIBRARY, or set DOWNLOAD_KOKKOS=ON to download it")
endif()
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
else()

View File

@ -1,4 +1,11 @@
enable_language(Fortran)
# using lammps in a super-build setting
if(TARGET LATTE::latte)
target_link_libraries(lammps PRIVATE LATTE::latte)
return()
endif()
find_package(LATTE)
if(LATTE_FOUND)
set(DOWNLOAD_LATTE_DEFAULT OFF)
@ -35,5 +42,6 @@ else()
if(NOT LATTE_FOUND)
message(FATAL_ERROR "LATTE library not found, help CMake to find it by setting LATTE_LIBRARY, or set DOWNLOAD_LATTE=ON to download it")
endif()
target_link_libraries(lammps PRIVATE LATTE::latte)
# latte needs lapack
target_link_libraries(lammps PRIVATE LATTE::latte ${LAPACK_LIBRARIES})
endif()

View File

@ -74,12 +74,11 @@ if(DOWNLOAD_PLUMED)
install(CODE "MESSAGE(FATAL_ERROR \"Installing liblammps with downloaded libraries is currently not supported.\")")
endif()
if(PLUMED_MODE STREQUAL "STATIC")
set_target_properties(LAMMPS::PLUMED PROPERTIES INTERFACE_COMPILE_DEFINITIONS "__PLUMED_WRAPPER_CXX=1")
set_target_properties(LAMMPS::PLUMED PROPERTIES IMPORTED_LOCATION ${INSTALL_DIR}/lib/libplumed.a INTERFACE_LINK_LIBRARIES "${PLUMED_LINK_LIBS};${CMAKE_DL_LIBS}")
elseif(PLUMED_MODE STREQUAL "SHARED")
set_target_properties(LAMMPS::PLUMED PROPERTIES IMPORTED_LOCATION ${INSTALL_DIR}/lib/libplumed${CMAKE_SHARED_LIBRARY_SUFFIX} INTERFACE_LINK_LIBRARIES "${INSTALL_DIR}/lib/libplumedKernel${CMAKE_SHARED_LIBRARY_SUFFIX};${CMAKE_DL_LIBS}")
elseif(PLUMED_MODE STREQUAL "RUNTIME")
set_target_properties(LAMMPS::PLUMED PROPERTIES INTERFACE_COMPILE_DEFINITIONS "__PLUMED_HAS_DLOPEN=1;__PLUMED_DEFAULT_KERNEL=${INSTALL_DIR}/lib/libplumedKernel${CMAKE_SHARED_LIBRARY_SUFFIX}")
set_target_properties(LAMMPS::PLUMED PROPERTIES INTERFACE_COMPILE_DEFINITIONS "__PLUMED_DEFAULT_KERNEL=${INSTALL_DIR}/lib/libplumedKernel${CMAKE_SHARED_LIBRARY_SUFFIX}")
set_target_properties(LAMMPS::PLUMED PROPERTIES IMPORTED_LOCATION ${INSTALL_DIR}/lib/libplumedWrapper.a INTERFACE_LINK_LIBRARIES "${CMAKE_DL_LIBS}")
endif()
set_target_properties(LAMMPS::PLUMED PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INSTALL_DIR}/include)
@ -89,12 +88,11 @@ else()
pkg_check_modules(PLUMED REQUIRED plumed)
add_library(LAMMPS::PLUMED INTERFACE IMPORTED)
if(PLUMED_MODE STREQUAL "STATIC")
set_target_properties(LAMMPS::PLUMED PROPERTIES INTERFACE_COMPILE_DEFINITIONS "__PLUMED_WRAPPER_CXX=1")
include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.static)
elseif(PLUMED_MODE STREQUAL "SHARED")
include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.shared)
elseif(PLUMED_MODE STREQUAL "RUNTIME")
set_target_properties(LAMMPS::PLUMED PROPERTIES INTERFACE_COMPILE_DEFINITIONS "__PLUMED_HAS_DLOPEN=1;__PLUMED_DEFAULT_KERNEL=${PLUMED_LIBDIR}/libplumedKernel${CMAKE_SHARED_LIBRARY_SUFFIX}")
set_target_properties(LAMMPS::PLUMED PROPERTIES INTERFACE_COMPILE_DEFINITIONS "__PLUMED_DEFAULT_KERNEL=${PLUMED_LIBDIR}/libplumedKernel${CMAKE_SHARED_LIBRARY_SUFFIX}")
include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.runtime)
endif()
set_target_properties(LAMMPS::PLUMED PROPERTIES INTERFACE_LINK_LIBRARIES "${PLUMED_LOAD}")

View File

@ -2,7 +2,7 @@
set(CMAKE_CXX_COMPILER "clang++" CACHE STRING "" FORCE)
set(CMAKE_C_COMPILER "clang" CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS "-Wall -Wextra -g -O2 -DNDEBG" CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS "-Wall -Wextra -g -O2 -DNDEBUG" CACHE STRING "" FORCE)
set(MPI_CXX "clang++" CACHE STRING "" FORCE)
set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE)
unset(HAVE_OMP_H_INCLUDE CACHE)

12
cmake/presets/hip.cmake Normal file
View File

@ -0,0 +1,12 @@
# preset that will enable hipcc plus gcc with support for MPI and OpenMP (on Linux boxes)
set(CMAKE_CXX_COMPILER "hipcc" CACHE STRING "" FORCE)
set(CMAKE_C_COMPILER "gcc" CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS "-Wall -Wextra -g -O2 -DNDEBUG" CACHE STRING "" FORCE)
unset(HAVE_OMP_H_INCLUDE CACHE)
set(OpenMP_CXX "hipcc" CACHE STRING "" FORCE)
set(OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "" FORCE)
set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE)
set(OpenMP_omp_LIBRARY "libomp.so" CACHE PATH "" FORCE)

View File

@ -49,22 +49,15 @@ include files provided with LAMMPS are included with double quotes
For headers declaring functions of the C-library, the corresponding
C++ versions should be included (examples: `#include <cstdlib>` or
`#include <cctypes>`). However, these includes are limited to those defined
in the C++98 standard. Some files thus must use the older style until
the minimum C++ standard requirement of LAMMPS is lifted to C++11 or
even beyond (examples: `#include <stdint.h>` versus `#include <cstdint>`
or `#include <inttypes.h>` versus `#include <cinttypes>`).
`#include <cctypes>` instead of `#include <stdlib.h>` or
`#include<ctypes.h>` ).
### C++ Standard Compliance
LAMMPS core files currently correspond to the C++98 standard. Files
requiring C++11 or later are only permitted in (optional) packages
and particularly packages that are not part of the list of commonly
used packages such as MOLECULE, KSPACE, MANYBODY, or RIGID.
Also, LAMMPS uses the C-style stdio library for I/O instead of iostreams.
Since using both at the same time can cause problems, iostreams should
be avoided where possible.
LAMMPS core files use standard conforming C++ compatible with the
C++11 standard, unless explicitly noted. Also, LAMMPS uses the C-style
stdio library for I/O instead of iostreams. Since using both at the
same time can cause problems, iostreams should be avoided where possible.
### Lean Header Files

View File

@ -320,11 +320,12 @@ to have an executable that will run on this and newer architectures.
.. note::
NVIDIA GPUs with CC 5.0 (Maxwell) and newer are not compatible with
CC 3.x (Kepler). If you run Kokkos on a newer architecture than what
LAMMPS was compiled with, there will be a significant delay during
device initialization since the just-in-time compiler has to
recompile the GPU kernel code for the new hardware.
If you run Kokkos on a newer GPU architecture than what LAMMPS was
compiled with, there will be a delay during device initialization
since the just-in-time compiler has to recompile all GPU kernels
for the new hardware. This is, however, not possible when compiled
for NVIDIA GPUs with CC 3.x (Kepler) for GPUs with CC 5.0 (Maxwell)
and newer as they are not compatible.
The settings discussed below have been tested with LAMMPS and are
confirmed to work. Kokkos is an active project with ongoing improvements
@ -343,73 +344,109 @@ be specified in uppercase.
:widths: auto
* - **Arch-ID**
- **HOST or GPU**
- **Description**
* - AMDAVX
- HOST
- AMD 64-bit x86 CPU (AVX 1)
* - EPYC
- HOST
- AMD EPYC Zen class CPU (AVX 2)
* - ARMV80
- HOST
- ARMv8.0 Compatible CPU
* - ARMV81
- HOST
- ARMv8.1 Compatible CPU
* - ARMV8_THUNDERX
- HOST
- ARMv8 Cavium ThunderX CPU
* - ARMV8_THUNDERX2
- HOST
- ARMv8 Cavium ThunderX2 CPU
* - WSM
- HOST
- Intel Westmere CPU (SSE 4.2)
* - SNB
- HOST
- Intel Sandy/Ivy Bridge CPU (AVX 1)
* - HSW
- HOST
- Intel Haswell CPU (AVX 2)
* - BDW
- HOST
- Intel Broadwell Xeon E-class CPU (AVX 2 + transactional mem)
* - SKX
- HOST
- Intel Sky Lake Xeon E-class HPC CPU (AVX512 + transactional mem)
* - KNC
- HOST
- Intel Knights Corner Xeon Phi
* - KNL
- HOST
- Intel Knights Landing Xeon Phi
* - BGQ
- HOST
- IBM Blue Gene/Q CPU
* - POWER7
- IBM POWER8 CPU
- HOST
- IBM POWER7 CPU
* - POWER8
- HOST
- IBM POWER8 CPU
* - POWER9
- HOST
- IBM POWER9 CPU
* - KEPLER30
- GPU
- NVIDIA Kepler generation CC 3.0 GPU
* - KEPLER32
- GPU
- NVIDIA Kepler generation CC 3.2 GPU
* - KEPLER35
- GPU
- NVIDIA Kepler generation CC 3.5 GPU
* - KEPLER37
- GPU
- NVIDIA Kepler generation CC 3.7 GPU
* - MAXWELL50
- GPU
- NVIDIA Maxwell generation CC 5.0 GPU
* - MAXWELL52
- GPU
- NVIDIA Maxwell generation CC 5.2 GPU
* - MAXWELL53
- GPU
- NVIDIA Maxwell generation CC 5.3 GPU
* - PASCAL60
- GPU
- NVIDIA Pascal generation CC 6.0 GPU
* - PASCAL61
- GPU
- NVIDIA Pascal generation CC 6.1 GPU
* - VOLTA70
- GPU
- NVIDIA Volta generation CC 7.0 GPU
* - VOLTA72
- GPU
- NVIDIA Volta generation CC 7.2 GPU
* - TURING75
- GPU
- NVIDIA Turing generation CC 7.5 GPU
* - VEGA900
- GPU
- AMD GPU MI25 GFX900
* - VEGA906
- GPU
- AMD GPU MI50/MI60 GFX906
CMake build settings:
^^^^^^^^^^^^^^^^^^^^^
Basic CMake build settings:
^^^^^^^^^^^^^^^^^^^^^^^^^^^
For multicore CPUs using OpenMP, set these 2 variables.
.. code-block:: bash
-D Kokkos_ARCH_CPUARCH=yes # CPUARCH = CPU from list above
-D Kokkos_ARCH_HOSTARCH=yes # HOSTARCH = HOST from list above
-D Kokkos_ENABLE_OPENMP=yes
-D BUILD_OMP=yes
@ -427,15 +464,19 @@ For NVIDIA GPUs using CUDA, set these variables:
.. code-block:: bash
-D Kokkos_ARCH_CPUARCH=yes # CPUARCH = CPU from list above
-D Kokkos_ARCH_HOSTARCH=yes # HOSTARCH = HOST from list above
-D Kokkos_ARCH_GPUARCH=yes # GPUARCH = GPU from list above
-D Kokkos_ENABLE_CUDA=yes
-D Kokkos_ENABLE_OPENMP=yes
-D CMAKE_CXX_COMPILER=wrapper # wrapper = full path to Cuda nvcc wrapper
The wrapper value is the Cuda nvcc compiler wrapper provided in the
Kokkos library: ``lib/kokkos/bin/nvcc_wrapper``\ . The setting should
include the full path name to the wrapper, e.g.
This will also enable executing FFTs on the GPU, either via the internal
KISSFFT library, or - by preference - with the cuFFT library bundled
with the CUDA toolkit, depending on whether CMake can identify its
location. The *wrapper* value for ``CMAKE_CXX_COMPILER`` variable is
the path to the CUDA nvcc compiler wrapper provided in the Kokkos
library: ``lib/kokkos/bin/nvcc_wrapper``\ . The setting should include
the full path name to the wrapper, e.g.
.. code-block:: bash
@ -455,8 +496,8 @@ common packages enabled, you can do the following:
cmake -C ../cmake/presets/minimal.cmake -C ../cmake/presets/kokkos-cuda.cmake ../cmake
cmake --build .
Traditional make settings:
^^^^^^^^^^^^^^^^^^^^^^^^^^
Basic traditional make settings:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Choose which hardware to support in ``Makefile.machine`` via
``KOKKOS_DEVICES`` and ``KOKKOS_ARCH`` settings. See the
@ -467,7 +508,7 @@ For multicore CPUs using OpenMP:
.. code-block:: make
KOKKOS_DEVICES = OpenMP
KOKKOS_ARCH = CPUARCH # CPUARCH = CPU from list above
KOKKOS_ARCH = HOSTARCH # HOSTARCH = HOST from list above
For Intel KNLs using OpenMP:
@ -481,7 +522,8 @@ For NVIDIA GPUs using CUDA:
.. code-block:: make
KOKKOS_DEVICES = Cuda
KOKKOS_ARCH = CPUARCH,GPUARCH # CPUARCH = CPU from list above that is hosting the GPU
KOKKOS_ARCH = HOSTARCH,GPUARCH # HOSTARCH = HOST from list above that is hosting the GPU
KOKKOS_CUDA_OPTIONS = "enable_lambda"
# GPUARCH = GPU from list above
FFT_INC = -DFFT_CUFFT # enable use of cuFFT (optional)
FFT_LIB = -lcufft # link to cuFFT library
@ -504,6 +546,44 @@ C++ compiler for non-Kokkos, non-CUDA files.
KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd)
CC = mpicxx -cxx=$(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper
Advanced KOKKOS compilation settings
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
There are other allowed options when building with the KOKKOS package
that can improve performance or assist in debugging or profiling. Below
are some examples that may be useful in combination with LAMMPS. For
the full list (which keeps changing as the Kokkos package itself evolves),
please consult the Kokkos library documentation.
As alternative to using multi-threading via OpenMP
(``-DKokkos_ENABLE_OPENMP=on`` or ``KOKKOS_DEVICES=OpenMP``) it is also
possible to use Posix threads directly (``-DKokkos_ENABLE_PTHREAD=on``
or ``KOKKOS_DEVICES=Pthread``). While binding of threads to individual
or groups of CPU cores is managed in OpenMP with environment variables,
you need assistance from either the "hwloc" or "libnuma" library for the
Pthread thread parallelization option. To enable use with CMake:
``-DKokkos_ENABLE_HWLOC=on`` or ``-DKokkos_ENABLE_LIBNUMA=on``; and with
conventional make: ``KOKKOS_USE_TPLS=hwloc`` or
``KOKKOS_USE_TPLS=libnuma``.
The CMake option ``-DKokkos_ENABLE_LIBRT=on`` or the makefile setting
``KOKKOS_USE_TPLS=librt`` enables the use of a more accurate timer
mechanism on many Unix-like platforms for internal profiling.
The CMake option ``-DKokkos_ENABLE_DEBUG=on`` or the makefile setting
``KOKKOS_DEBUG=yes`` enables printing of run-time
debugging information that can be useful. It also enables runtime
bounds checking on Kokkos data structures. As to be expected, enabling
this option will negatively impact the performance and thus is only
recommended when developing a Kokkos-enabled style in LAMMPS.
The CMake option ``-DKokkos_ENABLE_CUDA_UVM=on`` or the makefile
setting ``KOKKOS_CUDA_OPTIONS=enable_lambda,force_uvm`` enables the
use of CUDA "Unified Virtual Memory" in Kokkos. Please note, that
the LAMMPS KOKKOS package must **always** be compiled with the
*enable_lambda* option when using GPUs.
----------
.. _latte:

View File

@ -9,10 +9,7 @@ different back end languages such as CUDA, OpenMP, or Pthreads. The
Kokkos library also provides data abstractions to adjust (at compile
time) the memory layout of data structures like 2d and 3d arrays to
optimize performance on different hardware. For more information on
Kokkos, see `GitHub <https://github.com/kokkos/kokkos>`_. Kokkos is
part of `Trilinos <https://www.trilinos.org/>`_. The Kokkos
library was written primarily by Carter Edwards, Christian Trott, and
Dan Sunderland (all Sandia).
Kokkos, see `GitHub <https://github.com/kokkos/kokkos>`_.
The LAMMPS KOKKOS package contains versions of pair, fix, and atom
styles that use data structures and macros provided by the Kokkos
@ -21,7 +18,7 @@ package was developed primarily by Christian Trott (Sandia) and Stan
Moore (Sandia) with contributions of various styles by others,
including Sikandar Mashayak (UIUC), Ray Shan (Sandia), and Dan Ibanez
(Sandia). For more information on developing using Kokkos abstractions
see the Kokkos programmers' guide at /lib/kokkos/doc/Kokkos_PG.pdf.
see the Kokkos `Wiki <https://github.com/kokkos/kokkos/wiki>`_.
Kokkos currently provides support for 3 modes of execution (per MPI
task). These are Serial (MPI-only for CPUs and Intel Phi), OpenMP
@ -31,33 +28,30 @@ compatible with specific hardware.
.. note::
Kokkos support within LAMMPS must be built with a C++11 compatible
compiler. This means GCC version 4.7.2 or later, Intel 14.0.4 or later, or
Clang 3.5.2 or later is required.
.. note::
To build with Kokkos support for NVIDIA GPUs, NVIDIA CUDA
To build with Kokkos support for NVIDIA GPUs, the NVIDIA CUDA toolkit
software version 9.0 or later must be installed on your system. See
the discussion for the :doc:`GPU package <Speed_gpu>` for details of how
to check and do this.
the discussion for the :doc:`GPU package <Speed_gpu>` for details of
how to check and do this.
.. note::
Kokkos with CUDA currently implicitly assumes that the MPI library
is CUDA-aware. This is not always the case, especially when using
pre-compiled MPI libraries provided by a Linux distribution. This is not
a problem when using only a single GPU with a single MPI rank. When
running with multiple MPI ranks, you may see segmentation faults without
CUDA-aware MPI support. These can be avoided by adding the flags :doc:`-pk kokkos cuda/aware off <Run_options>` to the LAMMPS command line or by
using the command :doc:`package kokkos cuda/aware off <package>` in the
input file.
Kokkos with CUDA currently implicitly assumes that the MPI library is
CUDA-aware. This is not always the case, especially when using
pre-compiled MPI libraries provided by a Linux distribution. This is
not a problem when using only a single GPU with a single MPI
rank. When running with multiple MPI ranks, you may see segmentation
faults without CUDA-aware MPI support. These can be avoided by adding
the flags :doc:`-pk kokkos cuda/aware off <Run_options>` to the
LAMMPS command line or by using the command :doc:`package kokkos
cuda/aware off <package>` in the input file.
**Building LAMMPS with the KOKKOS package:**
Building LAMMPS with the KOKKOS package
"""""""""""""""""""""""""""""""""""""""
See the :ref:`Build extras <kokkos>` doc page for instructions.
**Running LAMMPS with the KOKKOS package:**
Running LAMMPS with the KOKKOS package
""""""""""""""""""""""""""""""""""""""
All Kokkos operations occur within the context of an individual MPI
task running on a single node of the machine. The total number of MPI
@ -66,7 +60,8 @@ usual manner via the mpirun or mpiexec commands, and is independent of
Kokkos. E.g. the mpirun command in OpenMPI does this via its -np and
-npernode switches. Ditto for MPICH via -np and -ppn.
**Running on a multi-core CPU:**
Running on a multi-core CPU
^^^^^^^^^^^^^^^^^^^^^^^^^^^
Here is a quick overview of how to use the KOKKOS package
for CPU acceleration, assuming one or more 16-core nodes.
@ -142,7 +137,8 @@ atom. When using the Kokkos Serial back end or the OpenMP back end with
a single thread, no duplication or atomic operations are used. For CUDA
and half neighbor lists, the KOKKOS package always uses atomic operations.
**Core and Thread Affinity:**
CPU Cores, Sockets and Thread Affinity
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
When using multi-threading, it is important for performance to bind
both MPI tasks to physical cores, and threads to physical cores, so
@ -156,15 +152,16 @@ for your MPI installation), binding can be forced with these flags:
OpenMPI 1.8: mpirun -np 2 --bind-to socket --map-by socket ./lmp_openmpi ...
Mvapich2 2.0: mpiexec -np 2 --bind-to socket --map-by socket ./lmp_mvapich ...
For binding threads with KOKKOS OpenMP, use thread affinity
environment variables to force binding. With OpenMP 3.1 (gcc 4.7 or
later, intel 12 or later) setting the environment variable
OMP_PROC_BIND=true should be sufficient. In general, for best
performance with OpenMP 4.0 or better set OMP_PROC_BIND=spread and
OMP_PLACES=threads. For binding threads with the KOKKOS pthreads
option, compile LAMMPS the KOKKOS HWLOC=yes option as described below.
For binding threads with KOKKOS OpenMP, use thread affinity environment
variables to force binding. With OpenMP 3.1 (gcc 4.7 or later, intel 12
or later) setting the environment variable ``OMP_PROC_BIND=true`` should
be sufficient. In general, for best performance with OpenMP 4.0 or later
set ``OMP_PROC_BIND=spread`` and ``OMP_PLACES=threads``. For binding
threads with the KOKKOS pthreads option, compile LAMMPS with the hwloc
or libnuma support enabled as described in the :ref:`extra build options page <kokkos>`.
**Running on Knight's Landing (KNL) Intel Xeon Phi:**
Running on Knight's Landing (KNL) Intel Xeon Phi
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Here is a quick overview of how to use the KOKKOS package for the
Intel Knight's Landing (KNL) Xeon Phi:
@ -222,7 +219,8 @@ threads/task as Nt. The product of these two values should be N, i.e.
them in "native" mode, not "offload" mode like the USER-INTEL package
supports.
**Running on GPUs:**
Running on GPUs
^^^^^^^^^^^^^^^
Use the "-k" :doc:`command-line switch <Run_options>` to specify the
number of GPUs per node. Typically the -np setting of the mpirun command
@ -257,7 +255,7 @@ one or more nodes, each with two GPUs:
running on GPUs is to use "full" neighbor lists and set the Newton flag
to "off" for both pairwise and bonded interactions, along with threaded
communication. When running on Maxwell or Kepler GPUs, this will
typically be best. For Pascal GPUs, using "half" neighbor lists and
typically be best. For Pascal GPUs and beyond, using "half" neighbor lists and
setting the Newton flag to "on" may be faster. For many pair styles,
setting the neighbor binsize equal to twice the CPU default value will
give speedup, which is the default when running on GPUs. Use the "-pk
@ -270,13 +268,6 @@ one or more nodes, each with two GPUs:
mpirun -np 2 lmp_kokkos_cuda_openmpi -k on g 2 -sf kk -pk kokkos newton on neigh half binsize 2.8 -in in.lj # Newton on, half neighbor list, set binsize = neighbor ghost cutoff
.. note::
For good performance of the KOKKOS package on GPUs, you must
have Kepler generation GPUs (or later). The Kokkos library exploits
texture cache options not supported by Telsa generation GPUs (or
older).
.. note::
When using a GPU, you will achieve the best performance if your
@ -293,7 +284,8 @@ one or more nodes, each with two GPUs:
kspace, etc., you must set the environment variable CUDA_LAUNCH_BLOCKING=1.
However, this will reduce performance and is not recommended for production runs.
**Run with the KOKKOS package by editing an input script:**
Run with the KOKKOS package by editing an input script
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Alternatively the effect of the "-sf" or "-pk" switches can be
duplicated by adding the :doc:`package kokkos <package>` or :doc:`suffix kk <suffix>` commands to your input script.
@ -316,17 +308,24 @@ You only need to use the :doc:`package kokkos <package>` command if you
wish to change any of its option defaults, as set by the "-k on"
:doc:`command-line switch <Run_options>`.
**Using OpenMP threading and CUDA together (experimental):**
**Using OpenMP threading and CUDA together:**
With the KOKKOS package, both OpenMP multi-threading and GPUs can be
used together in a few special cases. In the Makefile, the
KOKKOS_DEVICES variable must include both "Cuda" and "OpenMP", as is
the case for /src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi
compiled and used together in a few special cases. In the makefile for
the conventional build, the KOKKOS_DEVICES variable must include both,
"Cuda" and "OpenMP", as is the case for ``/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi``.
.. code-block:: bash
KOKKOS_DEVICES=Cuda,OpenMP
When building with CMake you need to enable both features as it is done
in the ``kokkos-cuda.cmake`` CMake preset file.
.. code-block:: bash
cmake ../cmake -DKokkos_ENABLE_CUDA=yes -DKokkos_ENABLE_OPENMP=yes
The suffix "/kk" is equivalent to "/kk/device", and for Kokkos CUDA,
using the "-sf kk" in the command line gives the default CUDA version
everywhere. However, if the "/kk/host" suffix is added to a specific
@ -360,7 +359,8 @@ suffix for kspace and bonds, angles, etc. in the input file and the
sure the environment variable CUDA_LAUNCH_BLOCKING is not set to "1"
so CPU/GPU overlap can occur.
**Speed-ups to expect:**
Performance to expect
"""""""""""""""""""""
The performance of KOKKOS running in different modes is a function of
your hardware, which KOKKOS-enable styles are used, and the problem
@ -377,52 +377,26 @@ Generally speaking, the following rules of thumb apply:
performance of a KOKKOS style is a bit slower than the USER-OMP
package.
* When running large number of atoms per GPU, KOKKOS is typically faster
than the GPU package.
than the GPU package when compiled for double precision. The benefit
of using single or mixed precision with the GPU package depends
significantly on the hardware in use and the simulated system and pair
style.
* When running on Intel hardware, KOKKOS is not as fast as
the USER-INTEL package, which is optimized for that hardware.
the USER-INTEL package, which is optimized for x86 hardware (not just
from Intel) and compilation with the Intel compilers. The USER-INTEL
package also can increase the vector length of vector instructions
by switching to single or mixed precision mode.
See the `Benchmark page <https://lammps.sandia.gov/bench.html>`_ of the
LAMMPS web site for performance of the KOKKOS package on different
hardware.
**Advanced Kokkos options:**
Advanced Kokkos options
"""""""""""""""""""""""
There are other allowed options when building with the KOKKOS package.
As explained on the :ref:`Build extras <kokkos>` doc page,
they can be set either as variables on the make command line or in
Makefile.machine, or they can be specified as CMake variables. Each
takes a value shown below. The default value is listed, which is set
in the lib/kokkos/Makefile.kokkos file.
* KOKKOS_DEBUG, values = *yes*\ , *no*\ , default = *no*
* KOKKOS_USE_TPLS, values = *hwloc*\ , *librt*\ , *experimental_memkind*, default = *none*
* KOKKOS_CXX_STANDARD, values = *c++11*\ , *c++1z*\ , default = *c++11*
* KOKKOS_OPTIONS, values = *aggressive_vectorization*, *disable_profiling*, default = *none*
* KOKKOS_CUDA_OPTIONS, values = *force_uvm*, *use_ldg*, *rdc*\ , *enable_lambda*, default = *enable_lambda*
KOKKOS_USE_TPLS=hwloc binds threads to hardware cores, so they do not
migrate during a simulation. KOKKOS_USE_TPLS=hwloc should always be
used if running with KOKKOS_DEVICES=Pthreads for pthreads. It is not
necessary for KOKKOS_DEVICES=OpenMP for OpenMP, because OpenMP
provides alternative methods via environment variables for binding
threads to hardware cores. More info on binding threads to cores is
given on the :doc:`Speed omp <Speed_omp>` doc page.
KOKKOS_USE_TPLS=librt enables use of a more accurate timer mechanism
on most Unix platforms. This library is not available on all
platforms.
KOKKOS_DEBUG is only useful when developing a Kokkos-enabled style
within LAMMPS. KOKKOS_DEBUG=yes enables printing of run-time
debugging information that can be useful. It also enables runtime
bounds checking on Kokkos data structures.
KOKKOS_CXX_STANDARD and KOKKOS_OPTIONS are typically not changed when
building LAMMPS.
KOKKOS_CUDA_OPTIONS are additional options for CUDA. The LAMMPS KOKKOS
package must be compiled with the *enable_lambda* option when using
GPUs.
There are other allowed options when building with the KOKKOS package
that can improve performance or assist in debugging or profiling.
They are explained on the :ref:`KOKKOS section of the build extras <kokkos>` doc page,
Restrictions
""""""""""""

View File

@ -299,6 +299,12 @@ match what is stored in the restart file. So if you wish to change
them, you should use the change_box command after the read_restart
command.
.. note::
Changing a periodic boundary to a non-periodic one will also
cause the image flag for that dimension to be reset to 0 for
all atoms. LAMMPS will print a warning message, if that happens.
----------
The *ortho* and *triclinic* keywords convert the simulation box to be

View File

@ -3,6 +3,9 @@
compute orientorder/atom command
================================
compute orientorder/atom/kk command
=======================
Syntax
""""""
@ -128,6 +131,30 @@ too frequently.
:doc:`special_bonds <special_bonds>` command that includes all pairs in
the neighbor list.
----------
Styles with a *gpu*\ , *intel*\ , *kk*\ , *omp*\ , or *opt* suffix are
functionally the same as the corresponding style without the suffix.
They have been optimized to run faster, depending on your available
hardware, as discussed on the :doc:`Speed packages <Speed_packages>` doc
page. The accelerated styles take the same arguments and should
produce the same results, except for round-off and precision issues.
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
USER-OMP and OPT packages, respectively. They are only enabled if
LAMMPS was built with those packages. See the :doc:`Build package <Build_package>` doc page for more info.
You can specify the accelerated styles explicitly in your input script
by including their suffix, or you can use the :doc:`-suffix command-line switch <Run_options>` when you invoke LAMMPS, or you can use the
:doc:`suffix <suffix>` command in your input script.
See the :doc:`Speed packages <Speed_packages>` doc page for more
instructions on how to use the accelerated styles effectively.
----------
**Output info:**
This compute calculates a per-atom array with *nlvalues* columns,

View File

@ -2,6 +2,7 @@
compute stress/atom command
===========================
compute centroid/stress/atom command
====================================
@ -223,15 +224,14 @@ The per-atom array values will be in pressure\*volume
Restrictions
""""""""""""
Currently, compute *centroid/stress/atom* does not support
pair styles with many-body interactions,
such as :doc:`Tersoff <pair_tersoff>`,
and LAMMPS will generate an error in such cases.
In principal, equivalent formulation
to that of angle, dihedral and improper contributions
in the virial :math:`W_{ab}` formula
can also be applied to the many-body pair styles,
and is planned in the future.
Currently (Spring 2020), compute *centroid/stress/atom* does not support
pair styles with many-body interactions, such as :doc:`Tersoff
<pair_tersoff>`, or pair styles with long-range Coulomb interactions.
LAMMPS will generate an error in such cases. In principal, equivalent
formulation to that of angle, dihedral and improper contributions in the
virial :math:`W_{ab}` formula can also be applied to the many-body pair
styles, and is planned in the future.
Related commands
""""""""""""""""

52
doc/src/fix_bond_react.rst Normal file → Executable file
View File

@ -158,7 +158,9 @@ The following comments pertain to each *react* argument (in other
words, can be customized for each reaction, or reaction step):
A check for possible new reaction sites is performed every *Nevery*
timesteps.
timesteps. *Nevery* can be specified with an equal-style
:doc:`variable <variable>`, whose value is rounded up to the nearest
integer.
Three physical conditions must be met for a reaction to occur. First,
a bonding atom pair must be identified within the reaction distance
@ -171,19 +173,29 @@ modified to match the post-reaction template.
A bonding atom pair will be identified if several conditions are met.
First, a pair of atoms I,J within the specified react-group-ID of type
itype and jtype must be separated by a distance between *Rmin* and
*Rmax*\ . It is possible that multiple bonding atom pairs are
identified: if the bonding atoms in the pre-reacted template are 1-2
neighbors, i.e. directly bonded, the farthest bonding atom partner is
set as its bonding partner; otherwise, the closest potential partner
is chosen. Then, if both an atom I and atom J have each other as their
bonding partners, these two atoms are identified as the bonding atom
pair of the reaction site. Once this unique bonding atom pair is
identified for each reaction, there could two or more reactions that
involve a given atom on the same timestep. If this is the case, only
one such reaction is permitted to occur. This reaction is chosen
randomly from all potential reactions. This capability allows e.g. for
different reaction pathways to proceed from identical reaction sites
with user-specified probabilities.
*Rmax*\ . *Rmin* and *Rmax* can be specified with equal-style
:doc:`variables <variable>`. For example, these reaction cutoffs can
be a function of the reaction conversion using the following commands:
.. code-block:: LAMMPS
variable rmax equal 0 # initialize variable before bond/react
fix myrxn all bond/react react myrxn1 all 1 0 v_rmax mol1 mol2 map_file.txt
variable rmax equal 3+f_myrxn[1]/100 # arbitrary function of reaction count
It is possible that multiple bonding atom pairs are identified: if the
bonding atoms in the pre-reacted template are 1-2 neighbors, i.e.
directly bonded, the farthest bonding atom partner is set as its
bonding partner; otherwise, the closest potential partner is chosen.
Then, if both an atom I and atom J have each other as their bonding
partners, these two atoms are identified as the bonding atom pair of
the reaction site. Once this unique bonding atom pair is identified
for each reaction, there could two or more reactions that involve a
given atom on the same timestep. If this is the case, only one such
reaction is permitted to occur. This reaction is chosen randomly from
all potential reactions. This capability allows e.g. for different
reaction pathways to proceed from identical reaction sites with
user-specified probabilities.
The pre-reacted molecule template is specified by a molecule command.
This molecule template file contains a sample reaction site and its
@ -419,7 +431,8 @@ it occurs:
The *prob* keyword can affect whether or not an eligible reaction
actually occurs. The fraction setting must be a value between 0.0 and
1.0. A uniform random number between 0.0 and 1.0 is generated and the
1.0, and can be specified with an equal-style :doc:`variable <variable>`.
A uniform random number between 0.0 and 1.0 is generated and the
eligible reaction only occurs if the random number is less than the
fraction. Up to N reactions are permitted to occur, as optionally
specified by the *max_rxn* keyword.
@ -489,10 +502,11 @@ local command.
**Restart, fix_modify, output, run start/stop, minimize info:**
Cumulative reaction counts for each reaction are written to :doc:`binary restart files <restart>`. These values are associated with the
reaction name (react-ID). Additionally, internally-created per-atom
properties are stored to allow for smooth restarts. None of the
:doc:`fix_modify <fix_modify>` options are relevant to this fix.
Cumulative reaction counts for each reaction are written to :doc:`binary restart files <restart>`.
These values are associated with the reaction name (react-ID).
Additionally, internally-created per-atom properties are stored to
allow for smooth restarts. None of the :doc:`fix_modify <fix_modify>`
options are relevant to this fix.
This fix computes one statistic for each *react* argument that it
stores in a global vector, of length 'number of react arguments', that

View File

@ -47,16 +47,22 @@ and the specified geometric :doc:`region <region>` in order to have
energy added or subtracted to it. If not specified, then the atoms in
the group are affected wherever they may move to.
Heat addition/subtraction is performed every N timesteps. The *eflux*
parameter can be specified as a numeric constant or as a variable (see
below). If it is a numeric constant or equal-style variable which
evaluates to a scalar value, then the *eflux* determines the change in
aggregate energy of the entire group of atoms per unit time, e.g. in
eV/psec for :doc:`metal units <units>`. In this case it is an
"extensive" quantity, meaning its magnitude should be scaled with the
number of atoms in the group. Note that since *eflux* has per-time
units (i.e. it is a flux), this means that a larger value of N will
add/subtract a larger amount of energy each time the fix is invoked.
Heat addition/subtraction is performed every N timesteps.
The *eflux* parameter can be specified as a numeric constant or as an
equal- or atom-style :doc:`variable <variable>`. If the value is a
variable, it should be specified as v_name, where *name* is the variable
name. In this case, the variable will be evaluated each timestep, and
its current value(s) used to determine the flux.
If *eflux* is a numeric constant or equal-style variable which evaluates
to a scalar value, then *eflux* determines the change in aggregate energy
of the entire group of atoms per unit time, e.g. in eV/psec for
:doc:`metal units <units>`. In this case it is an "extensive" quantity,
meaning its magnitude should be scaled with the number of atoms in the
group. Note that since *eflux* also has per-time units (i.e. it is a
flux), this means that a larger value of N will add/subtract a larger
amount of energy each time the fix is invoked.
.. note::
@ -71,12 +77,6 @@ the energy flux for a single atom, again in units of energy per unit
time. In this case, each value is an "intensive" quantity, which need
not be scaled with the number of atoms in the group.
As mentioned above, the *eflux* parameter can be specified as an
equal-style or atom_style :doc:`variable <variable>`. If the value is a
variable, it should be specified as v_name, where name is the variable
name. In this case, the variable will be evaluated each timestep, and
its value(s) used to determine the flux.
Equal-style variables can specify formulas with various mathematical
functions, and include :doc:`thermo_style <thermo_style>` command
keywords for the simulation box parameters and timestep and elapsed

View File

@ -15,15 +15,14 @@ Syntax
* k_n = normal repulsion strength (force/distance units or pressure units - see discussion below)
* c_n = normal damping coefficient (force/distance units or pressure units - see discussion below)
* c_t = tangential damping coefficient (force/distance units or pressure units - see discussion below)
* wallstyle = *xplane* or *yplane* or *zplane* or *zcylinder*
* wallstyle = *xplane* or *yplane* or *zplane*
* args = list of arguments for a particular style
.. parsed-literal::
*xplane* or *yplane* args = lo hi
*xplane* or *yplane* or *zplane* args = lo hi
lo,hi = position of lower and upper plane (distance units), either can be NULL)
*zcylinder* args = radius
radius = cylinder radius (distance units)
* zero or more keyword/value pairs may be appended to args
* keyword = *wiggle*
@ -60,8 +59,7 @@ those specified with the :doc:`pair_style body/rounded/polyhedron <pair_body_rou
The *wallstyle* can be planar or cylindrical. The 3 planar options
specify a pair of walls in a dimension. Wall positions are given by
*lo* and *hi*\ . Either of the values can be specified as NULL if a
single wall is desired. For a *zcylinder* wallstyle, the cylinder's
axis is at x = y = 0.0, and the radius of the cylinder is specified.
single wall is desired.
Optionally, the wall can be moving, if the *wiggle* keyword is appended.
@ -71,8 +69,7 @@ particles. The arguments to the *wiggle* keyword specify a dimension
for the motion, as well as it's *amplitude* and *period*\ . Note that
if the dimension is in the plane of the wall, this is effectively a
shearing motion. If the dimension is perpendicular to the wall, it is
more of a shaking motion. A *zcylinder* wall can only be wiggled in
the z dimension.
more of a shaking motion.
Each timestep, the position of a wiggled wall in the appropriate *dim*
is set according to this equation:

View File

@ -499,6 +499,7 @@ cuda
Cuda
CUDA
CuH
cuFFT
Cummins
Curk
customIDs
@ -1544,6 +1545,7 @@ libmeam
libmessage
libmpi
libmpich
libnuma
libplumed
libplumedKernel
libpng

View File

@ -0,0 +1,56 @@
# two monomer nylon example
# reaction produces a condensed water molecule
units real
boundary p p p
atom_style full
kspace_style pppm 1.0e-4
pair_style lj/class2/coul/long 8.5
angle_style class2
bond_style class2
dihedral_style class2
improper_style class2
read_data tiny_nylon.data
variable runsteps equal 1000
variable prob1 equal step/v_runsteps*2
variable prob2 equal (step/v_runsteps)>0.5
velocity all create 300.0 4928459 dist gaussian
molecule mol1 rxn1_stp1_unreacted.data_template
molecule mol2 rxn1_stp1_reacted.data_template
molecule mol3 rxn1_stp2_unreacted.data_template
molecule mol4 rxn1_stp2_reacted.data_template
thermo 50
# dump 1 all xyz 1 test_vis.xyz
fix myrxns all bond/react stabilization yes statted_grp .03 &
react rxn1 all 1 0.0 5.0 mol1 mol2 rxn1_stp1_map prob v_prob1 1234 &
react rxn2 all 1 0.0 5.0 mol3 mol4 rxn1_stp2_map prob v_prob2 1234
fix 1 statted_grp_REACT nvt temp 300 300 100
# optionally, you can customize behavior of reacting atoms,
# by using the internally-created 'bond_react_MASTER_group', like so:
fix 4 bond_react_MASTER_group temp/rescale 1 300 300 10 1
thermo_style custom step temp press density v_prob1 v_prob2 f_myrxns[1] f_myrxns[2]
# restart 100 restart1 restart2
run ${runsteps}
# write_restart restart_longrun
# write_data restart_longrun.data

View File

@ -0,0 +1,201 @@
LAMMPS (15 Apr 2020)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:94)
using 1 OpenMP thread(s) per MPI task
# two monomer nylon example
# reaction produces a condensed water molecule
units real
boundary p p p
atom_style full
kspace_style pppm 1.0e-4
pair_style lj/class2/coul/long 8.5
angle_style class2
bond_style class2
dihedral_style class2
improper_style class2
read_data tiny_nylon.data
orthogonal box = (-25 -25 -25) to (25 25 25)
1 by 1 by 1 MPI processor grid
reading atoms ...
44 atoms
reading velocities ...
44 velocities
scanning bonds ...
9 = max bonds/atom
scanning angles ...
21 = max angles/atom
scanning dihedrals ...
29 = max dihedrals/atom
scanning impropers ...
29 = max impropers/atom
reading bonds ...
42 bonds
reading angles ...
74 angles
reading dihedrals ...
100 dihedrals
reading impropers ...
44 impropers
4 = max # of 1-2 neighbors
6 = max # of 1-3 neighbors
12 = max # of 1-4 neighbors
41 = max # of special neighbors
special bonds CPU = 0.000385045 secs
read_data CPU = 0.013443 secs
variable runsteps equal 1000
variable prob1 equal step/v_runsteps*2
variable prob2 equal (step/v_runsteps)>0.5
velocity all create 300.0 4928459 dist gaussian
molecule mol1 rxn1_stp1_unreacted.data_template
Read molecule template mol1:
1 molecules
18 atoms with max type 8
16 bonds with max type 14
25 angles with max type 28
23 dihedrals with max type 36
14 impropers with max type 11
molecule mol2 rxn1_stp1_reacted.data_template
Read molecule template mol2:
1 molecules
18 atoms with max type 9
17 bonds with max type 13
31 angles with max type 27
39 dihedrals with max type 33
20 impropers with max type 1
molecule mol3 rxn1_stp2_unreacted.data_template
Read molecule template mol3:
1 molecules
15 atoms with max type 9
14 bonds with max type 13
25 angles with max type 27
30 dihedrals with max type 33
16 impropers with max type 1
molecule mol4 rxn1_stp2_reacted.data_template
Read molecule template mol4:
1 molecules
15 atoms with max type 11
13 bonds with max type 15
19 angles with max type 29
16 dihedrals with max type 32
10 impropers with max type 13
thermo 50
# dump 1 all xyz 1 test_vis.xyz
fix myrxns all bond/react stabilization yes statted_grp .03 react rxn1 all 1 0.0 5.0 mol1 mol2 rxn1_stp1_map prob v_prob1 1234 react rxn2 all 1 0.0 5.0 mol3 mol4 rxn1_stp2_map prob v_prob2 1234
WARNING: Bond/react: Atom affected by reaction rxn1 too close to template edge (src/USER-REACTION/fix_bond_react.cpp:2051)
WARNING: Bond/react: Atom affected by reaction rxn2 too close to template edge (src/USER-REACTION/fix_bond_react.cpp:2051)
dynamic group bond_react_MASTER_group defined
dynamic group statted_grp_REACT defined
fix 1 statted_grp_REACT nvt temp 300 300 100
# optionally, you can customize behavior of reacting atoms,
# by using the internally-created 'bond_react_MASTER_group', like so:
fix 4 bond_react_MASTER_group temp/rescale 1 300 300 10 1
thermo_style custom step temp press density v_prob1 v_prob2 f_myrxns[1] f_myrxns[2]
# restart 100 restart1 restart2
run ${runsteps}
run 1000
PPPM initialization ...
using 12-bit tables for long-range coulomb (src/kspace.cpp:332)
G vector (1/distance) = 0.0534597
grid = 2 2 2
stencil order = 5
estimated absolute RMS force accuracy = 0.0402256
estimated relative force accuracy = 0.000121138
using double precision FFTW3
3d grid and FFT values/proc = 343 8
Neighbor list info ...
update every 1 steps, delay 10 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 10.5
ghost atom cutoff = 10.5
binsize = 5.25, bins = 10 10 10
2 neighbor lists, perpetual/occasional/extra = 1 1 0
(1) pair lj/class2/coul/long, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/3d/newton
bin: standard
(2) fix bond/react, occasional, copy from (1)
attributes: half, newton on
pair build: copy
stencil: none
bin: none
WARNING: Inconsistent image flags (src/domain.cpp:812)
Per MPI rank memory allocation (min/avg/max) = 33.78 | 33.78 | 33.78 Mbytes
Step Temp Press Density v_prob1 v_prob2 f_myrxns[1] f_myrxns[2]
0 300 346.78165 0.0034851739 0 0 0 0
50 262.63913 -492.10749 0.0034851739 0.1 0 1 0
100 766.52962 -29.714349 0.0034851739 0.2 0 1 0
150 503.86837 50.220304 0.0034851739 0.3 0 1 0
200 456.51295 12.312892 0.0034851739 0.4 0 1 0
250 391.54928 9.2335844 0.0034851739 0.5 0 1 0
300 336.6988 -47.193937 0.0034851739 0.6 0 1 0
350 254.06985 -9.2867898 0.0034851739 0.7 0 1 0
400 259.41098 -25.657321 0.0034851739 0.8 0 1 0
450 258.10364 22.5086 0.0034851739 0.9 0 1 0
500 272.13412 -6.5391448 0.0034851739 1 0 1 0
550 202.75504 54.658731 0.0034851739 1.1 1 1 1
600 344.79887 23.798478 0.0034851739 1.2 1 1 1
650 328.44488 -29.908484 0.0034851739 1.3 1 1 1
700 280.13593 -8.3223255 0.0034851739 1.4 1 1 1
750 300.67624 1.0632669 0.0034851739 1.5 1 1 1
800 376.64234 12.488392 0.0034851739 1.6 1 1 1
850 321.07642 19.814074 0.0034851739 1.7 1 1 1
900 332.23751 30.814079 0.0034851739 1.8 1 1 1
950 311.14029 5.7853136 0.0034851739 1.9 1 1 1
1000 253.14634 -37.560642 0.0034851739 2 1 1 1
Loop time of 0.379454 on 1 procs for 1000 steps with 44 atoms
Performance: 227.696 ns/day, 0.105 hours/ns, 2635.368 timesteps/s
99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.069723 | 0.069723 | 0.069723 | 0.0 | 18.37
Bond | 0.14802 | 0.14802 | 0.14802 | 0.0 | 39.01
Kspace | 0.044252 | 0.044252 | 0.044252 | 0.0 | 11.66
Neigh | 0.072359 | 0.072359 | 0.072359 | 0.0 | 19.07
Comm | 0.0044748 | 0.0044748 | 0.0044748 | 0.0 | 1.18
Output | 0.0022775 | 0.0022775 | 0.0022775 | 0.0 | 0.60
Modify | 0.036509 | 0.036509 | 0.036509 | 0.0 | 9.62
Other | | 0.00184 | | | 0.48
Nlocal: 44 ave 44 max 44 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 3 ave 3 max 3 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 722 ave 722 max 722 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 722
Ave neighs/atom = 16.4091
Ave special neighs/atom = 9.77273
Neighbor list builds = 1000
Dangerous builds = 0
# write_restart restart_longrun
# write_data restart_longrun.data
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:00

View File

@ -0,0 +1,201 @@
LAMMPS (15 Apr 2020)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:94)
using 1 OpenMP thread(s) per MPI task
# two monomer nylon example
# reaction produces a condensed water molecule
units real
boundary p p p
atom_style full
kspace_style pppm 1.0e-4
pair_style lj/class2/coul/long 8.5
angle_style class2
bond_style class2
dihedral_style class2
improper_style class2
read_data tiny_nylon.data
orthogonal box = (-25 -25 -25) to (25 25 25)
1 by 2 by 2 MPI processor grid
reading atoms ...
44 atoms
reading velocities ...
44 velocities
scanning bonds ...
9 = max bonds/atom
scanning angles ...
21 = max angles/atom
scanning dihedrals ...
29 = max dihedrals/atom
scanning impropers ...
29 = max impropers/atom
reading bonds ...
42 bonds
reading angles ...
74 angles
reading dihedrals ...
100 dihedrals
reading impropers ...
44 impropers
4 = max # of 1-2 neighbors
6 = max # of 1-3 neighbors
12 = max # of 1-4 neighbors
41 = max # of special neighbors
special bonds CPU = 0.000431282 secs
read_data CPU = 0.0129571 secs
variable runsteps equal 1000
variable prob1 equal step/v_runsteps*2
variable prob2 equal (step/v_runsteps)>0.5
velocity all create 300.0 4928459 dist gaussian
molecule mol1 rxn1_stp1_unreacted.data_template
Read molecule template mol1:
1 molecules
18 atoms with max type 8
16 bonds with max type 14
25 angles with max type 28
23 dihedrals with max type 36
14 impropers with max type 11
molecule mol2 rxn1_stp1_reacted.data_template
Read molecule template mol2:
1 molecules
18 atoms with max type 9
17 bonds with max type 13
31 angles with max type 27
39 dihedrals with max type 33
20 impropers with max type 1
molecule mol3 rxn1_stp2_unreacted.data_template
Read molecule template mol3:
1 molecules
15 atoms with max type 9
14 bonds with max type 13
25 angles with max type 27
30 dihedrals with max type 33
16 impropers with max type 1
molecule mol4 rxn1_stp2_reacted.data_template
Read molecule template mol4:
1 molecules
15 atoms with max type 11
13 bonds with max type 15
19 angles with max type 29
16 dihedrals with max type 32
10 impropers with max type 13
thermo 50
# dump 1 all xyz 1 test_vis.xyz
fix myrxns all bond/react stabilization yes statted_grp .03 react rxn1 all 1 0.0 5.0 mol1 mol2 rxn1_stp1_map prob v_prob1 1234 react rxn2 all 1 0.0 5.0 mol3 mol4 rxn1_stp2_map prob v_prob2 1234
WARNING: Bond/react: Atom affected by reaction rxn1 too close to template edge (src/USER-REACTION/fix_bond_react.cpp:2051)
WARNING: Bond/react: Atom affected by reaction rxn2 too close to template edge (src/USER-REACTION/fix_bond_react.cpp:2051)
dynamic group bond_react_MASTER_group defined
dynamic group statted_grp_REACT defined
fix 1 statted_grp_REACT nvt temp 300 300 100
# optionally, you can customize behavior of reacting atoms,
# by using the internally-created 'bond_react_MASTER_group', like so:
fix 4 bond_react_MASTER_group temp/rescale 1 300 300 10 1
thermo_style custom step temp press density v_prob1 v_prob2 f_myrxns[1] f_myrxns[2]
# restart 100 restart1 restart2
run ${runsteps}
run 1000
PPPM initialization ...
using 12-bit tables for long-range coulomb (src/kspace.cpp:332)
G vector (1/distance) = 0.0534597
grid = 2 2 2
stencil order = 5
estimated absolute RMS force accuracy = 0.0402256
estimated relative force accuracy = 0.000121138
using double precision FFTW3
3d grid and FFT values/proc = 252 2
Neighbor list info ...
update every 1 steps, delay 10 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 10.5
ghost atom cutoff = 10.5
binsize = 5.25, bins = 10 10 10
2 neighbor lists, perpetual/occasional/extra = 1 1 0
(1) pair lj/class2/coul/long, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/3d/newton
bin: standard
(2) fix bond/react, occasional, copy from (1)
attributes: half, newton on
pair build: copy
stencil: none
bin: none
WARNING: Inconsistent image flags (src/domain.cpp:812)
Per MPI rank memory allocation (min/avg/max) = 33.66 | 33.88 | 34.43 Mbytes
Step Temp Press Density v_prob1 v_prob2 f_myrxns[1] f_myrxns[2]
0 300 346.78165 0.0034851739 0 0 0 0
50 266.5092 -90.813802 0.0034851739 0.1 0 1 0
100 559.41271 -53.23688 0.0034851739 0.2 0 1 0
150 489.90516 31.555817 0.0034851739 0.3 0 1 0
200 326.18391 7.7889992 0.0034851739 0.4 0 1 0
250 339.78203 2.3919541 0.0034851739 0.5 0 1 0
300 370.90263 -32.01673 0.0034851739 0.6 0 1 0
350 294.07547 -5.4019813 0.0034851739 0.7 0 1 0
400 287.76477 12.254133 0.0034851739 0.8 0 1 0
450 293.36482 66.372956 0.0034851739 0.9 0 1 0
500 246.84496 26.132317 0.0034851739 1 0 1 0
550 253.08778 -15.350262 0.0034851739 1.1 1 1 1
600 358.83641 25.007371 0.0034851739 1.2 1 1 1
650 320.51492 -32.34823 0.0034851739 1.3 1 1 1
700 310.87976 -8.2306669 0.0034851739 1.4 1 1 1
750 307.54142 12.025818 0.0034851739 1.5 1 1 1
800 272.51724 -22.92823 0.0034851739 1.6 1 1 1
850 268.66181 10.069534 0.0034851739 1.7 1 1 1
900 265.5531 -10.471377 0.0034851739 1.8 1 1 1
950 259.43086 9.4546712 0.0034851739 1.9 1 1 1
1000 247.14622 20.250308 0.0034851739 2 1 1 1
Loop time of 0.357762 on 4 procs for 1000 steps with 44 atoms
Performance: 241.502 ns/day, 0.099 hours/ns, 2795.157 timesteps/s
99.0% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.0003917 | 0.015545 | 0.033317 | 11.9 | 4.35
Bond | 0.0010131 | 0.030153 | 0.076975 | 18.2 | 8.43
Kspace | 0.092857 | 0.1462 | 0.18688 | 10.7 | 40.87
Neigh | 0.043786 | 0.044014 | 0.044189 | 0.1 | 12.30
Comm | 0.03636 | 0.038345 | 0.040538 | 0.8 | 10.72
Output | 0.00091578 | 0.0012541 | 0.0020923 | 1.4 | 0.35
Modify | 0.075379 | 0.080791 | 0.086052 | 1.8 | 22.58
Other | | 0.00146 | | | 0.41
Nlocal: 11 ave 32 max 0 min
Histogram: 2 0 1 0 0 0 0 0 0 1
Nghost: 40 ave 51 max 19 min
Histogram: 1 0 0 0 0 0 0 1 0 2
Neighs: 191 ave 529 max 0 min
Histogram: 2 0 0 0 1 0 0 0 0 1
Total # of neighbors = 764
Ave neighs/atom = 17.3636
Ave special neighs/atom = 9.77273
Neighbor list builds = 1000
Dangerous builds = 0
# write_restart restart_longrun
# write_data restart_longrun.data
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:00

View File

@ -11,7 +11,7 @@
//
// begin :
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_preprocessor.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin : June 2018
// email : ndactrung@gmail.com
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
@ -109,7 +109,7 @@ __kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,
numtyp rsq = delx*delx+dely*dely+delz*delz;
int mtype=itype*lj_types+jtype;
if (rsq<cutsq_sigma[mtype].x) { // cutsq
if (rsq<cutsq_sigma[mtype].x) { // cutsq
numtyp forcecoul,forceborn,force,r6inv,prefactor,_erfc,rexp;
rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
@ -250,7 +250,7 @@ __kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_,
numtyp delz = ix.z-jx.z;
numtyp rsq = delx*delx+dely*dely+delz*delz;
if (rsq<cutsq_sigma[mtype].x) { // cutsq
if (rsq<cutsq_sigma[mtype].x) { // cutsq
numtyp forcecoul,forceborn,force,r6inv,prefactor,_erfc,rexp;
rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : ndactrung@gmail.com
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : ndtrung@umich.edu
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : ndtrung@umich.edu
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin : 8/15/2012
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin : July 2011
// email : a.kohlmeyer@temple.edu
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin : June 2018
// email : ndactrung@gmail.com
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin : Jan 15, 2014
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : brownw@ornl.gov nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -9,9 +9,8 @@
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
// __________________________________________________________________________
//
// begin :
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_preprocessor.h"
@ -53,8 +52,8 @@ __kernel void kernel_nbor(const __global numtyp4 *restrict x_,
int itype=fast_mul(iw,ntypes);
int newj=0;
for ( ; nbor<nbor_end; nbor+=nbor_pitch) {
int j=dev_ij[nbor];
j &= NEIGHMASK;
int sj=dev_ij[nbor];
int j = sj & NEIGHMASK;
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
int jtype=jx.w;
int mtype=itype+jtype;
@ -69,7 +68,7 @@ __kernel void kernel_nbor(const __global numtyp4 *restrict x_,
rsq+=t*t;
if (rsq<cf.x) {
dev_nbor[packed]=j;
dev_nbor[packed]=sj;
packed+=nbor_pitch;
newj++;
}
@ -117,8 +116,8 @@ __kernel void kernel_nbor_fast(const __global numtyp4 *restrict x_,
int newj=0;
for ( ; nbor<nbor_end; nbor+=nbor_pitch) {
int j=dev_ij[nbor];
j &= NEIGHMASK;
int sj=dev_ij[nbor];
int j = sj & NEIGHMASK;
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
int jtype=jx.w;
int mtype=itype+jtype;
@ -133,7 +132,7 @@ __kernel void kernel_nbor_fast(const __global numtyp4 *restrict x_,
rsq+=t*t;
if (rsq<cutsq[mtype]) {
dev_nbor[packed]=j;
dev_nbor[packed]=sj;
packed+=nbor_pitch;
newj++;
}

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_ellipsoid_extra.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_ellipsoid_extra.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin : Mon May 16 2011
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : ndactrung@gmail.com
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin : 7/12/2012
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : ibains@nvidia.com
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : ndactrung@gmail.com
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_preprocessor.h"

View File

@ -12,7 +12,7 @@
//
// begin :
// email : penwang@nvidia.com, brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_preprocessor.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin : Fri May 06 2011
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_ellipsoid_extra.h"

View File

@ -11,7 +11,7 @@
//
// begin : Fri May 06 2011
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_ellipsoid_extra.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin : Tue March 26, 2013
// email : brownw@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin : Thu April 17, 2014
// email : ndactrung@gmail.com
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_tersoff_extra.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : ndactrung@gmail.com
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_tersoff_mod_extra.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : ndactrung@gmail.com
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_tersoff_zbl_extra.h"

View File

@ -10,7 +10,7 @@
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
begin :
begin :
email : pl.rodolfo@gmail.com
dekoning@ifi.unicamp.br
***************************************************************************/
@ -31,10 +31,10 @@ __kernel void k_ufm(const __global numtyp4 *restrict x_,
const __global numtyp4 *restrict uf3,
const int lj_types,
const __global numtyp *restrict sp_lj,
const __global int * dev_nbor,
const __global int * dev_packed,
__global acctyp4 *restrict ans,
__global acctyp *restrict engv,
const __global int * dev_nbor,
const __global int * dev_packed,
__global acctyp4 *restrict ans,
__global acctyp *restrict engv,
const int eflag, const int vflag, const int inum,
const int nbor_pitch, const int t_per_atom) {
int tid, ii, offset;
@ -46,19 +46,19 @@ __kernel void k_ufm(const __global numtyp4 *restrict x_,
acctyp virial[6];
for (int i=0; i<6; i++)
virial[i]=(acctyp)0;
if (ii<inum) {
int i, numj, nbor, nbor_end;
__local int n_stride;
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
n_stride,nbor_end,nbor);
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
int itype=ix.w;
numtyp factor_lj;
for ( ; nbor<nbor_end; nbor+=n_stride) {
int j=dev_packed[nbor];
factor_lj = sp_lj[sbmask(j)];
j &= NEIGHMASK;
@ -71,12 +71,12 @@ __kernel void k_ufm(const __global numtyp4 *restrict x_,
numtyp dely = ix.y-jx.y;
numtyp delz = ix.z-jx.z;
numtyp rsq = delx*delx+dely*dely+delz*delz;
int mtype=itype*lj_types+jtype;
if (rsq<uf1[mtype].z) {
numtyp expuf = exp(- rsq * uf1[mtype].y);
numtyp force = factor_lj * uf1[mtype].x * expuf / (1.0 - expuf);
f.x += delx*force;
f.y += dely*force;
f.z += delz*force;
@ -102,17 +102,17 @@ __kernel void k_ufm(const __global numtyp4 *restrict x_,
__kernel void k_ufm_fast(const __global numtyp4 *restrict x_,
const __global numtyp4 *restrict uf1_in,
const __global numtyp4 *restrict uf3_in,
const __global numtyp4 *restrict uf3_in,
const __global numtyp *restrict sp_lj_in,
const __global int * dev_nbor,
const __global int * dev_packed,
__global acctyp4 *restrict ans,
__global acctyp *restrict engv,
const int eflag, const int vflag, const int inum,
const __global int * dev_nbor,
const __global int * dev_packed,
__global acctyp4 *restrict ans,
__global acctyp *restrict engv,
const int eflag, const int vflag, const int inum,
const int nbor_pitch, const int t_per_atom) {
int tid, ii, offset;
atom_info(t_per_atom,ii,tid,offset);
__local numtyp4 uf1[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
__local numtyp4 uf3[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
__local numtyp sp_lj[4];
@ -123,7 +123,7 @@ __kernel void k_ufm_fast(const __global numtyp4 *restrict x_,
if (eflag>0)
uf3[tid]=uf3_in[tid];
}
acctyp energy=(acctyp)0;
acctyp4 f;
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
@ -132,7 +132,7 @@ __kernel void k_ufm_fast(const __global numtyp4 *restrict x_,
virial[i]=(acctyp)0;
__syncthreads();
if (ii<inum) {
int i, numj, nbor, nbor_end;
__local int n_stride;
@ -145,7 +145,7 @@ __kernel void k_ufm_fast(const __global numtyp4 *restrict x_,
numtyp factor_lj;
for ( ; nbor<nbor_end; nbor+=n_stride) {
int j=dev_packed[nbor];
factor_lj = sp_lj[sbmask(j)];
j &= NEIGHMASK;
@ -158,11 +158,11 @@ __kernel void k_ufm_fast(const __global numtyp4 *restrict x_,
numtyp dely = ix.y-jx.y;
numtyp delz = ix.z-jx.z;
numtyp rsq = delx*delx+dely*dely+delz*delz;
if (rsq<uf1[mtype].z) {
numtyp expuf = exp(- rsq * uf1[mtype].y);
numtyp force = factor_lj * uf1[mtype].x * expuf / (1.0 - expuf);
f.x += delx*force;
f.y += dely*force;
f.z += delz*force;

View File

@ -11,7 +11,7 @@
//
// begin : Mon June 12, 2017
// email : andershaf@gmail.com
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"
@ -290,7 +290,7 @@ __kernel void k_vashishta(const __global numtyp4 *restrict x_,
if (eflag>0)
energy += (param3_bigh*reta+vc2-vc3-param3_bigw*r6inv-r*param3_dvrc+param3_c0);
if (vflag>0) {
virial[0] += delx*delx*force;
virial[1] += dely*dely*force;
@ -471,13 +471,13 @@ __kernel void k_vashishta_three_center(const __global numtyp4 *restrict x_,
numtyp rsq1 = delr1x*delr1x+delr1y*delr1y+delr1z*delr1z;
int ijparam=elem2param[itype*nelements*nelements+jtype*nelements+jtype];
numtyp4 param4_ijparam; fetch4(param4_ijparam,ijparam,param4_tex);
param_r0sq_ij=param4_ijparam.x;
if (rsq1 > param_r0sq_ij) continue; // still keep this for neigh no and tpa > 1
param_gamma_ij=param4_ijparam.y;
param_r0_ij=param4_ijparam.w;
int nbor_k,k_end;
if (dev_packed==dev_nbor) {
nbor_k=nborj_start-offset_j+offset_k;
@ -619,7 +619,7 @@ __kernel void k_vashishta_three_end(const __global numtyp4 *restrict x_,
param_gamma_ij=param4_ijparam.y;
param_r0_ij = param4_ijparam.w;
int nbor_k,numk;
if (dev_nbor==dev_packed) {
if (gpu_nbor) nbor_k=j+nbor_pitch;
@ -665,14 +665,14 @@ __kernel void k_vashishta_three_end(const __global numtyp4 *restrict x_,
if (rsq2 < param_r0sq_ik) {
param_gamma_ik=param4_ikparam.y;
param_r0_ik=param4_ikparam.w;
int ijkparam=elem2param[jtype*nelements*nelements+itype*nelements+ktype]; //jik
numtyp4 param5_ijkparam; fetch4(param5_ijkparam,ijkparam,param5_tex);
param_bigc_ijk=param5_ijkparam.x;
param_costheta_ijk=param5_ijkparam.y;
param_bigb_ijk=param5_ijkparam.z;
param_big2b_ijk=param5_ijkparam.w;
numtyp fjx, fjy, fjz;
//if (evatom==0) {
threebody_half(delr1x,delr1y,delr1z);
@ -774,7 +774,7 @@ __kernel void k_vashishta_three_end_vatom(const __global numtyp4 *restrict x_,
param_gamma_ij=param4_ijparam.y;
param_r0_ij=param4_ijparam.w;
int nbor_k,numk;
if (dev_nbor==dev_packed) {
if (gpu_nbor) nbor_k=j+nbor_pitch;
@ -827,7 +827,7 @@ __kernel void k_vashishta_three_end_vatom(const __global numtyp4 *restrict x_,
param_costheta_ijk=param5_ijkparam.y;
param_bigb_ijk=param5_ijkparam.z;
param_big2b_ijk=param5_ijkparam.w;
numtyp fjx, fjy, fjz, fkx, fky, fkz;
threebody(delr1x,delr1y,delr1z,eflag,energy);

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -11,7 +11,7 @@
//
// begin :
// email : nguyentd@ornl.gov
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)

View File

@ -11,7 +11,7 @@
//
// begin :
// email : ndactrung@gmail.com
// ***************************************************************************/
// ***************************************************************************
#if defined(NV_KERNEL) || defined(USE_HIP)
#include "lal_aux_fun1.h"

View File

@ -40,6 +40,13 @@ cmake ${srcdir} \
````
which activates the OpenMP backend. All of the options controlling device backends, options, architectures, and third-party libraries (TPLs) are given below.
## Platform-specific Problems
### Cray
* The Cray compiler wrappers do static linking by default. This seems to break the Kokkos build. You will likely need to set the environment variable `CRAYPE_LINK_TYPE=dynamic` in order to link correctly. Kokkos warns during configure if this is missing.
* The Cray compiler identifies to CMake as Clang, but it sometimes has its own flags that differ from Clang. We try to include all exceptions, but flag errors may occur in which a Clang-specific flag is passed that the Cray compiler does not recognize.
## Spack
An alternative to manually building with the CMake is to use the Spack package manager.
To do so, download the `kokkos-spack` git repo and add to the package list:
@ -63,6 +70,7 @@ For a complete list of Kokkos options, run:
````
spack info kokkos
````
More details can be found in the kokkos-spack repository [README](https://github.com/kokkos/kokkos-spack/blob/master/README.md).
#### Spack Development
Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable".
@ -73,32 +81,8 @@ spack find -p kokkos ...
````
where `...` is the unique spec identifying the particular Kokkos configuration and version.
A better way to use Spack for doing Kokkos development is the DIY feature of Spack.
If you wish to develop Kokkos itself, go to the Kokkos source folder:
````
spack diy -u cmake kokkos@diy ...
````
where `...` is a Spack spec identifying the exact Kokkos configuration.
This then creates a `spack-build` directory where you can run `make`.
If doing development on a downstream project, you can do almost exactly the same thing.
````
spack diy -u cmake ${myproject}@${myversion} ... ^kokkos...
````
where the `...` are the specs for your project and the desired Kokkos configuration.
Again, a `spack-build` directory will be created where you can run `make`.
Spack has a few idiosyncracies that make building outside of Spack annoying related to Spack forcing use of a compiler wrapper. This can be worked around by having a `-DSpack_WORKAROUND=On` given your CMake. Then add the block of code to your CMakeLists.txt:
````
if (Spack_WORKAROUND)
set(SPACK_CXX $ENV{SPACK_CXX})
if(SPACK_CXX)
set(CMAKE_CXX_COMPILER ${SPACK_CXX} CACHE STRING "the C++ compiler" FORCE)
set(ENV{CXX} ${SPACK_CXX})
endif()
endif()
````
A better way to use Spack for doing Kokkos development is the dev-build feature of Spack.
For dev-build details, consult the kokkos-spack repository [README](https://github.com/kokkos/kokkos-spack/blob/master/README.md).
# Kokkos Keyword Listing
@ -157,6 +141,9 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`.
* Kokkos_ENABLE_DEPRECATED_CODE
* Whether to enable deprecated code
* BOOL Default: OFF
* Kokkos_ENABLE_EXAMPLES
* Whether to enable building examples
* BOOL Default: OFF
* Kokkos_ENABLE_HPX_ASYNC_DISPATCH
* Whether HPX supports asynchronous dispatch
* BOOL Default: OFF

View File

@ -1,5 +1,59 @@
# Change Log
## [3.1.00](https://github.com/kokkos/kokkos/tree/3.1.00) (2020-04-14)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.0.00...3.1.00)
**Features:**
- HIP Support for AMD
- OpenMPTarget Support with clang
- Windows VS19 (Serial) Support [\#1533](https://github.com/kokkos/kokkos/issues/1533)
**Implemented enhancements:**
- generate\_makefile.bash should allow tests to be disabled [\#2886](https://github.com/kokkos/kokkos/issues/2886)
- clang/7+cuda/9 build -Werror-unused parameter error in nightly test [\#2884](https://github.com/kokkos/kokkos/issues/2884)
- ScatterView memory space is not user settable [\#2826](https://github.com/kokkos/kokkos/issues/2826)
- clang/8+cuda/10.0 build error with c++17 [\#2809](https://github.com/kokkos/kokkos/issues/2809)
- warnings.... [\#2805](https://github.com/kokkos/kokkos/issues/2805)
- Kokkos version in cpp define [\#2787](https://github.com/kokkos/kokkos/issues/2787)
- Remove Defunct QThreads Backend [\#2751](https://github.com/kokkos/kokkos/issues/2751)
- Improve Kokkos::fence behavior with multiple execution spaces [\#2659](https://github.com/kokkos/kokkos/issues/2659)
- polylithic\(?\) initialization of Kokkos [\#2658](https://github.com/kokkos/kokkos/issues/2658)
- Unnecessary\(?\) check for host execution space initialization from Cuda initialization [\#2652](https://github.com/kokkos/kokkos/issues/2652)
- Kokkos error reporting failures with CUDA GPUs in exclusive mode [\#2471](https://github.com/kokkos/kokkos/issues/2471)
- atomicMax equivalent \(and other atomics\) [\#2401](https://github.com/kokkos/kokkos/issues/2401)
- Fix alignment for Kokkos::complex [\#2255](https://github.com/kokkos/kokkos/issues/2255)
- Warnings with Cuda 10.1 [\#2206](https://github.com/kokkos/kokkos/issues/2206)
- dual view with Kokkos::ViewAllocateWithoutInitializing [\#2188](https://github.com/kokkos/kokkos/issues/2188)
- Check error code from cudaOccupancyMaxActiveBlocksPerMultiprocessor [\#2172](https://github.com/kokkos/kokkos/issues/2172)
- Add non-member Kokkos::resize/realloc for DualView [\#2170](https://github.com/kokkos/kokkos/issues/2170)
- Construct DualView without initialization [\#2046](https://github.com/kokkos/kokkos/issues/2046)
- Expose is\_assignable to determine if one view can be assigned to another [\#1936](https://github.com/kokkos/kokkos/issues/1936)
- profiling label [\#1935](https://github.com/kokkos/kokkos/issues/1935)
- team\_broadcast of bool failed on CUDA backend [\#1908](https://github.com/kokkos/kokkos/issues/1908)
- View static\_extent [\#660](https://github.com/kokkos/kokkos/issues/660)
- Misleading Kokkos::Cuda::initialize ERROR message when compiled for wrong GPU architecture [\#1944](https://github.com/kokkos/kokkos/issues/1944)
- Cryptic Error When Malloc Fails [\#2164](https://github.com/kokkos/kokkos/issues/2164)
- Drop support for intermediate standards in CMake [\#2336](https://github.com/kokkos/kokkos/issues/2336)
**Fixed bugs:**
- DualView sync\_device with length zero creates cuda errors [\#2946](https://github.com/kokkos/kokkos/issues/2946)
- building with nvcc and clang \(or clang based XL\) as host compiler: "Kokkos::atomic\_fetch\_min\(volatile int \*, int\)" has already been defined [\#2903](https://github.com/kokkos/kokkos/issues/2903)
- Cuda 9.1,10.1 debug builds failing due to -Werror=unused-parameter [\#2880](https://github.com/kokkos/kokkos/issues/2880)
- clang -Werror: Kokkos\_FixedBufferMemoryPool.hpp:140:28: error: unused parameter 'alloc\_size' [\#2869](https://github.com/kokkos/kokkos/issues/2869)
- intel/16.0.1, intel/17.0.1 nightly build failures with debugging enabled [\#2867](https://github.com/kokkos/kokkos/issues/2867)
- intel/16.0.1 debug build errors [\#2863](https://github.com/kokkos/kokkos/issues/2863)
- xl/16.1.1 with cpp14, openmp build, nightly test failures [\#2856](https://github.com/kokkos/kokkos/issues/2856)
- Intel nightly test failures: team\_vector [\#2852](https://github.com/kokkos/kokkos/issues/2852)
- Kokkos Views with intmax/2\<N\<intmax can hang during construction [\#2850](https://github.com/kokkos/kokkos/issues/2850)
- workgraph\_fib test seg-faults with threads backend and hwloc [\#2797](https://github.com/kokkos/kokkos/issues/2797)
- cuda.view\_64bit test hangs on Power8+Kepler37 system - develop and 2.9.00 branches [\#2771](https://github.com/kokkos/kokkos/issues/2771)
- device\_type for Kokkos\_Random ? [\#2693](https://github.com/kokkos/kokkos/issues/2693)
- "More than one tag given" error in Experimental::require\(\) [\#2608](https://github.com/kokkos/kokkos/issues/2608)
- Segfault on Marvell from our finalization stack [\#2542](https://github.com/kokkos/kokkos/issues/2542)
## [3.0.00](https://github.com/kokkos/kokkos/tree/3.0.00) (2020-01-27)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.9.00...3.0.00)
@ -214,7 +268,7 @@
## [2.6.00](https://github.com/kokkos/kokkos/tree/2.6.00) (2018-03-07)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.5.00...2.6.00)
**Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.6**
**Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.6**
**Implemented enhancements:**
@ -258,7 +312,7 @@
## [2.5.00](https://github.com/kokkos/kokkos/tree/2.5.00) (2017-12-15)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.04.11...2.5.00)
**Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.5**
**Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.5**
**Implemented enhancements:**

View File

@ -2,7 +2,7 @@
# We want to determine if options are given with the wrong case
# In order to detect which arguments are given to compare against
# the list of valid arguments, at the beginning here we need to
# form a list of all the given variables. If it begins with any
# form a list of all the given variables. If it begins with any
# case of KoKkOS, we add it to the list.
@ -25,6 +25,8 @@ SET(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR})
# Needed to simplify syntax of if statements
CMAKE_POLICY(SET CMP0054 NEW)
# Needed to make IN_LIST a valid operator
CMAKE_POLICY(SET CMP0057 NEW)
# Is this a build as part of Trilinos?
IF(COMMAND TRIBITS_PACKAGE_DECL)
@ -65,7 +67,7 @@ IF(NOT KOKKOS_HAS_TRILINOS)
cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
set(CMAKE_DISABLE_SOURCE_CHANGES ON)
set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
IF (Spack_WORKAROUND)
IF (Spack_WORKAROUND)
#if we are explicitly using Spack for development,
#nuke the Spack compiler
SET(SPACK_CXX $ENV{SPACK_CXX})
@ -75,7 +77,15 @@ IF(NOT KOKKOS_HAS_TRILINOS)
ENDIF()
ENDif()
IF(NOT DEFINED ${PROJECT_NAME})
# WORKAROUND FOR HIPCC
IF(Kokkos_ENABLE_HIP)
SET(KOKKOS_INTERNAL_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --amdgpu-target=gfx906")
ENDIF()
PROJECT(Kokkos CXX)
IF(Kokkos_ENABLE_HIP)
SET(CMAKE_CXX_FLAGS ${KOKKOS_INTERNAL_CMAKE_CXX_FLAGS})
ENDIF()
ENDIF()
ENDIF()
@ -92,16 +102,17 @@ ENDIF()
set(Kokkos_VERSION_MAJOR 3)
set(Kokkos_VERSION_MINOR 0)
set(Kokkos_VERSION_MINOR 1)
set(Kokkos_VERSION_PATCH 0)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.12.0")
IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.12.0")
MESSAGE(STATUS "Setting policy CMP0074 to use <Package>_ROOT variables")
CMAKE_POLICY(SET CMP0074 NEW)
ENDIF()
# Load either the real TriBITS or a TriBITS wrapper
# Load either the real TriBITS or a TriBITS wrapper
# for certain utility functions that are universal (like GLOBAL_SET)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake)
@ -118,18 +129,14 @@ ENDIF()
# These are the variables we will append to as we go
# I really wish these were regular variables
# but scoping issues can make it difficult
GLOBAL_RESET(KOKKOS_COMPILE_OPTIONS)
GLOBAL_RESET(KOKKOS_LINK_OPTIONS)
GLOBAL_RESET(KOKKOS_CUDA_OPTIONS)
GLOBAL_RESET(KOKKOS_CUDAFE_OPTIONS)
GLOBAL_RESET(KOKKOS_XCOMPILER_OPTIONS)
GLOBAL_SET(KOKKOS_COMPILE_OPTIONS)
GLOBAL_SET(KOKKOS_LINK_OPTIONS)
GLOBAL_SET(KOKKOS_CUDA_OPTIONS)
GLOBAL_SET(KOKKOS_CUDAFE_OPTIONS)
GLOBAL_SET(KOKKOS_XCOMPILER_OPTIONS)
# We need to append text here for making sure TPLs
# we import are available for an installed Kokkos
GLOBAL_RESET(KOKKOS_TPL_EXPORTS)
# We need these for controlling the exact -std flag
GLOBAL_RESET(KOKKOS_DONT_ALLOW_EXTENSIONS)
GLOBAL_RESET(KOKKOS_USE_CXX_EXTENSIONS)
GLOBAL_RESET(KOKKOS_CXX_STANDARD_FEATURE)
GLOBAL_SET(KOKKOS_TPL_EXPORTS)
# Include a set of Kokkos-specific wrapper functions that
# will either call raw CMake or TriBITS
@ -137,6 +144,9 @@ GLOBAL_RESET(KOKKOS_CXX_STANDARD_FEATURE)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake)
# Check the environment and set certain variables
# to allow platform-specific checks
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_check_env.cmake)
# The build environment setup goes in the following steps
# 1) Check all the enable options. This includes checking Kokkos_DEVICES
# 2) Check the compiler ID (type and version)
@ -187,14 +197,21 @@ IF (KOKKOS_HAS_TRILINOS)
# Because Tribits doesn't use lists, it uses spaces for the list of CXX flags
# we have to match the annoying behavior
STRING(REPLACE ";" " " KOKKOSCORE_COMPILE_OPTIONS "${KOKKOS_COMPILE_OPTIONS}")
STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONS}")
FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS})
SET(KOKKOSCORE_CUDAFE_OPTIONS "${KOKKOSCORE_CUDAFE_OPTIONS} -Xcudafe ${CUDAFE_FLAG}")
ENDFOREACH()
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_COMPILE_OPTIONS})
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_CUDA_OPTIONS})
FOREACH(XCOMP_FLAG ${KOKKOS_XCOMPILER_OPTIONS})
SET(KOKKOSCORE_XCOMPILER_OPTIONS "${KOKKOSCORE_XCOMPILER_OPTIONS} -Xcompiler ${XCOMP_FLAG}")
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcompiler ${XCOMP_FLAG})
ENDFOREACH()
SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION} ${KOKKOSCORE_CUDA_OPTIONS} ${KOKKOSCORE_CUDAFE_OPTIONS} ${KOKKOSCORE_XCOMPILER_OPTIONS}")
SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION} ${KOKKOSCORE_XCOMPILER_OPTIONS}")
IF (KOKKOS_ENABLE_CUDA)
STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONS}")
FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS})
SET(KOKKOSCORE_CUDAFE_OPTIONS "${KOKKOSCORE_CUDAFE_OPTIONS} -Xcudafe ${CUDAFE_FLAG}")
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcudafe ${CUDAFE_FLAG})
ENDFOREACH()
SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_CXX_FLAGS} ${KOKKOSCORE_CUDA_OPTIONS} ${KOKKOSCORE_CUDAFE_OPTIONS}")
ENDIF()
# Both parent scope and this package
# In ProjectCompilerPostConfig.cmake, we capture the "global" flags Trilinos wants in
# TRILINOS_TOPLEVEL_CXX_FLAGS
@ -203,6 +220,8 @@ IF (KOKKOS_HAS_TRILINOS)
#CMAKE_CXX_FLAGS will get added to Kokkos and Kokkos dependencies automatically here
#These flags get set up in KOKKOS_PACKAGE_DECL, which means they
#must be configured before KOKKOS_PACKAGE_DECL
SET(KOKKOS_ALL_COMPILE_OPTIONS
$<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_ALL_COMPILE_OPTIONS}>)
ENDIF()
KOKKOS_PACKAGE_DECL()
@ -250,7 +269,7 @@ INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h" DESTINATION ${CM
IF (HAS_PARENT)
FOREACH(DEV Kokkos_ENABLED_DEVICES)
#I would much rather not make these cache variables or global properties, but I can't
#make any guarantees on whether PARENT_SCOPE is good enough to make
#make any guarantees on whether PARENT_SCOPE is good enough to make
#these variables visible where I need them
SET(Kokkos_ENABLE_${DEV} ON PARENT_SCOPE)
SET_PROPERTY(GLOBAL PROPERTY Kokkos_ENABLE_${DEV} ON)

View File

@ -1,13 +1,13 @@
//@HEADER
// ************************************************************************
//
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -36,6 +36,6 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
//
// ************************************************************************
//@HEADER

View File

@ -6,15 +6,20 @@ ifndef KOKKOS_PATH
endif
CXXFLAGS=$(CCFLAGS)
# Options: Cuda,ROCm,OpenMP,Pthreads,Qthreads,Serial
KOKKOS_VERSION_MAJOR = 3
KOKKOS_VERSION_MINOR = 1
KOKKOS_VERSION_PATCH = 0
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
# Options: Cuda,HIP,ROCm,OpenMP,Pthread,Serial
KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Pthreads"
#KOKKOS_DEVICES ?= "Pthread"
# Options:
# Intel: KNC,KNL,SNB,HSW,BDW,SKX
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2
# IBM: BGQ,Power7,Power8,Power9
# AMD-GPUS: Kaveri,Carrizo,Fiji,Vega
# AMD-GPUS: Vega900,Vega906
# AMD-CPUS: AMDAVX,Ryzen,EPYC
KOKKOS_ARCH ?= ""
# Options: yes,no
@ -35,6 +40,9 @@ KOKKOS_STANDALONE_CMAKE ?= "no"
# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr
KOKKOS_CUDA_OPTIONS ?= "enable_lambda"
# Options: rdc
KOKKOS_HIP_OPTIONS ?= ""
# Default settings specific options.
# Options: enable_async_dispatch
KOKKOS_HPX_OPTIONS ?= ""
@ -82,29 +90,50 @@ KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPT
KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch)
KOKKOS_INTERNAL_ENABLE_ETI := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_eti)
KOKKOS_INTERNAL_HIP_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),rdc)
# Check for Kokkos Host Execution Spaces one of which must be on.
KOKKOS_INTERNAL_USE_OPENMP := $(call kokkos_has_string,$(subst OpenMPTarget,,$(KOKKOS_DEVICES)),OpenMP)
KOKKOS_INTERNAL_USE_PTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Pthread)
KOKKOS_INTERNAL_USE_QTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Qthreads)
KOKKOS_INTERNAL_USE_HPX := $(call kokkos_has_string,$(KOKKOS_DEVICES),HPX)
KOKKOS_INTERNAL_USE_SERIAL := $(call kokkos_has_string,$(KOKKOS_DEVICES),Serial)
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0)
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0)
KOKKOS_INTERNAL_USE_SERIAL := 1
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0)
KOKKOS_INTERNAL_USE_SERIAL := 1
endif
endif
endif
# Check for other Execution Spaces.
KOKKOS_INTERNAL_USE_CUDA := $(call kokkos_has_string,$(KOKKOS_DEVICES),Cuda)
KOKKOS_INTERNAL_USE_ROCM := $(call kokkos_has_string,$(KOKKOS_DEVICES),ROCm)
KOKKOS_INTERNAL_USE_HIP := $(call kokkos_has_string,$(KOKKOS_DEVICES),HIP)
KOKKOS_INTERNAL_USE_OPENMPTARGET := $(call kokkos_has_string,$(KOKKOS_DEVICES),OpenMPTarget)
KOKKOS_DEVICELIST =
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
KOKKOS_DEVICELIST += Serial
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
KOKKOS_DEVICELIST += OpenMP
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
KOKKOS_DEVICELIST += Threads
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
KOKKOS_DEVICELIST += HPX
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_DEVICELIST += Cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
KOKKOS_DEVICELIST += HIP
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_DEVICELIST += OPENMPTARGET
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
ifeq ($(origin CUDA_PATH), undefined)
@ -132,6 +161,7 @@ KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell echo "$(shell export OMP
KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang)
KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple LLVM)
KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC)
KOKKOS_INTERNAL_COMPILER_GCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),GCC)
# Check Host Compiler if using NVCC through nvcc_wrapper
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
@ -180,20 +210,20 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1)
KOKKOS_INTERNAL_COMPILER_WARNINGS =
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
# TODO check if cray accepts GNU style warnings
KOKKOS_INTERNAL_COMPILER_WARNINGS =
else
#gcc
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized
endif
endif
endif
@ -230,7 +260,12 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_IBM_XL_OMP45_WORKAROUND -qsmp=omp -qoffload -qnoeh
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp
#KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_WORKAROUND_OPENMPTARGET_CLANG -fopenmp -fopenmp=libomp
KOKKOS_INTERNAL_OPENMPTARGET_LIB := -lomptarget
else
#Assume GCC
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none
endif
endif
@ -353,11 +388,8 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Ryzen)
KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),EPYC)
KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(call kokkos_has_string,$(KOKKOS_ARCH),Kaveri)
KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(call kokkos_has_string,$(KOKKOS_ARCH),Carrizo)
KOKKOS_INTERNAL_USE_ARCH_FIJI := $(call kokkos_has_string,$(KOKKOS_ARCH),Fiji)
KOKKOS_INTERNAL_USE_ARCH_VEGA := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega)
KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(call kokkos_has_string,$(KOKKOS_ARCH),gfx901)
KOKKOS_INTERNAL_USE_ARCH_VEGA900 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega900)
KOKKOS_INTERNAL_USE_ARCH_VEGA906 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega906)
# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
@ -430,6 +462,10 @@ tmp := $(call kokkos_append_header,'\#error "Do not include $(KOKKOS_CONFIG_HEAD
tmp := $(call kokkos_append_header,'\#else')
tmp := $(call kokkos_append_header,'\#define KOKKOS_CORE_CONFIG_H')
tmp := $(call kokkos_append_header,'\#endif')
tmp := $(call kokkos_append_header,"")
tmp := $(call kokkos_append_header,"\#define KOKKOS_VERSION $(KOKKOS_VERSION)")
tmp := $(call kokkos_append_header,"")
tmp := $(call kokkos_append_header,"/* Execution Spaces */")
@ -442,9 +478,15 @@ ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_ROCM')
tmp := $(call kokkos_append_header,'\#define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1')
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_HIP')
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMPTARGET')
ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_WORKAROUND_OPENMPTARGET_GCC")
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
@ -455,10 +497,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_THREADS")
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_QTHREADS")
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX")
endif
@ -966,6 +1004,14 @@ endif
# Figure out the architecture flag for Cuda.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_INTERNAL_USE_CUDA_ARCH=1
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
KOKKOS_INTERNAL_USE_CUDA_ARCH=1
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
@ -974,7 +1020,17 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
else
$(error Makefile.kokkos: CUDA is enabled but the compiler is neither NVCC nor Clang (got version string $(KOKKOS_CXX_VERSION)) )
endif
KOKKOS_INTERNAL_USE_CUDA_ARCH = 1
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march
endif
KOKKOS_INTERNAL_USE_CUDA_ARCH = 1
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER30")
@ -1042,55 +1098,49 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
endif
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
KOKKOS_CXXFLAGS += --expt-extended-lambda
endif
endif
# Figure out the architecture flag for ROCm.
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
# Lets start with adding architecture defines
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KAVERI), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 701")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KAVERI")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx701
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA900), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 900")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA900")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx900
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_CARRIZO), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 801")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_CARRIZO")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx801
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 906")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA906")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx906
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_FIJI), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 803")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_FIJI")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx803
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 900")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx900
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_GFX901), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 901")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_GFX901")
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx901
endif
KOKKOS_INTERNAL_HCC_PATH := $(shell which $(CXX))
ROCM_HCC_PATH ?= $(KOKKOS_INTERNAL_HCC_PATH:/bin/clang++=)
KOKKOS_CXXFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --cxxflags)
KOKKOS_LDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm
KOKKOS_CXXLDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm
KOKKOS_TPL_LIBRARY_NAMES += hc_am m
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_ROCM_ARCH_FLAG)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.cpp)
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/ROCm/*.cpp)
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.hpp)
KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG)
KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG)
ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE")
KOKKOS_CXXFLAGS+=-fgpu-rdc
KOKKOS_LDFLAGS+=-fgpu-rdc
else
KOKKOS_CXXFLAGS+=-fno-gpu-rdc
KOKKOS_LDFLAGS+=-fno-gpu-rdc
endif
endif
KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1)
ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
@ -1141,7 +1191,7 @@ endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_SRC += $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.hpp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
@ -1149,6 +1199,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
endif
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
KOKKOS_LIBS += $(KOKKOS_INTERNAL_OPENMPTARGET_LIB)
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
@ -1178,22 +1229,6 @@ endif
KOKKOS_TPL_LIBRARY_NAMES += pthread
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp)
ifneq ($(KOKKOS_CMAKE), yes)
ifneq ($(QTHREADS_PATH),)
KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include
KOKKOS_LIBDIRS += -L$(QTHREADS_PATH)/lib
KOKKOS_CXXLDFLAGS += -L$(QTHREADS_PATH)/lib
KOKKOS_TPL_INCLUDE_DIRS += $(QTHREADS_PATH)/include
KOKKOS_TPL_LIBRARY_DIRS += $(QTHREADS_PATH)/lib64
endif
KOKKOS_LIBS += -lqthread
KOKKOS_TPL_LIBRARY_NAMES += qthread
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.hpp)

View File

@ -55,6 +55,17 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
Kokkos_HIP_Space.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Space.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Space.cpp
Kokkos_HIP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp
Kokkos_HIP_KernelLaunch.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_KernelLaunch.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_KernelLaunch.cpp
Kokkos_HIP_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
Kokkos_ROCm_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Exec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Exec.cpp
@ -79,13 +90,6 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
Kokkos_QthreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_QthreadsExec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_QthreadsExec.cpp
Kokkos_Qthreads_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_Qthreads_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_Qthreads_Task.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
@ -106,10 +110,12 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
Kokkos_OpenMPTarget_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp
Kokkos_OpenMPTarget_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp
Kokkos_OpenMPTargetSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
#Kokkos_OpenMPTarget_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
# $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
Kokkos_OpenMPTarget_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
endif
Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp

View File

@ -11,8 +11,8 @@ CUDA, HPX, OpenMP and Pthreads as backend programming models with several other
backends in development.
Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem,
which also provides math kernels (https://github.com/kokkos/kokkos-kernels), as well as
profiling and debugging tools (https://github.com/kokkos/kokkos-tools).
which also provides math kernels (https://github.com/kokkos/kokkos-kernels), as well as
profiling and debugging tools (https://github.com/kokkos/kokkos-tools).
# Learning about Kokkos
@ -23,7 +23,7 @@ For questions find us on Slack: https://kokkosteam.slack.com or open a github is
For non-public questions send an email to
crtrott(at)sandia.gov
A separate repository with extensive tutorial material can be found under
A separate repository with extensive tutorial material can be found under
https://github.com/kokkos/kokkos-tutorials.
Furthermore, the 'example/tutorial' directory provides step by step tutorial
@ -41,12 +41,12 @@ To learn more about Kokkos consider watching one of our presentations:
# Contributing to Kokkos
We are open and try to encourage contributions from external developers.
We are open and try to encourage contributions from external developers.
To do so please first open an issue describing the contribution and then issue
a pull request against the develop branch. For larger features it may be good
to get guidance from the core development team first through the github issue.
to get guidance from the core development team first through the github issue.
Note that Kokkos Core is licensed under standard 3-clause BSD terms of use.
Note that Kokkos Core is licensed under standard 3-clause BSD terms of use.
Which means contributing to Kokkos allows anyone else to use your contributions
not just for public purposes but also for closed source commercial projects.
For specifics see the LICENSE file contained in the repository or distribution.
@ -94,9 +94,9 @@ For specifics see the LICENSE file contained in the repository or distribution.
* Intel 18.2.199 (with gcc 4.9.3)
### Primary tested compilers on ARM (Cavium ThunderX2)
* GCC 7.2.0
* GCC 7.2.0
* ARM/Clang 18.4.0
### Other compilers working:
* X86:
* Cygwin 2.1.0 64bit with gcc 4.9.3
@ -110,47 +110,47 @@ For specifics see the LICENSE file contained in the repository or distribution.
Primary tested compiler are passing in release mode
with warnings as errors. They also are tested with a comprehensive set of
with warnings as errors. They also are tested with a comprehensive set of
backend combinations (i.e. OpenMP, Pthreads, Serial, OpenMP+Serial, ...).
We are using the following set of flags:
* GCC:
* GCC:
````
-Wall -Wshadow -pedantic
-Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wignored-qualifiers -Wempty-body
-Wignored-qualifiers -Wempty-body
-Wclobbered -Wuninitialized
````
* Intel:
* Intel:
````
-Wall -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wuninitialized
````
* Clang:
* Clang:
````
-Wall -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wuninitialized
````
````
* NVCC:
* NVCC:
````
-Wall -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wuninitialized
````
Other compilers are tested occasionally, in particular when pushing from develop to
Other compilers are tested occasionally, in particular when pushing from develop to
master branch. These are tested less rigorously without `-Werror` and only for a select set of backends.
# Building and Installing Kokkos
Kokkos provide a CMake build system and a raw Makefile build system.
Kokkos provide a CMake build system and a raw Makefile build system.
The CMake build system is strongly encouraged and will be the most rigorously supported in future releases.
Full details are given in the [build instructions](BUILD.md). Basic setups are shown here:
## CMake
The best way to install Kokkos is using the CMake build system. Assuming Kokkos lives in `$srcdir`:
The best way to install Kokkos is using the CMake build system. Assuming Kokkos lives in `$srcdir`:
````
cmake $srcdir \
-DCMAKE_CXX_COMPILER=$path_to_compiler \
@ -162,9 +162,9 @@ cmake $srcdir \
````
then simply type `make install`. The Kokkos CMake package will then be installed in `$path_to_install` to be used by downstream packages.
To validate the Kokkos build, configure with
To validate the Kokkos build, configure with
````
-DKokkos_ENABLE_TESTS=On
-DKokkos_ENABLE_TESTS=On
````
and run `make test` after completing the build.
@ -209,7 +209,7 @@ For a complete list of Kokkos options, run:
spack info kokkos
````
Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable".
Generally, Spack usage should never really require you to reference the computer-generated unique install folder.
Generally, Spack usage should never really require you to reference the computer-generated unique install folder.
More details are given in the [build instructions](BUILD.md). If you must know, you can locate Spack Kokkos installations with:
````
spack find -p kokkos ...
@ -217,7 +217,7 @@ spack find -p kokkos ...
where `...` is the unique spec identifying the particular Kokkos configuration and version.
## Raw Makefile
## Raw Makefile
A bash script is provided to generate raw makefiles.
To install Kokkos as a library create a build directory and run the following
````
@ -240,33 +240,33 @@ changing the device type for which to build.
For individual projects, it may be preferable to build Kokkos inline rather than link to an installed package.
The main reason is that you may otherwise need many different
configurations of Kokkos installed depending on the required compile time
features an application needs. For example there is only one default
features an application needs. For example there is only one default
execution space, which means you need different installations to have OpenMP
or Pthreads as the default space. Also for the CUDA backend there are certain
choices, such as allowing relocatable device code, which must be made at
choices, such as allowing relocatable device code, which must be made at
installation time. Building Kokkos inline uses largely the same process
as compiling an application against an installed Kokkos library.
as compiling an application against an installed Kokkos library.
For CMake, this means copying over the Kokkos source code into your project and adding `add_subdirectory(kokkos)` to your CMakeLists.txt.
For raw Makefiles, see the example benchmarks/bytes_and_flops/Makefile which can be used with an installed library and or an inline build.
For raw Makefiles, see the example benchmarks/bytes_and_flops/Makefile which can be used with an installed library and or an inline build.
# Kokkos and CUDA UVM
Kokkos does support UVM as a specific memory space called CudaUVMSpace.
Allocations made with that space are accessible from host and device.
Kokkos does support UVM as a specific memory space called CudaUVMSpace.
Allocations made with that space are accessible from host and device.
You can tell Kokkos to use that as the default space for Cuda allocations.
In either case UVM comes with a number of restrictions:
* You can't access allocations on the host while a kernel is potentially
running. This will lead to segfaults. To avoid that you either need to
* You can't access allocations on the host while a kernel is potentially
running. This will lead to segfaults. To avoid that you either need to
call Kokkos::Cuda::fence() (or just Kokkos::fence()), after kernels, or
you can set the environment variable CUDA_LAUNCH_BLOCKING=1.
* In multi socket multi GPU machines without NVLINK, UVM defaults
* In multi socket multi GPU machines without NVLINK, UVM defaults
to using zero copy allocations for technical reasons related to using multiple
GPUs from the same process. If an executable doesn't do that (e.g. each
MPI rank of an application uses a single GPU [can be the same GPU for
MPI rank of an application uses a single GPU [can be the same GPU for
multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1.
This will enforce proper UVM allocations, but can lead to errors if
This will enforce proper UVM allocations, but can lead to errors if
more than a single GPU is used by a single process.

View File

@ -537,6 +537,145 @@ struct rand<Generator, Kokkos::complex<double> > {
}
};
template <class DeviceType>
class Random_XorShift1024_Pool;
namespace Impl {
template <bool UseCArrayState>
struct Random_XorShift1024_State {
uint64_t state_[16];
KOKKOS_DEFAULTED_FUNCTION
Random_XorShift1024_State() = default;
template <class StateViewType>
KOKKOS_FUNCTION Random_XorShift1024_State(const StateViewType& v,
int state_idx) {
for (int i = 0; i < 16; i++) state_[i] = v(state_idx, i);
}
KOKKOS_FUNCTION
uint64_t operator[](const int i) const { return state_[i]; }
KOKKOS_FUNCTION
uint64_t& operator[](const int i) { return state_[i]; }
};
template <>
struct Random_XorShift1024_State<false> {
uint64_t* state_;
const int stride_;
KOKKOS_FUNCTION
Random_XorShift1024_State() : state_(nullptr), stride_(1){};
template <class StateViewType>
KOKKOS_FUNCTION Random_XorShift1024_State(const StateViewType& v,
int state_idx)
: state_(&v(state_idx, 0)), stride_(v.stride_1()) {}
KOKKOS_FUNCTION
uint64_t operator[](const int i) const { return state_[i * stride_]; }
KOKKOS_FUNCTION
uint64_t& operator[](const int i) { return state_[i * stride_]; }
};
template <class ExecutionSpace>
struct Random_XorShift1024_UseCArrayState : std::true_type {};
#ifdef KOKKOS_ENABLE_CUDA
template <>
struct Random_XorShift1024_UseCArrayState<Kokkos::Cuda> : std::false_type {};
#endif
#ifdef KOKKOS_ENABLE_HIP
template <>
struct Random_XorShift1024_UseCArrayState<Kokkos::Experimental::HIP>
: std::false_type {};
#endif
#ifdef KOKKOS_ENABLE_OPENMPTARGET
template <>
struct Random_XorShift1024_UseCArrayState<Kokkos::Experimental::OpenMPTarget>
: std::false_type {};
#endif
template <class ExecutionSpace>
struct Random_UniqueIndex {
using locks_view_type = View<int*, ExecutionSpace>;
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type) {
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
const int i = ExecutionSpace::hardware_thread_id();
#else
const int i = ExecutionSpace::impl_hardware_thread_id();
#endif
return i;
#else
return 0;
#endif
}
};
#ifdef KOKKOS_ENABLE_CUDA
template <>
struct Random_UniqueIndex<Kokkos::Cuda> {
using locks_view_type = View<int*, Kokkos::Cuda>;
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type& locks_) {
#ifdef __CUDA_ARCH__
const int i_offset =
(threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
blockDim.x * blockDim.y * blockDim.z +
i_offset) %
locks_.extent(0);
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
i += blockDim.x * blockDim.y * blockDim.z;
if (i >= static_cast<int>(locks_.extent(0))) {
i = i_offset;
}
}
return i;
#else
(void)locks_;
return 0;
#endif
}
};
#endif
#ifdef KOKKOS_ENABLE_HIP
template <>
struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
using locks_view_type = View<int*, Kokkos::Experimental::HIP>;
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type& locks_) {
#ifdef __HIP_DEVICE_COMPILE__
const int i_offset =
(hipThreadIdx_x * hipBlockDim_y + hipThreadIdx_y) * hipBlockDim_z +
hipThreadIdx_z;
int i = (((hipBlockIdx_x * hipGridDim_y + hipBlockIdx_y) * hipGridDim_z +
hipBlockIdx_z) *
hipBlockDim_x * hipBlockDim_y * hipBlockDim_z +
i_offset) %
locks_.extent(0);
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
i += hipBlockDim_x * hipBlockDim_y * hipBlockDim_z;
if (i >= static_cast<int>(locks_.extent(0))) {
i = i_offset;
}
}
return i;
#else
(void)locks_;
return 0;
#endif
}
};
#endif
} // namespace Impl
template <class DeviceType>
class Random_XorShift64_Pool;
@ -550,10 +689,10 @@ class Random_XorShift64 {
public:
typedef DeviceType device_type;
enum { MAX_URAND = 0xffffffffU };
enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 };
enum { MAX_RAND = static_cast<int>(0xffffffff / 2) };
enum { MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffLL / 2 - 1) };
constexpr static uint32_t MAX_URAND = std::numeric_limits<uint32_t>::max();
constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max();
constexpr static int32_t MAX_RAND = std::numeric_limits<int32_t>::max();
constexpr static int64_t MAX_RAND64 = std::numeric_limits<int64_t>::max();
KOKKOS_INLINE_FUNCTION
Random_XorShift64(uint64_t state, int state_idx = 0)
@ -637,10 +776,12 @@ class Random_XorShift64 {
}
KOKKOS_INLINE_FUNCTION
float frand() { return 1.0f * urand64() / MAX_URAND64; }
float frand() { return urand64() / static_cast<float>(MAX_URAND64); }
KOKKOS_INLINE_FUNCTION
float frand(const float& range) { return range * urand64() / MAX_URAND64; }
float frand(const float& range) {
return range * urand64() / static_cast<float>(MAX_URAND64);
}
KOKKOS_INLINE_FUNCTION
float frand(const float& start, const float& end) {
@ -648,10 +789,12 @@ class Random_XorShift64 {
}
KOKKOS_INLINE_FUNCTION
double drand() { return 1.0 * urand64() / MAX_URAND64; }
double drand() { return urand64() / static_cast<double>(MAX_URAND64); }
KOKKOS_INLINE_FUNCTION
double drand(const double& range) { return range * urand64() / MAX_URAND64; }
double drand(const double& range) {
return range * urand64() / static_cast<double>(MAX_URAND64);
}
KOKKOS_INLINE_FUNCTION
double drand(const double& start, const double& end) {
@ -662,6 +805,11 @@ class Random_XorShift64 {
// number
KOKKOS_INLINE_FUNCTION
double normal() {
#ifndef __HIP_DEVICE_COMPILE__ // FIXME_HIP
using std::sqrt;
#else
using ::sqrt;
#endif
double S = 2.0;
double U;
while (S >= 1.0) {
@ -669,7 +817,7 @@ class Random_XorShift64 {
const double V = 2.0 * drand() - 1.0;
S = U * U + V * V;
}
return U * std::sqrt(-2.0 * log(S) / S);
return U * sqrt(-2.0 * log(S) / S);
}
KOKKOS_INLINE_FUNCTION
@ -681,9 +829,10 @@ class Random_XorShift64 {
template <class DeviceType = Kokkos::DefaultExecutionSpace>
class Random_XorShift64_Pool {
private:
typedef View<int*, DeviceType> lock_type;
using execution_space = typename DeviceType::execution_space;
typedef View<int*, execution_space> locks_type;
typedef View<uint64_t*, DeviceType> state_data_type;
lock_type locks_;
locks_type locks_;
state_data_type state_;
int num_states_;
@ -695,11 +844,8 @@ class Random_XorShift64_Pool {
Random_XorShift64_Pool() { num_states_ = 0; }
Random_XorShift64_Pool(uint64_t seed) {
num_states_ = 0;
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
init(seed, DeviceType::max_hardware_threads());
#else
init(seed, DeviceType::impl_max_hardware_threads());
#endif
init(seed, execution_space().concurrency());
}
KOKKOS_INLINE_FUNCTION
@ -719,11 +865,11 @@ class Random_XorShift64_Pool {
num_states_ = num_states;
locks_ = lock_type("Kokkos::Random_XorShift64::locks", num_states_);
locks_ = locks_type("Kokkos::Random_XorShift64::locks", num_states_);
state_ = state_data_type("Kokkos::Random_XorShift64::state", num_states_);
typename state_data_type::HostMirror h_state = create_mirror_view(state_);
typename lock_type::HostMirror h_lock = create_mirror_view(locks_);
typename locks_type::HostMirror h_lock = create_mirror_view(locks_);
// Execute on the HostMirror's default execution space.
Random_XorShift64<typename state_data_type::HostMirror::execution_space>
@ -746,13 +892,8 @@ class Random_XorShift64_Pool {
KOKKOS_INLINE_FUNCTION
Random_XorShift64<DeviceType> get_state() const {
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
const int i = DeviceType::hardware_thread_id();
;
#else
const int i = DeviceType::impl_hardware_thread_id();
;
#endif
const int i =
Impl::Random_UniqueIndex<execution_space>::get_state_idx(locks_);
return Random_XorShift64<DeviceType>(state_(i), i);
}
@ -765,35 +906,35 @@ class Random_XorShift64_Pool {
KOKKOS_INLINE_FUNCTION
void free_state(const Random_XorShift64<DeviceType>& state) const {
state_(state.state_idx_) = state.state_;
locks_(state.state_idx_) = 0;
}
};
template <class DeviceType>
class Random_XorShift1024_Pool;
template <class DeviceType>
class Random_XorShift1024 {
using execution_space = typename DeviceType::execution_space;
private:
int p_;
const int state_idx_;
uint64_t state_[16];
Impl::Random_XorShift1024_State<
Impl::Random_XorShift1024_UseCArrayState<execution_space>::value>
state_;
friend class Random_XorShift1024_Pool<DeviceType>;
public:
typedef Random_XorShift1024_Pool<DeviceType> pool_type;
typedef DeviceType device_type;
enum { MAX_URAND = 0xffffffffU };
enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 };
enum { MAX_RAND = static_cast<int>(0xffffffffU / 2) };
enum { MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL / 2 - 1) };
constexpr static uint32_t MAX_URAND = std::numeric_limits<uint32_t>::max();
constexpr static uint64_t MAX_URAND64 = std::numeric_limits<uint64_t>::max();
constexpr static int32_t MAX_RAND = std::numeric_limits<int32_t>::max();
constexpr static int64_t MAX_RAND64 = std::numeric_limits<int64_t>::max();
KOKKOS_INLINE_FUNCTION
Random_XorShift1024(const typename pool_type::state_data_type& state, int p,
int state_idx = 0)
: p_(p), state_idx_(state_idx) {
for (int i = 0; i < 16; i++) state_[i] = state(state_idx, i);
}
: p_(p), state_idx_(state_idx), state_(state, state_idx) {}
KOKKOS_INLINE_FUNCTION
uint32_t urand() {
@ -876,10 +1017,12 @@ class Random_XorShift1024 {
}
KOKKOS_INLINE_FUNCTION
float frand() { return 1.0f * urand64() / MAX_URAND64; }
float frand() { return urand64() / static_cast<float>(MAX_URAND64); }
KOKKOS_INLINE_FUNCTION
float frand(const float& range) { return range * urand64() / MAX_URAND64; }
float frand(const float& range) {
return range * urand64() / static_cast<float>(MAX_URAND64);
}
KOKKOS_INLINE_FUNCTION
float frand(const float& start, const float& end) {
@ -887,10 +1030,12 @@ class Random_XorShift1024 {
}
KOKKOS_INLINE_FUNCTION
double drand() { return 1.0 * urand64() / MAX_URAND64; }
double drand() { return urand64() / static_cast<double>(MAX_URAND64); }
KOKKOS_INLINE_FUNCTION
double drand(const double& range) { return range * urand64() / MAX_URAND64; }
double drand(const double& range) {
return range * urand64() / static_cast<double>(MAX_URAND64);
}
KOKKOS_INLINE_FUNCTION
double drand(const double& start, const double& end) {
@ -901,6 +1046,11 @@ class Random_XorShift1024 {
// number
KOKKOS_INLINE_FUNCTION
double normal() {
#ifndef KOKKOS_ENABLE_HIP // FIXME_HIP
using std::sqrt;
#else
using ::sqrt;
#endif
double S = 2.0;
double U;
while (S >= 1.0) {
@ -908,7 +1058,7 @@ class Random_XorShift1024 {
const double V = 2.0 * drand() - 1.0;
S = U * U + V * V;
}
return U * std::sqrt(-2.0 * log(S) / S);
return U * sqrt(-2.0 * log(S) / S);
}
KOKKOS_INLINE_FUNCTION
@ -920,10 +1070,12 @@ class Random_XorShift1024 {
template <class DeviceType = Kokkos::DefaultExecutionSpace>
class Random_XorShift1024_Pool {
private:
using execution_space = typename DeviceType::execution_space;
typedef View<int*, execution_space> locks_type;
typedef View<int*, DeviceType> int_view_type;
typedef View<uint64_t * [16], DeviceType> state_data_type;
int_view_type locks_;
locks_type locks_;
state_data_type state_;
int_view_type p_;
int num_states_;
@ -939,11 +1091,8 @@ class Random_XorShift1024_Pool {
inline Random_XorShift1024_Pool(uint64_t seed) {
num_states_ = 0;
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
init(seed, DeviceType::max_hardware_threads());
#else
init(seed, DeviceType::impl_max_hardware_threads());
#endif
init(seed, execution_space().concurrency());
}
KOKKOS_INLINE_FUNCTION
@ -965,12 +1114,12 @@ class Random_XorShift1024_Pool {
inline void init(uint64_t seed, int num_states) {
if (seed == 0) seed = uint64_t(1318319);
num_states_ = num_states;
locks_ = int_view_type("Kokkos::Random_XorShift1024::locks", num_states_);
locks_ = locks_type("Kokkos::Random_XorShift1024::locks", num_states_);
state_ = state_data_type("Kokkos::Random_XorShift1024::state", num_states_);
p_ = int_view_type("Kokkos::Random_XorShift1024::p", num_states_);
typename state_data_type::HostMirror h_state = create_mirror_view(state_);
typename int_view_type::HostMirror h_lock = create_mirror_view(locks_);
typename locks_type::HostMirror h_lock = create_mirror_view(locks_);
typename int_view_type::HostMirror h_p = create_mirror_view(p_);
// Execute on the HostMirror's default execution space.
@ -997,11 +1146,8 @@ class Random_XorShift1024_Pool {
KOKKOS_INLINE_FUNCTION
Random_XorShift1024<DeviceType> get_state() const {
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
const int i = DeviceType::hardware_thread_id();
#else
const int i = DeviceType::impl_hardware_thread_id();
#endif
const int i =
Impl::Random_UniqueIndex<execution_space>::get_state_idx(locks_);
return Random_XorShift1024<DeviceType>(state_, p_(i), i);
};
@ -1014,482 +1160,11 @@ class Random_XorShift1024_Pool {
KOKKOS_INLINE_FUNCTION
void free_state(const Random_XorShift1024<DeviceType>& state) const {
for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i];
p_(state.state_idx_) = state.p_;
p_(state.state_idx_) = state.p_;
locks_(state.state_idx_) = 0;
}
};
#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)
template <>
class Random_XorShift1024<Kokkos::Cuda> {
private:
int p_;
const int state_idx_;
uint64_t* state_;
const int stride_;
friend class Random_XorShift1024_Pool<Kokkos::Cuda>;
public:
typedef Kokkos::Cuda device_type;
typedef Random_XorShift1024_Pool<device_type> pool_type;
enum { MAX_URAND = 0xffffffffU };
enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 };
enum { MAX_RAND = static_cast<int>(0xffffffffU / 2) };
enum { MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL / 2 - 1) };
KOKKOS_INLINE_FUNCTION
Random_XorShift1024(const typename pool_type::state_data_type& state, int p,
int state_idx = 0)
: p_(p),
state_idx_(state_idx),
state_(&state(state_idx, 0)),
stride_(state.stride_1()) {}
KOKKOS_INLINE_FUNCTION
uint32_t urand() {
uint64_t state_0 = state_[p_ * stride_];
uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_];
state_1 ^= state_1 << 31;
state_1 ^= state_1 >> 11;
state_0 ^= state_0 >> 30;
uint64_t tmp =
(state_[p_ * stride_] = state_0 ^ state_1) * 1181783497276652981ULL;
tmp = tmp >> 16;
return static_cast<uint32_t>(tmp & MAX_URAND);
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64() {
uint64_t state_0 = state_[p_ * stride_];
uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_];
state_1 ^= state_1 << 31;
state_1 ^= state_1 >> 11;
state_0 ^= state_0 >> 30;
return ((state_[p_ * stride_] = state_0 ^ state_1) *
1181783497276652981LL) -
1;
}
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& range) {
const uint32_t max_val = (MAX_URAND / range) * range;
uint32_t tmp = urand();
while (tmp >= max_val) urand();
return tmp % range;
}
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& start, const uint32_t& end) {
return urand(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& range) {
const uint64_t max_val = (MAX_URAND64 / range) * range;
uint64_t tmp = urand64();
while (tmp >= max_val) urand64();
return tmp % range;
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& start, const uint64_t& end) {
return urand64(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
int rand() { return static_cast<int>(urand() / 2); }
KOKKOS_INLINE_FUNCTION
int rand(const int& range) {
const int max_val = (MAX_RAND / range) * range;
int tmp = rand();
while (tmp >= max_val) rand();
return tmp % range;
}
KOKKOS_INLINE_FUNCTION
int rand(const int& start, const int& end) {
return rand(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
int64_t rand64() { return static_cast<int64_t>(urand64() / 2); }
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& range) {
const int64_t max_val = (MAX_RAND64 / range) * range;
int64_t tmp = rand64();
while (tmp >= max_val) rand64();
return tmp % range;
}
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& start, const int64_t& end) {
return rand64(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
float frand() { return 1.0f * urand64() / MAX_URAND64; }
KOKKOS_INLINE_FUNCTION
float frand(const float& range) { return range * urand64() / MAX_URAND64; }
KOKKOS_INLINE_FUNCTION
float frand(const float& start, const float& end) {
return frand(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
double drand() { return 1.0 * urand64() / MAX_URAND64; }
KOKKOS_INLINE_FUNCTION
double drand(const double& range) { return range * urand64() / MAX_URAND64; }
KOKKOS_INLINE_FUNCTION
double drand(const double& start, const double& end) {
return frand(end - start) + start;
}
// Marsaglia polar method for drawing a standard normal distributed random
// number
KOKKOS_INLINE_FUNCTION
double normal() {
double S = 2.0;
double U;
while (S >= 1.0) {
U = 2.0 * drand() - 1.0;
const double V = 2.0 * drand() - 1.0;
S = U * U + V * V;
}
return U * std::sqrt(-2.0 * log(S) / S);
}
KOKKOS_INLINE_FUNCTION
double normal(const double& mean, const double& std_dev = 1.0) {
return mean + normal() * std_dev;
}
};
template <>
inline Random_XorShift64_Pool<Kokkos::Cuda>::Random_XorShift64_Pool(
uint64_t seed) {
num_states_ = 0;
init(seed, 4 * 32768);
}
template <>
KOKKOS_INLINE_FUNCTION Random_XorShift64<Kokkos::Cuda>
Random_XorShift64_Pool<Kokkos::Cuda>::get_state() const {
#ifdef __CUDA_ARCH__
const int i_offset =
(threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
blockDim.x * blockDim.y * blockDim.z +
i_offset) %
num_states_;
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
i += blockDim.x * blockDim.y * blockDim.z;
if (i >= num_states_) {
i = i_offset;
}
}
return Random_XorShift64<Kokkos::Cuda>(state_(i), i);
#else
return Random_XorShift64<Kokkos::Cuda>(state_(0), 0);
#endif
}
template <>
KOKKOS_INLINE_FUNCTION void Random_XorShift64_Pool<Kokkos::Cuda>::free_state(
const Random_XorShift64<Kokkos::Cuda>& state) const {
state_(state.state_idx_) = state.state_;
#ifdef __CUDA_ARCH__
locks_(state.state_idx_) = 0;
return;
#endif
}
template <>
inline Random_XorShift1024_Pool<Kokkos::Cuda>::Random_XorShift1024_Pool(
uint64_t seed) {
num_states_ = 0;
init(seed, 4 * 32768);
}
template <>
KOKKOS_INLINE_FUNCTION Random_XorShift1024<Kokkos::Cuda>
Random_XorShift1024_Pool<Kokkos::Cuda>::get_state() const {
#ifdef __CUDA_ARCH__
const int i_offset =
(threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
blockDim.x * blockDim.y * blockDim.z +
i_offset) %
num_states_;
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
i += blockDim.x * blockDim.y * blockDim.z;
if (i >= num_states_) {
i = i_offset;
}
}
return Random_XorShift1024<Kokkos::Cuda>(state_, p_(i), i);
#else
return Random_XorShift1024<Kokkos::Cuda>(state_, p_(0), 0);
#endif
}
template <>
KOKKOS_INLINE_FUNCTION void Random_XorShift1024_Pool<Kokkos::Cuda>::free_state(
const Random_XorShift1024<Kokkos::Cuda>& state) const {
for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i];
#ifdef __CUDA_ARCH__
locks_(state.state_idx_) = 0;
return;
#endif
}
#endif
#if defined(KOKKOS_ENABLE_ROCM)
template <>
class Random_XorShift1024<Kokkos::Experimental::ROCm> {
private:
int p_;
const int state_idx_;
uint64_t* state_;
const int stride_;
friend class Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>;
public:
typedef Kokkos::Experimental::ROCm device_type;
typedef Random_XorShift1024_Pool<device_type> pool_type;
enum { MAX_URAND = 0xffffffffU };
enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 };
enum { MAX_RAND = static_cast<int>(0xffffffffU / 2) };
enum { MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL / 2 - 1) };
KOKKOS_INLINE_FUNCTION
Random_XorShift1024(const typename pool_type::state_data_type& state, int p,
int state_idx = 0)
: p_(p),
state_idx_(state_idx),
state_(&state(state_idx, 0)),
stride_(state.stride_1()) {}
KOKKOS_INLINE_FUNCTION
uint32_t urand() {
uint64_t state_0 = state_[p_ * stride_];
uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_];
state_1 ^= state_1 << 31;
state_1 ^= state_1 >> 11;
state_0 ^= state_0 >> 30;
uint64_t tmp =
(state_[p_ * stride_] = state_0 ^ state_1) * 1181783497276652981ULL;
tmp = tmp >> 16;
return static_cast<uint32_t>(tmp & MAX_URAND);
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64() {
uint64_t state_0 = state_[p_ * stride_];
uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_];
state_1 ^= state_1 << 31;
state_1 ^= state_1 >> 11;
state_0 ^= state_0 >> 30;
return ((state_[p_ * stride_] = state_0 ^ state_1) *
1181783497276652981LL) -
1;
}
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& range) {
const uint32_t max_val = (MAX_URAND / range) * range;
uint32_t tmp = urand();
while (tmp >= max_val) urand();
return tmp % range;
}
KOKKOS_INLINE_FUNCTION
uint32_t urand(const uint32_t& start, const uint32_t& end) {
return urand(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& range) {
const uint64_t max_val = (MAX_URAND64 / range) * range;
uint64_t tmp = urand64();
while (tmp >= max_val) urand64();
return tmp % range;
}
KOKKOS_INLINE_FUNCTION
uint64_t urand64(const uint64_t& start, const uint64_t& end) {
return urand64(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
int rand() { return static_cast<int>(urand() / 2); }
KOKKOS_INLINE_FUNCTION
int rand(const int& range) {
const int max_val = (MAX_RAND / range) * range;
int tmp = rand();
while (tmp >= max_val) rand();
return tmp % range;
}
KOKKOS_INLINE_FUNCTION
int rand(const int& start, const int& end) {
return rand(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
int64_t rand64() { return static_cast<int64_t>(urand64() / 2); }
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& range) {
const int64_t max_val = (MAX_RAND64 / range) * range;
int64_t tmp = rand64();
while (tmp >= max_val) rand64();
return tmp % range;
}
KOKKOS_INLINE_FUNCTION
int64_t rand64(const int64_t& start, const int64_t& end) {
return rand64(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
float frand() { return 1.0f * urand64() / MAX_URAND64; }
KOKKOS_INLINE_FUNCTION
float frand(const float& range) { return range * urand64() / MAX_URAND64; }
KOKKOS_INLINE_FUNCTION
float frand(const float& start, const float& end) {
return frand(end - start) + start;
}
KOKKOS_INLINE_FUNCTION
double drand() { return 1.0 * urand64() / MAX_URAND64; }
KOKKOS_INLINE_FUNCTION
double drand(const double& range) { return range * urand64() / MAX_URAND64; }
KOKKOS_INLINE_FUNCTION
double drand(const double& start, const double& end) {
return frand(end - start) + start;
}
// Marsaglia polar method for drawing a standard normal distributed random
// number
KOKKOS_INLINE_FUNCTION
double normal() {
double S = 2.0;
double U;
while (S >= 1.0) {
U = 2.0 * drand() - 1.0;
const double V = 2.0 * drand() - 1.0;
S = U * U + V * V;
}
return U * std::sqrt(-2.0 * log(S) / S);
}
KOKKOS_INLINE_FUNCTION
double normal(const double& mean, const double& std_dev = 1.0) {
return mean + normal() * std_dev;
}
};
template <>
inline Random_XorShift64_Pool<
Kokkos::Experimental::ROCm>::Random_XorShift64_Pool(uint64_t seed) {
num_states_ = 0;
init(seed, 4 * 32768);
}
template <>
KOKKOS_INLINE_FUNCTION Random_XorShift64<Kokkos::Experimental::ROCm>
Random_XorShift64_Pool<Kokkos::Experimental::ROCm>::get_state() const {
#ifdef __HCC_ACCELERATOR__
const int i_offset =
(threadIdx_x * blockDim_y + threadIdx_y) * blockDim_z + threadIdx_z;
int i = (((blockIdx_x * gridDim_y + blockIdx_y) * gridDim_z + blockIdx_z) *
blockDim_x * blockDim_y * blockDim_z +
i_offset) %
num_states_;
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
i += blockDim_x * blockDim_y * blockDim_z;
if (i >= num_states_) {
i = i_offset;
}
}
return Random_XorShift64<Kokkos::Experimental::ROCm>(state_(i), i);
#else
return Random_XorShift64<Kokkos::Experimental::ROCm>(state_(0), 0);
#endif
}
template <>
KOKKOS_INLINE_FUNCTION void
Random_XorShift64_Pool<Kokkos::Experimental::ROCm>::free_state(
const Random_XorShift64<Kokkos::Experimental::ROCm>& state) const {
#ifdef __HCC_ACCELERATOR__
state_(state.state_idx_) = state.state_;
locks_(state.state_idx_) = 0;
return;
#endif
}
template <>
inline Random_XorShift1024_Pool<
Kokkos::Experimental::ROCm>::Random_XorShift1024_Pool(uint64_t seed) {
num_states_ = 0;
init(seed, 4 * 32768);
}
template <>
KOKKOS_INLINE_FUNCTION Random_XorShift1024<Kokkos::Experimental::ROCm>
Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>::get_state() const {
#ifdef __HCC_ACCELERATOR__
const int i_offset =
(threadIdx_x * blockDim_y + threadIdx_y) * blockDim_z + threadIdx_z;
int i = (((blockIdx_x * gridDim_y + blockIdx_y) * gridDim_z + blockIdx_z) *
blockDim_x * blockDim_y * blockDim_z +
i_offset) %
num_states_;
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
i += blockDim_x * blockDim_y * blockDim_z;
if (i >= num_states_) {
i = i_offset;
}
}
return Random_XorShift1024<Kokkos::Experimental::ROCm>(state_, p_(i), i);
#else
return Random_XorShift1024<Kokkos::Experimental::ROCm>(state_, p_(0), 0);
#endif
}
template <>
KOKKOS_INLINE_FUNCTION void
Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>::free_state(
const Random_XorShift1024<Kokkos::Experimental::ROCm>& state) const {
#ifdef __HCC_ACCELERATOR__
for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i];
locks_(state.state_idx_) = 0;
return;
#endif
}
#endif
namespace Impl {
template <class ViewType, class RandomPool, int loops, int rank,
@ -2043,7 +1718,7 @@ void fill_random(ViewType a, RandomPool g,
typename ViewType::const_value_type range) {
int64_t LDA = a.extent(0);
if (LDA > 0)
parallel_for((LDA + 127) / 128,
parallel_for("Kokkos::fill_random", (LDA + 127) / 128,
Impl::fill_random_functor_range<ViewType, RandomPool, 128,
ViewType::Rank, IndexType>(
a, g, range));
@ -2055,7 +1730,7 @@ void fill_random(ViewType a, RandomPool g,
typename ViewType::const_value_type end) {
int64_t LDA = a.extent(0);
if (LDA > 0)
parallel_for((LDA + 127) / 128,
parallel_for("Kokkos::fill_random", (LDA + 127) / 128,
Impl::fill_random_functor_begin_end<ViewType, RandomPool, 128,
ViewType::Rank, IndexType>(
a, g, begin, end));

View File

@ -201,7 +201,7 @@ class BinSort {
bool sort_within_bins;
public:
BinSort() {}
BinSort() = default;
//----------------------------------------
// Constructor: takes the keys, the binning_operator and optionally whether to
@ -327,7 +327,7 @@ class BinSort {
Kokkos::RangePolicy<execution_space>(0, len), functor);
}
Kokkos::fence();
execution_space().fence();
}
template <class ValuesViewType>
@ -349,14 +349,14 @@ class BinSort {
public:
KOKKOS_INLINE_FUNCTION
void operator()(const bin_count_tag& tag, const int& i) const {
void operator()(const bin_count_tag& /*tag*/, const int i) const {
const int j = range_begin + i;
bin_count_atomic(bin_op.bin(keys, j))++;
}
KOKKOS_INLINE_FUNCTION
void operator()(const bin_offset_tag& tag, const int& i, value_type& offset,
const bool& final) const {
void operator()(const bin_offset_tag& /*tag*/, const int i,
value_type& offset, const bool& final) const {
if (final) {
bin_offsets(i) = offset;
}
@ -364,7 +364,7 @@ class BinSort {
}
KOKKOS_INLINE_FUNCTION
void operator()(const bin_binning_tag& tag, const int& i) const {
void operator()(const bin_binning_tag& /*tag*/, const int i) const {
const int j = range_begin + i;
const int bin = bin_op.bin(keys, j);
const int count = bin_count_atomic(bin)++;
@ -373,7 +373,7 @@ class BinSort {
}
KOKKOS_INLINE_FUNCTION
void operator()(const bin_sort_bins_tag& tag, const int& i) const {
void operator()(const bin_sort_bins_tag& /*tag*/, const int i) const {
auto bin_size = bin_count_const(i);
if (bin_size <= 1) return;
int upper_bound = bin_offsets(i) + bin_size;
@ -381,7 +381,7 @@ class BinSort {
while (!sorted) {
sorted = true;
int old_idx = sort_order(bin_offsets(i));
int new_idx;
int new_idx = 0;
for (int k = bin_offsets(i) + 1; k < upper_bound; k++) {
new_idx = sort_order(k);
@ -446,7 +446,7 @@ struct BinOp3D {
typename KeyViewType::non_const_value_type range_[3];
typename KeyViewType::non_const_value_type min_[3];
BinOp3D() {}
BinOp3D() = default;
BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
typename KeyViewType::const_value_type max[]) {

View File

@ -20,16 +20,38 @@ KOKKOS_ADD_TEST_LIBRARY(
HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h
SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc
)
KOKKOS_TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0")
# WORKAROUND FOR HIPCC
IF(Kokkos_ENABLE_HIP)
TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0 --amdgpu-target=gfx906")
ELSE()
TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0")
ENDIF()
TARGET_COMPILE_FEATURES(kokkosalgorithms_gtest PUBLIC cxx_std_11)
SET(SOURCES
UnitTestMain.cpp
TestCuda.cpp
)
UnitTestMain.cpp
)
IF(Kokkos_ENABLE_OPENMP)
LIST( APPEND SOURCES
TestOpenMP.cpp
TestOpenMP_Sort1D.cpp
TestOpenMP_Sort3D.cpp
TestOpenMP_SortDynamicView.cpp
TestOpenMP_Random.cpp
)
ENDIF()
IF(Kokkos_ENABLE_HIP)
LIST( APPEND SOURCES
TestHIP.cpp
)
ENDIF()
IF(Kokkos_ENABLE_CUDA)
LIST( APPEND SOURCES
TestCuda.cpp
)
ENDIF()

View File

@ -44,7 +44,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o
OBJ_OPENMP = TestOpenMP.o TestOpenMP_Random.o TestOpenMP_Sort1D.o TestOpenMP_Sort3D.o TestOpenMP_SortDynamicView.o UnitTestMain.o gtest-all.o
TARGETS += KokkosAlgorithms_UnitTest_OpenMP
TEST_TARGETS += test-openmp
endif

View File

@ -59,11 +59,15 @@
namespace Test {
void cuda_test_random_xorshift64(int num_draws) {
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Cuda> >(num_draws);
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Cuda>>(num_draws);
Impl::test_random<Kokkos::Random_XorShift64_Pool<
Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>>>(num_draws);
}
void cuda_test_random_xorshift1024(int num_draws) {
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Cuda> >(num_draws);
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Cuda>>(num_draws);
Impl::test_random<Kokkos::Random_XorShift1024_Pool<
Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>>>(num_draws);
}
#define CUDA_RANDOM_XORSHIFT64(num_draws) \

View File

@ -0,0 +1,83 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_HIP
#include <cstdint>
#include <iostream>
#include <iomanip>
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <TestRandom.hpp>
#include <TestSort.hpp>
namespace Test {
void hip_test_random_xorshift64(size_t num_draws) {
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Experimental::HIP>>(
num_draws);
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Device<
Kokkos::Experimental::HIP, Kokkos::Experimental::HIPSpace>>>(num_draws);
}
void hip_test_random_xorshift1024(size_t num_draws) {
Impl::test_random<
Kokkos::Random_XorShift1024_Pool<Kokkos::Experimental::HIP>>(num_draws);
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Device<
Kokkos::Experimental::HIP, Kokkos::Experimental::HIPSpace>>>(num_draws);
}
TEST(hip, Random_XorShift64) { hip_test_random_xorshift64(132141141); }
TEST(hip, Random_XorShift1024_0) { hip_test_random_xorshift1024(52428813); }
TEST(hip, SortUnsigned) {
Impl::test_sort<Kokkos::Experimental::HIP, unsigned>(171);
}
} // namespace Test
#else
void KOKKOS_ALGORITHMS_UNITTESTS_TESTHIP_PREVENT_LINK_ERROR() {}
#endif /* #ifdef KOKKOS_ENABLE_HIP */

View File

@ -55,30 +55,8 @@
namespace Test {
#define OPENMP_RANDOM_XORSHIFT64(num_draws) \
TEST(openmp, Random_XorShift64) { \
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::OpenMP> >( \
num_draws); \
}
TEST(openmp, SortIssue1160) { Impl::test_issue_1160_sort<Kokkos::OpenMP>(); }
#define OPENMP_RANDOM_XORSHIFT1024(num_draws) \
TEST(openmp, Random_XorShift1024) { \
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::OpenMP> >( \
num_draws); \
}
#define OPENMP_SORT_UNSIGNED(size) \
TEST(openmp, SortUnsigned) { \
Impl::test_sort<Kokkos::OpenMP, unsigned>(size); \
}
OPENMP_RANDOM_XORSHIFT64(10240000)
OPENMP_RANDOM_XORSHIFT1024(10130144)
OPENMP_SORT_UNSIGNED(171)
#undef OPENMP_RANDOM_XORSHIFT64
#undef OPENMP_RANDOM_XORSHIFT1024
#undef OPENMP_SORT_UNSIGNED
} // namespace Test
#else
void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {}

View File

@ -0,0 +1,77 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_OPENMP
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
#include <TestRandom.hpp>
#include <iomanip>
namespace Test {
#define OPENMP_RANDOM_XORSHIFT64(num_draws) \
TEST(openmp, Random_XorShift64) { \
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::OpenMP> >( \
num_draws); \
}
#define OPENMP_RANDOM_XORSHIFT1024(num_draws) \
TEST(openmp, Random_XorShift1024) { \
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::OpenMP> >( \
num_draws); \
}
OPENMP_RANDOM_XORSHIFT64(10240000)
OPENMP_RANDOM_XORSHIFT1024(10130144)
#undef OPENMP_RANDOM_XORSHIFT64
#undef OPENMP_RANDOM_XORSHIFT1024
} // namespace Test
#else
void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {}
#endif

View File

@ -0,0 +1,65 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_OPENMP
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
namespace Test {
TEST(openmp, SortUnsigned1D) {
Impl::test_1D_sort<Kokkos::OpenMP, unsigned>(171);
}
} // namespace Test
#else
void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {}
#endif

View File

@ -0,0 +1,65 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_OPENMP
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
namespace Test {
TEST(openmp, SortUnsigned3D) {
Impl::test_3D_sort<Kokkos::OpenMP, unsigned>(171);
}
} // namespace Test
#else
void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {}
#endif

View File

@ -0,0 +1,65 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Macros.hpp>
#ifdef KOKKOS_ENABLE_OPENMP
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
#include <TestRandom.hpp>
#include <TestSort.hpp>
#include <iomanip>
namespace Test {
TEST(openmp, SortUnsignedDynamicView) {
Impl::test_dynamic_view_sort<Kokkos::OpenMP, unsigned>(171);
}
} // namespace Test
#else
void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {}
#endif

Some files were not shown because too many files have changed in this diff Show More