Merge branch 'lammps:develop' into thermo_mod_range

This commit is contained in:
hammondkd 2022-10-13 09:01:06 -05:00 committed by GitHub
commit 41c0b39365
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
767 changed files with 43285 additions and 29129 deletions

View File

@ -105,7 +105,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4)
set(CMAKE_TUNE_DEFAULT "-xCOMMON-AVX512")
else()
set(CMAKE_TUNE_DEFAULT "-xHost")
set(CMAKE_TUNE_DEFAULT "-xHost -fp-model fast=2 -no-prec-div -qoverride-limits -diag-disable=10441 -diag-disable=2196")
endif()
endif()
endif()
@ -854,8 +854,11 @@ if(BUILD_SHARED_LIBS OR PKG_PYTHON)
find_package(Python COMPONENTS Interpreter)
endif()
if(Python_EXECUTABLE)
file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python)
install(CODE "execute_process(COMMAND ${Python_EXECUTABLE} setup.py build -b ${CMAKE_BINARY_DIR}/python install --prefix=${CMAKE_INSTALL_PREFIX} --root=\$ENV{DESTDIR}/ WORKING_DIRECTORY ${LAMMPS_PYTHON_DIR})")
file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python/lib)
file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python/src)
file(COPY ${LAMMPS_SOURCE_DIR}/version.h DESTINATION ${CMAKE_BINARY_DIR}/python/src)
file(COPY ${LAMMPS_PYTHON_DIR}/README ${LAMMPS_PYTHON_DIR}/pyproject.toml ${LAMMPS_PYTHON_DIR}/setup.py ${LAMMPS_PYTHON_DIR}/lammps DESTINATION ${CMAKE_BINARY_DIR}/python/lib)
install(CODE "if(\"\$ENV{DESTDIR}\" STREQUAL \"\")\n execute_process(COMMAND ${Python_EXECUTABLE} -m pip install -v ${CMAKE_BINARY_DIR}/python/lib --prefix=${CMAKE_INSTALL_PREFIX})\n else()\n execute_process(COMMAND ${Python_EXECUTABLE} -m pip install -v ${CMAKE_BINARY_DIR}/python/lib --prefix=${CMAKE_INSTALL_PREFIX} --root=\$ENV{DESTDIR})\n endif()")
endif()
endif()

View File

@ -47,8 +47,8 @@ if(DOWNLOAD_KOKKOS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject)
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.6.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "0ec97fc0c356dd65bd2487defe81a7bf" CACHE STRING "MD5 checksum of KOKKOS tarball")
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.7.00.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "84991eca9f066383abe119a5bc7a11c4" CACHE STRING "MD5 checksum of KOKKOS tarball")
mark_as_advanced(KOKKOS_URL)
mark_as_advanced(KOKKOS_MD5)
ExternalProject_Add(kokkos_build
@ -72,7 +72,7 @@ if(DOWNLOAD_KOKKOS)
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 3.6.01 REQUIRED CONFIG)
find_package(Kokkos 3.7.00 REQUIRED CONFIG)
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
target_link_libraries(lmp PRIVATE Kokkos::kokkos)
else()

View File

@ -1,6 +1,6 @@
set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2022.09.27.tar.gz" CACHE STRING "URL for PACE evaluator library sources")
set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2022.09.27.fix10Oct.tar.gz" CACHE STRING "URL for PACE evaluator library sources")
set(PACELIB_MD5 "ad6c8597076479bd55059f5947d51acc" CACHE STRING "MD5 checksum of PACE evaluator library tarball")
set(PACELIB_MD5 "766cebcc0e5c4b8430c2f3cd202d9905" CACHE STRING "MD5 checksum of PACE evaluator library tarball")
mark_as_advanced(PACELIB_URL)
mark_as_advanced(PACELIB_MD5)

View File

@ -1,4 +1,4 @@
# preset that will enable Intel compilers with support for MPI and OpenMP (on Linux boxes)
# preset that will enable the classic Intel compilers with support for MPI and OpenMP (on Linux boxes)
set(CMAKE_CXX_COMPILER "icpc" CACHE STRING "" FORCE)
set(CMAKE_C_COMPILER "icc" CACHE STRING "" FORCE)
@ -18,11 +18,11 @@ set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE)
unset(HAVE_OMP_H_INCLUDE CACHE)
set(OpenMP_C "icc" CACHE STRING "" FORCE)
set(OpenMP_C_FLAGS "-qopenmp" CACHE STRING "" FORCE)
set(OpenMP_C_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE)
set(OpenMP_CXX "icpc" CACHE STRING "" FORCE)
set(OpenMP_CXX_FLAGS "-qopenmp" CACHE STRING "" FORCE)
set(OpenMP_CXX_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE)
set(OpenMP_Fortran_FLAGS "-qopenmp" CACHE STRING "" FORCE)
set(OpenMP_Fortran_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
set(OpenMP_omp_LIBRARY "libiomp5.so" CACHE PATH "" FORCE)

View File

@ -18,11 +18,11 @@ set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE)
unset(HAVE_OMP_H_INCLUDE CACHE)
set(OpenMP_C "icx" CACHE STRING "" FORCE)
set(OpenMP_C_FLAGS "-qopenmp" CACHE STRING "" FORCE)
set(OpenMP_C_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE)
set(OpenMP_CXX "icpx" CACHE STRING "" FORCE)
set(OpenMP_CXX_FLAGS "-qopenmp" CACHE STRING "" FORCE)
set(OpenMP_CXX_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE)
set(OpenMP_Fortran_FLAGS "-qopenmp" CACHE STRING "" FORCE)
set(OpenMP_Fortran_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
set(OpenMP_omp_LIBRARY "libiomp5.so" CACHE PATH "" FORCE)

View File

@ -483,6 +483,9 @@ They must be specified in uppercase.
* - **Arch-ID**
- **HOST or GPU**
- **Description**
* - NATIVE
- HOST
- Local machine
* - AMDAVX
- HOST
- AMD 64-bit x86 CPU (AVX 1)
@ -522,9 +525,21 @@ They must be specified in uppercase.
* - BDW
- HOST
- Intel Broadwell Xeon E-class CPU (AVX 2 + transactional mem)
* - SKL
- HOST
- Intel Skylake Client CPU
* - SKX
- HOST
- Intel Sky Lake Xeon E-class HPC CPU (AVX512 + transactional mem)
- Intel Skylake Xeon Server CPU (AVX512)
* - ICL
- HOST
- Intel Ice Lake Client CPU (AVX512)
* - ICX
- HOST
- Intel Ice Lake Xeon Server CPU (AVX512)
* - SPR
- HOST
- Intel Sapphire Rapids Xeon Server CPU (AVX512)
* - KNC
- HOST
- Intel Knights Corner Xeon Phi
@ -596,7 +611,10 @@ They must be specified in uppercase.
- AMD GPU MI100 GFX908
* - VEGA90A
- GPU
- AMD GPU
- AMD GPU MI200 GFX90A
* - INTEL_GEN
- GPU
- SPIR64-based devices, e.g. Intel GPUs, using JIT
* - INTEL_DG1
- GPU
- Intel Iris XeMAX GPU
@ -611,9 +629,12 @@ They must be specified in uppercase.
- Intel GPU Gen12LP
* - INTEL_XEHP
- GPU
- Intel GPUs Xe-HP
- Intel GPU Xe-HP
* - INTEL_PVC
- GPU
- Intel GPU Ponte Vecchio
This list was last updated for version 3.5.0 of the Kokkos library.
This list was last updated for version 3.7.0 of the Kokkos library.
.. tabs::

View File

@ -91,7 +91,7 @@ quantities.
+--------------+-----------------------------------------------------+--------------------------------------+
| *charge* | charge | atomic system with charges |
+--------------+-----------------------------------------------------+--------------------------------------+
| *dielectric* | dipole, area, curvature | system with surface polarization |
| *dielectric* | normx normy normz area/patch ed em epsilon curv | system with surface polarization |
+--------------+-----------------------------------------------------+--------------------------------------+
| *dipole* | charge and dipole moment | system with dipolar particles |
+--------------+-----------------------------------------------------+--------------------------------------+
@ -180,16 +180,21 @@ vector with the 3 diameters of the ellipsoid and a quaternion 4-vector
with its orientation.
For the *dielectric* style, each particle can be either a physical
particle (e.g. an ion), or an interface particle representing a
boundary element. For physical particles, the per-particle properties
are the same as atom_style full. For interface particles, in addition
to these properties, each particle also has an area, a normal unit
vector, a mean local curvature, the mean and difference of the
dielectric constants of two sides of the interface, and the local
dielectric constant at the boundary element. The distinction between
the physical and interface particles is only meaningful when :doc:`fix
polarize <fix_polarize>` commands are applied to the interface
particles.
particle (e.g. an ion), or an interface particle representing a boundary
element between two regions of different dielectric constant. For
interface particles, in addition to the properties associated with
atom_style full, each particle also should be assigned a normal unit
vector (defined by normx, normy, normz), an area (area/patch), the
difference and mean of the dielectric constants of two sides of the
interface along the direction of the normal vector (ed and em), the
local dielectric constant at the boundary element (epsilon), and a mean
local curvature (curv). Physical particles must be assigned these
values, as well, but only their local dielectric constants will be used;
see documentation for associated :doc:`pair styles <pair_dielectric>`
and :doc:`fixes <fix_polarize>`. The distinction between the physical
and interface particles is only meaningful when :doc:`fix polarize
<fix_polarize>` commands are applied to the interface particles. This
style is part of the DIELECTRIC package.
For the *dipole* style, a point dipole is defined for each point
particle. Note that if you wish the particles to be finite-size

View File

@ -16,11 +16,11 @@ Syntax
.. parsed-literal::
fix ID group-ID style nevery tolerance ...
fix ID group-ID style nevery tolerance
* ID, group-ID are documented in :doc:`fix <fix>` command
* style = *polarize/bem/gmres* or *polarize/bem/icc* or *polarize/functional*
* Nevery = this fixed is invoked every this many timesteps
* nevery = this fix is invoked every this many timesteps
* tolerance = the relative tolerance for the iterative solver to stop
@ -46,44 +46,53 @@ Description
These fixes compute induced charges at the interface between two
impermeable media with different dielectric constants. The interfaces
need to be discretized into vertices, each representing a boundary element.
The vertices are treated as if they were regular atoms or particles.
:doc:`atom_style dielectric <atom_style>` should be used since it defines
the additional properties of each interface particle such as
interface normal vectors, element areas, and local dielectric mismatch.
These fixes also require the use of :doc:`pair_style <pair_style>` and
:doc:`kspace_style <kspace_style>` with the *dielectric* suffix.
At every time step, given a configuration of the physical charges in the system
(such as atoms and charged particles) these fixes compute and update
the charge of the interface particles. The interfaces are allowed to move
during the simulation with appropriate time integrators (for example,
with :doc:`fix_rigid <fix_rigid>`).
need to be discretized into vertices, each representing a boundary
element. The vertices are treated as if they were regular atoms or
particles. :doc:`atom_style dielectric <atom_style>` should be used
since it defines the additional properties of each interface particle
such as interface normal vectors, element areas, and local dielectric
mismatch. These fixes also require the use of :doc:`pair_style
<pair_style>` and :doc:`kspace_style <kspace_style>` with the
*dielectric* suffix. At every time step, given a configuration of the
physical charges in the system (such as atoms and charged particles)
these fixes compute and update the charge of the interface
particles. The interfaces are allowed to move during the simulation if
the appropriate time integrators are also set (for example, with
:doc:`fix_rigid <fix_rigid>`).
Consider an interface between two media: one with dielectric constant
of 78 (water), the other of 4 (silica). The interface is discretized
into 2000 boundary elements, each represented by an interface particle. Suppose that
each interface particle has a normal unit vector pointing from the silica medium to water.
The dielectric difference along the normal vector is then 78 - 4 = 74,
the mean dielectric value is (78 + 4) / 2 = 41. Each boundary element
also has its area and the local mean curvature (which is used by these fixes
for computing a correction term in the local electric field).
To model charged interfaces, the interface particle will have a non-zero charge value,
Consider an interface between two media: one with dielectric constant of
78 (water), the other of 4 (silica). The interface is discretized into
2000 boundary elements, each represented by an interface
particle. Suppose that each interface particle has a normal unit vector
pointing from the silica medium to water. The dielectric difference
along the normal vector is then 78 - 4 = 74, the mean dielectric value
is (78 + 4) / 2 = 41. Each boundary element also has its area and the
local mean curvature, which is used by these fixes for computing a
correction term in the local electric field. To model charged
interfaces, the interface particle will have a non-zero charge value,
coming from its area and surface charge density.
For non-interface particles such as atoms and charged particles,
the interface normal vectors, element area, and dielectric mismatch are
irrelevant. Their local dielectric value is used to rescale their actual charge
when computing the Coulombic interactions. For instance, for a cation carrying
a charge of +2 (in charge unit) in an implicit solvent with dielectric constant of 40
would have actual charge of +2, and a local dielectric constant value of 40.
It is assumed that the particles cannot pass through the interface during the simulation
so that its local dielectric constant value does not change.
For non-interface particles such as atoms and charged particles, the
interface normal vectors, element area, and dielectric mismatch are
irrelevant and unused. Their local dielectric value is used internally
to rescale their given charge when computing the Coulombic
interactions. For instance, to simulate a cation carrying a charge of +2
(in simulation charge units) in an implicit solvent with a dielectric
constant of 40, the cation's charge should be set to +2 and its local
dielectric constant property (defined in the :doc:`atom_style dielectric
<atom_style>`) should be set to 40; there is no need to manually rescale
charge. This will produce the proper force for any :doc:`pair_style
<pair_style>` with the dielectric suffix. It is assumed that the
particles cannot pass through the interface during the simulation
because the value of the local dielectric constant property does not
change.
There are some example scripts for using these fixes
with LAMMPS in the ``examples/PACKAGES/dielectric`` directory. The README file
therein contains specific details on the system setup. Note that the example data files
show the additional fields (columns) needed for :doc:`atom_style dielectric <atom_style>`
beyond the conventional fields *id*, *mol*, *type*, *q*, *x*, *y*, and *z*.
There are some example scripts for using these fixes with LAMMPS in the
``examples/PACKAGES/dielectric`` directory. The README file therein
contains specific details on the system setup. Note that the example
data files show the additional fields (columns) needed for
:doc:`atom_style dielectric <atom_style>` beyond the conventional fields
*id*, *mol*, *type*, *q*, *x*, *y*, and *z*.
----------
@ -104,22 +113,24 @@ the interface, are computed using the equation:
* :math:`\mathbf{E}(\mathbf{s})` is the electrical field at the vertex
* :math:`\mathbf{n}(\mathbf{s})` is the unit normal vector at the vertex pointing from medium with :math:`\epsilon_2` to that with :math:`\epsilon_1`
Fix *polarize/bem/gmres* employs the Generalized Minimum Residual (GMRES)
as described in :ref:`(Barros) <Barros>` to solve :math:`\sigma_b`.
Fix *polarize/bem/gmres* employs the Generalized Minimum Residual
(GMRES) as described in :ref:`(Barros) <Barros>` to solve
:math:`\sigma_b`.
Fix *polarize/bem/icc* employs the successive over-relaxation algorithm
as described in :ref:`(Tyagi) <Tyagi>` to solve :math:`\sigma_b`.
The iterative solvers would terminate either when the maximum relative change
in the induced charges in consecutive iterations is below the set tolerance,
or when the number of iterations reaches *iter_max* (see below).
The iterative solvers terminate either when the maximum relative
change in the induced charges in consecutive iterations is below the set
tolerance, or when the number of iterations reaches *iter_max* (see
below).
Fix *polarize/functional* employs the energy functional variation approach
as described in :ref:`(Jadhao) <Jadhao>` to solve :math:`\sigma_b`.
Fix *polarize/functional* employs the energy functional variation
approach as described in :ref:`(Jadhao) <Jadhao>` to solve
:math:`\sigma_b`.
More details on the implementation of these fixes and their recommended use
are described in :ref:`(NguyenTD) <NguyenTD>`.
More details on the implementation of these fixes and their recommended
use are described in :ref:`(NguyenTD) <NguyenTD>`.
Restart, fix_modify, output, run start/stop, minimize info
@ -127,35 +138,78 @@ Restart, fix_modify, output, run start/stop, minimize info
No information about this fix is written to :doc:`binary restart files <restart>`.
The :doc:`fix_modify <fix_modify>` command provides certain options to
control the induced charge solver and the initial values of the interface elements:
The :doc:`fix_modify <fix_modify>` command provides the ability to modify certain
settings:
.. parsed-literal::
*itr_max* arg
arg = maximum number of iterations for convergence
*dielectrics* ediff emean epsilon area charge
ediff = dielectric difference
emean = dielectric mean
epsilon = local dielectric value
aree = element area
charge = real interface charge
ediff = dielectric difference or NULL
emean = dielectric mean or NULL
epsilon = local dielectric value or NULL
area = element area or NULL
charge = real interface charge or NULL
*kspace* arg = yes or no
*rand* max seed
max = range of random induced charges to be generated
seed = random number seed to use when generating random charge
*mr* arg
arg = maximum number of q-vectors to use when solving (GMRES only)
*omega* arg
arg = relaxation parameter to use when iterating (ICC only)
*polarize/bem/gmres* or *polarize/bem/icc* compute a global 2-element vector
which can be accessed by various :doc:`output commands <Howto_output>`.
The first element is the number of iterations when the solver terminates
(of which the upper bound is set by *iter_max*). The second element is the RMS error.
The *itr_max* keyword sets the max number of iterations to be used for
solving each step.
The *dielectrics* keyword allows properties of the atoms in group
*group-ID* to be modified. Values passed to any of the arguments
(*ediff*, *emean*, *epsilon*, *area*, *charge*) will override existing
values for all atoms in the group *group-ID*. Passing NULL to any of
these arguments will preserve the existing value. Note that setting the
properties of the interface this way will change the properties of all
atoms associated with the fix (all atoms in *group-ID*), so multiple fix
and fix_modify commands would be needed to change the properties of two
different interfaces to different values (one fix and fix_modify for
each interface group).
The *kspace* keyword turns long-range interactions on (*yes*) or off (*no*).
If the arguments of the *rand* keyword are set, then the atoms subject
to this fix will be assigned a random initial charge in a uniform
distribution from -*max*/2 to *max*/2, using random number seed *seed*.
The *mr* keyword only applies to *style* = *polarize/bem/gmres*. It is
the maximum number of q-vectors to use when solving for the surface
charge.
The *omega* keyword only applies when using *style* =
*polarize/bem/icc*. It is a relaxation parameter defined in
:ref:`(Tyagi) <Tyagi>` that should generally be set between 0 and 2.
Note that the local dielectric constant (epsilon) can also be set
independently using the :doc:`set <set>` command.
----------
*polarize/bem/gmres* or *polarize/bem/icc* compute a global 2-element
vector which can be accessed by various :doc:`output commands
<Howto_output>`. The first element is the number of iterations when the
solver terminates (of which the upper bound is set by *itr_max*). The
second element is the RMS error.
Restrictions
""""""""""""
These fixes are part of the DIELECTRIC package. It is only enabled
These fixes are part of the DIELECTRIC package. They are only enabled
if LAMMPS was built with that package, which also requires that the
KSPACE package is installed. See the :doc:`Build package
<Build_package>` page for more info.
Note that the *polarize/bem/gmres* and *polarize/bem/icc* fixes only support
:doc:`units <units>` *lj*, *real*, *metal*, *si* and *nano* at the moment.
Note that the *polarize/bem/gmres* and *polarize/bem/icc* fixes only
support :doc:`units <units>` *lj*, *real*, *metal*, *si* and *nano* at
the moment.
Related commands
@ -171,6 +225,15 @@ Default
*itr_max* = 20
*kspace* = yes
*omega* = 0.7 (ICC only)
*mr* = \# atoms in group *group-ID* minus 1 (GMRES only)
No random charge initialization happens by default.
----------
.. _Barros:

View File

@ -76,16 +76,19 @@ Description
"""""""""""
All these pair styles are derived from the corresponding pair styles
without the *dielectric*\ suffix. In addition to computing atom forces
and energies, these pair styles compute the electrical field vector
at each atom, which are to be used in the :doc:`fix polarize <fix_polarize>` commands.
without the *dielectric* suffix. In addition to computing atom forces
and energies, these pair styles compute the electric field vector at
each atom, which is intended to be used by the :doc:`fix polarize
<fix_polarize>` commands to compute induced charges at interfaces
between two regions of different dielectric constant.
These pair styles should be used with :doc:`atom_style dielectric <atom_style>`,
which uses atom charges rescaled by their local dielectric constant.
These pair styles should be used with :doc:`atom_style dielectric
<atom_style>`.
The styles lj/cut/coul/long/dielectric, lj/cut/coul/msm/dielectric, and
lj/long/coul/long/dielectric should be used with their kspace style counterparts,
namely, pppm/dielectric, pppm/disp/dielectric, and msm/dielectric, respectively.
lj/long/coul/long/dielectric should be used with their kspace style
counterparts, namely, pppm/dielectric, pppm/disp/dielectric, and
msm/dielectric, respectively.
----------
@ -97,24 +100,27 @@ Mixing, shift, table, tail correction, restart, rRESPA info
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
For atom type pairs I,J and I != J, the epsilon and sigma coefficients
and cutoff distances for this pair style can be mixed. The default
mix value is *geometric*\ . See the "pair_modify" command for details.
and cutoff distances for this pair style can be mixed. The default mix
algorithm is *geometric*\ . See the :doc:`pair_modify <pair_modify>`
command for details.
The :doc:`pair_modify <pair_modify>` table option is not relevant
for this pair style.
This pair style writes its information to :doc:`binary restart files <restart>`, so pair_style and pair_coeff commands do not need
to be specified in an input script that reads a restart file.
These pair styles write their information to :doc:`binary restart files
<restart>`, so pair_style and pair_coeff commands do not need to be
specified in an input script that reads a restart file.
This pair style can only be used via the *pair* keyword of the
These pair styles can only be used via the *pair* keyword of the
:doc:`run_style respa <run_style>` command. They do not support the
*inner*, *middle*, *outer* keywords.
Restrictions
""""""""""""
These styles are part of the DIELECTRIC package. They are only enabled if
LAMMPS was built with that package. See the :doc:`Build package <Build_package>` page for more info.
These styles are part of the DIELECTRIC package. They are only enabled
if LAMMPS was built with that package. See the :doc:`Build package
<Build_package>` page for more info.
Related commands
""""""""""""""""

View File

@ -600,6 +600,7 @@ Cummins
Cundall
cundall
Curk
curv
Cusentino
customIDs
cutbond

View File

@ -2,7 +2,7 @@ This folder contains some example data and input scripts for the DIELECTRIC pack
Nguyen TD, Li H, Bagchi D, Solis FJ, Olvera de la Cruz, Incorporating surface polarization effects into large-scale coarse-grained molecular dynamics simulation, Computer Physics Communications 2019, 241, 80--91.
- data.confined : two point opposite charges confined between two interfaces (epsilon1=2/epsilon2=10/epsilon2=2)
- data.confined : two point opposite charges confined between two interfaces (epsilon2=2/epsilon1=10/epsilon2=2)
- data.sphere : two point opposite charges outside a spherical interface (epsilon_in=1/epsilon2=10)
- in.confined : read in data.confined
@ -10,7 +10,7 @@ Nguyen TD, Li H, Bagchi D, Solis FJ, Olvera de la Cruz, Incorporating surface po
For "atom_style dielectric" the Atoms section in the data file contains the following 15 columns:
id mol type q x y z normx normy normz area_per_patch ed em epsilon curvature
id mol type q x y z normx normy normz area/patch ed em epsilon curvature
where
@ -34,9 +34,13 @@ where
For interface particles, epsilon is set to be em
(the mean dielectric value above).
* area_per_patch: the surface area of the patch (element).
* area/patch: the surface area of the patch (element).
For real charges, this value is irrelevant and can be set to 1.0.
* curvature: surface mean curvature at the patch.
For example, for spherical interfaces, curvature = 1/spherical radius.
For planar interfaces, curvature = 0.
Note that the properties normx, normy, normz, area/patch, ed, em, and curvature are not
used for the non-interface beads. epsilon is used to scale the charge of any non-interface
ion; see the documentation for pair styles with the dielectric suffix and fix polarize.

View File

@ -7,7 +7,7 @@
# Dielectric constants can be set to be different from the input data file
variable epsilon1 index 20
variable epsilon2 index 8
variable epsilon2 index 10
variable data index data.confined

View File

@ -1,5 +1,5 @@
# /* ----------------------------------------------------------------------
# Generic Linux Makefile for OpenCL
# /* ----------------------------------------------------------------------
# Linux Makefile for Intel oneAPI - Mixed precision
# ------------------------------------------------------------------------- */
# which file will be copied to Makefile.lammps
@ -11,11 +11,14 @@ EXTRAMAKE = Makefile.lammps.opencl
LMP_INC = -DLAMMPS_SMALLBIG
OCL_INC =
OCL_CPP = mpiicpc -std=c++11 -xHost -O2 -qopenmp -qopenmp-simd -DMPICH_IGNORE_CXX_SEEK $(LMP_INC) $(OCL_INC)
OCL_LINK = -lOpenCL
OCL_INC = -I$(ONEAPI_ROOT)/compiler/latest/linux/include/sycl/
CPP_OPT = -xHost -O2 -qopenmp -qopenmp-simd -fp-model fast=2 -no-prec-div \
-qoverride-limits
OCL_CPP = mpiicpc -std=c++11 -diag-disable=10441 -DMPICH_IGNORE_CXX_SEEK \
$(LMP_INC) $(OCL_INC) $(CPP_OPT)
OCL_LINK = -L$(ONEAPI_ROOT)/compiler/latest/linux/lib -lOpenCL
OCL_PREC = -D_SINGLE_DOUBLE
OCL_TUNE = -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT -fp-model fast=2 -no-prec-div
OCL_TUNE = -DMPI_GERYON -DCUDA_PROXY -DGERYON_NUMA_FISSION -DUCL_NO_EXIT
BIN_DIR = ./
OBJ_DIR = ./

View File

@ -264,6 +264,20 @@ GERYON_KERNEL_DUMP Dump all compiled OpenCL programs with compiler
flags and build logs
GPU_CAST Casting performed on GPU, untested recently
THREE_CONCURRENT Concurrent 3-body calcs in separate queues, untested
LAL_SERIALIZE_INIT Force serialization of initialization and compilation
for multiple MPI tasks sharing the same accelerator.
Some accelerator API implementations have had issues
with temporary file conflicts in the past.
GERYON_FORCE_SHARED_MAIN_MEM_ON Should only be used for builds where the
accelerator is guaranteed to share physical
main memory with the host (e.g. integrated
GPU or CPU device). Default behavior is to
auto-detect. Impacts OpenCL only.
GERYON_FORCE_SHARED_MAIN_MEM_OFF Should only be used for builds where the
accelerator is guaranteed to have discrete
physical main memory vs the host (discrete
GPU card). Default behavior is to
auto-detect. Impacts OpenCL only.
------------------------------------------------------------------------------

View File

@ -126,10 +126,13 @@ class UCL_Device {
/// Return the number of devices that support OpenCL
inline int num_devices() { return _num_devices; }
/// Specify whether profiling (device timers) will be used for the device (yes=true)
/// Specify whether profiling (device timers) will be used (yes=true)
/** No-op for CUDA and HIP **/
inline void configure_profiling(const bool profiling_on)
{ _cq_profiling = profiling_on; }
inline void configure_profiling(const bool profiling_on) {
#ifndef GERYON_NO_OCL_MARKERS
_cq_profiling = profiling_on;
#endif
}
/// Set the OpenCL device to the specified device number
/** A context and default command queue will be created for the device *
@ -176,8 +179,8 @@ class UCL_Device {
#ifdef CL_VERSION_2_0
if (_cq_profiling) {
cl_queue_properties props[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE,
0};
cl_queue_properties props[] = {CL_QUEUE_PROPERTIES,
CL_QUEUE_PROFILING_ENABLE, 0};
_cq.back()=clCreateCommandQueueWithProperties(_context, _cl_device, props,
&errorv);
} else {
@ -187,8 +190,8 @@ class UCL_Device {
}
#else
if (_cq_profiling)
_cq.back()=clCreateCommandQueue(_context, _cl_device, CL_QUEUE_PROFILING_ENABLE,
&errorv);
_cq.back()=clCreateCommandQueue(_context, _cl_device,
CL_QUEUE_PROFILING_ENABLE, &errorv);
else
_cq.back()=clCreateCommandQueue(_context, _cl_device, 0, &errorv);
#endif
@ -403,7 +406,11 @@ class UCL_Device {
// Grabs the properties for all devices
UCL_Device::UCL_Device() {
_device=-1;
#ifndef GERYON_NO_OCL_MARKERS
_cq_profiling=true;
#else
_cq_profiling=false;
#endif
// --- Get Number of Platforms
cl_uint nplatforms;
@ -482,6 +489,7 @@ int UCL_Device::set_platform(int pid) {
_num_devices = 0;
for (int i=0; i<num_unpart; i++) {
cl_uint num_subdevices = 1;
cl_device_id *subdevice_list = device_list + i;
#ifdef CL_VERSION_1_2
cl_device_affinity_domain adomain;
@ -494,25 +502,29 @@ int UCL_Device::set_platform(int pid) {
props[0]=CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
props[1]=CL_DEVICE_AFFINITY_DOMAIN_NUMA;
props[2]=0;
cl_int err = CL_SUCCESS;
if (adomain & CL_DEVICE_AFFINITY_DOMAIN_NUMA)
CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, 0, NULL,
&num_subdevices));
if (num_subdevices > 1) {
cl_device_id *subdevice_list = new cl_device_id[num_subdevices];
CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, num_subdevices,
subdevice_list, &num_subdevices));
for (cl_uint j=0; j<num_subdevices; j++) {
_cl_devices.push_back(device_list[i]);
add_properties(device_list[i]);
_num_devices++;
err = clCreateSubDevices(device_list[i], props, 0, NULL,
&num_subdevices);
if (err == CL_SUCCESS && num_subdevices > 1) {
subdevice_list = new cl_device_id[num_subdevices];
err = clCreateSubDevices(device_list[i], props, num_subdevices,
subdevice_list, &num_subdevices);
if (err != CL_SUCCESS) {
delete[] subdevice_list;
num_subdevices = 1;
subdevice_list = device_list + i;
}
delete[] subdevice_list;
} else {
_cl_devices.push_back(device_list[i]);
add_properties(device_list[i]);
_num_devices++;
}
#endif
for (cl_uint j=0; j<num_subdevices; j++) {
_num_devices++;
_cl_devices.push_back(subdevice_list[j]);
add_properties(subdevice_list[j]);
}
if (num_subdevices > 1) delete[] subdevice_list;
} // for i
#endif
@ -686,10 +698,10 @@ void UCL_Device::add_properties(cl_device_id device_list) {
double arch = static_cast<double>(minor)/10+major;
if (arch >= 3.0)
op.has_shuffle_support=true;
op.shared_main_memory=_shared_mem_device(device_list);
}
delete[] buffer2;
#endif
op.shared_main_memory=_shared_mem_device(device_list);
_properties.push_back(op);
}

View File

@ -27,11 +27,15 @@
#include "ocl_macros.h"
#include "ocl_device.h"
// Select how the timer enqueues marker events. Defining
// GERYON_NO_OCL_MARKERS compiles the marker call away entirely.
#ifndef GERYON_NO_OCL_MARKERS
#ifdef CL_VERSION_1_2
// CL_VERSION_1_2 headers: enqueue the marker with an empty wait list (0, nullptr)
#define UCL_OCL_MARKER(cq,event) clEnqueueMarkerWithWaitList(cq,0,nullptr,event)
#else
// Otherwise the macro is a plain alias, so UCL_OCL_MARKER(cq,event)
// becomes clEnqueueMarker(cq,event)
#define UCL_OCL_MARKER clEnqueueMarker
#endif
#else
// Markers disabled: UCL_OCL_MARKER(cq,event) expands to nothing
#define UCL_OCL_MARKER(cq,event)
#endif
namespace ucl_opencl {
@ -51,8 +55,10 @@ class UCL_Timer {
inline void clear() {
if (_initialized) {
if (has_measured_time) {
#ifndef GERYON_NO_OCL_MARKERS
clReleaseEvent(start_event);
clReleaseEvent(stop_event);
#endif
has_measured_time = false;
}
CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq));
@ -76,8 +82,10 @@ class UCL_Timer {
/// Start timing on default command queue
inline void start() {
if (has_measured_time) {
#ifndef GERYON_NO_OCL_MARKERS
clReleaseEvent(start_event);
clReleaseEvent(stop_event);
#endif
has_measured_time = false;
}
UCL_OCL_MARKER(_cq,&start_event);
@ -91,17 +99,26 @@ class UCL_Timer {
/// Block until the start event has been reached on device
/** With marker events enabled, waits on start_event and then, if a
  * measurement was pending, releases both events and clears
  * has_measured_time. With GERYON_NO_OCL_MARKERS defined, finishes the
  * command queue instead and clears has_measured_time. **/
inline void sync_start() {
#ifndef GERYON_NO_OCL_MARKERS
  // Wait exactly once, and do it before any release: waiting again after
  // clReleaseEvent() would hand a released event handle to the runtime.
  CL_SAFE_CALL(clWaitForEvents(1,&start_event));
  if (has_measured_time) {
    clReleaseEvent(start_event);
    clReleaseEvent(stop_event);
    has_measured_time = false;
  }
#else
  // Marker events are compiled out; fall back to finishing the queue
  CL_SAFE_CALL(clFinish(_cq));
  has_measured_time = false;
#endif
}
/// Block until the stop event has been reached on device
/** Finishes the whole command queue when GERYON_NO_OCL_MARKERS is
  * defined; otherwise waits on stop_event only. **/
inline void sync_stop() {
#ifdef GERYON_NO_OCL_MARKERS
  // Marker events are compiled out; finish the queue instead
  CL_SAFE_CALL(clFinish(_cq));
#else
  CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
#endif
  // Flag that a measurement is pending (time() returns 0.0 while this is false)
  has_measured_time = true;
}
@ -126,6 +143,7 @@ class UCL_Timer {
/// Return the time (ms) of last start to stop - Forces synchronization
inline double time() {
if(!has_measured_time) return 0.0;
#ifndef GERYON_NO_OCL_MARKERS
cl_ulong tstart,tend;
CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
CL_SAFE_CALL(clGetEventProfilingInfo(stop_event,
@ -138,6 +156,11 @@ class UCL_Timer {
clReleaseEvent(stop_event);
has_measured_time = false;
return (tend-tstart)*1e-6;
#else
CL_SAFE_CALL(clFinish(_cq));
has_measured_time = false;
return 0.0;
#endif
}
/// Return the time (s) of last start to stop - Forces synchronization

View File

@ -76,7 +76,7 @@ int beck_gpu_init(const int ntypes, double **cutsq, double **aa,
special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
BLMF.device->gpu_barrier();
BLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -84,7 +84,7 @@ int bornclcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
gpu_split, screen, host_cut_ljsq, host_cut_coulsq,
host_special_coul, qqrd2e, g_ewald);
BCLCSMF.device->gpu_barrier();
BCLCSMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -84,7 +84,7 @@ int borncl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
gpu_split, screen, host_cut_ljsq, host_cut_coulsq,
host_special_coul, qqrd2e, g_ewald);
BORNCLMF.device->gpu_barrier();
BORNCLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -86,7 +86,7 @@ int borncwcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
host_cut_coulsq, host_special_coul, qqrd2e,
alf, e_shift, f_shift);
BornCWCST.device->gpu_barrier();
BornCWCST.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -86,7 +86,7 @@ int borncw_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
host_cut_coulsq, host_special_coul, qqrd2e,
alf, e_shift, f_shift);
BORNCWMF.device->gpu_barrier();
BORNCWMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -80,7 +80,7 @@ int born_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
offset, special_lj, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen);
BORNMF.device->gpu_barrier();
BORNMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -114,7 +114,7 @@ void born_gpu_reinit(const int ntypes, double **host_rhoinv,
BORNMF.reinit(ntypes, host_rhoinv, host_born1, host_born2,
host_born3, host_a, host_c, host_d, offset);
BORNMF.device->gpu_barrier();
BORNMF.device->serialize_init();
}
}

View File

@ -83,7 +83,7 @@ int buckc_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
host_cut_ljsq, host_cut_coulsq,
host_special_coul, qqrd2e);
BUCKCMF.device->gpu_barrier();
BUCKCMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -82,7 +82,7 @@ int buckcl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
maxspecial, cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
BUCKCLMF.device->gpu_barrier();
BUCKCLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -77,7 +77,7 @@ int buck_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
host_a, host_c, offset, special_lj, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen);
BUCKMF.device->gpu_barrier();
BUCKMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -110,7 +110,7 @@ void buck_gpu_reinit(const int ntypes, double **cutsq, double **host_rhoinv,
BUCKMF.reinit(ntypes, cutsq, host_rhoinv, host_buck1, host_buck2,
host_a, host_c, offset);
BUCKMF.device->gpu_barrier();
BUCKMF.device->serialize_init();
}
}

View File

@ -88,7 +88,7 @@ int crm_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1,
qqrd2e, cut_lj_innersq, cut_coul_innersq, denom_lj,
denom_coul, epsilon, sigma, mix_arithmetic);
CRMMF.device->gpu_barrier();
CRMMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -86,7 +86,7 @@ int crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1,
qqrd2e, g_ewald, cut_lj_innersq, denom_lj, epsilon,
sigma, mix_arithmetic);
CRMLMF.device->gpu_barrier();
CRMLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -83,7 +83,7 @@ int colloid_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
COLLMF.device->gpu_barrier();
COLLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -74,7 +74,7 @@ int cdebye_gpu_init(const int ntypes, double **host_scale, double **cutsq,
init_ok=CDEMF.init(ntypes, host_scale, cutsq, host_special_coul, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen, qqrd2e, kappa);
CDEMF.device->gpu_barrier();
CDEMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -103,7 +103,7 @@ void cdebye_gpu_reinit(const int ntypes, double **host_scale) {
if (gpu_rank==i && world_me!=0)
CDEMF.reinit(ntypes, host_scale);
CDEMF.device->gpu_barrier();
CDEMF.device->serialize_init();
}
}

View File

@ -77,7 +77,7 @@ int cdsf_gpu_init(const int ntypes, const int inum, const int nall,
gpu_split, screen, host_cut_coulsq, host_special_coul,
qqrd2e, e_shift, f_shift, alpha);
CDMF.device->gpu_barrier();
CDMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -74,7 +74,7 @@ int coul_gpu_init(const int ntypes, double **host_scale,
init_ok=COULMF.init(ntypes, host_scale, cutsq, special_coul, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen, qqrd2e);
COULMF.device->gpu_barrier();
COULMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -103,7 +103,7 @@ void coul_gpu_reinit(const int ntypes, double **host_scale) {
if (gpu_rank==i && world_me!=0)
COULMF.reinit(ntypes, host_scale);
COULMF.device->gpu_barrier();
COULMF.device->serialize_init();
}
}

View File

@ -76,7 +76,7 @@ int clcs_gpu_init(const int ntypes, double **host_scale,
cell_size, gpu_split, screen, host_cut_coulsq,
host_special_coul, qqrd2e, g_ewald);
CLCSMF.device->gpu_barrier();
CLCSMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -105,7 +105,7 @@ void clcs_gpu_reinit(const int ntypes, double **host_scale) {
if (gpu_rank==i && world_me!=0)
CLCSMF.reinit(ntypes, host_scale);
CLCSMF.device->gpu_barrier();
CLCSMF.device->serialize_init();
}
}

View File

@ -76,7 +76,7 @@ int cl_gpu_init(const int ntypes, double **host_scale,
cell_size, gpu_split, screen, host_cut_coulsq,
host_special_coul, qqrd2e, g_ewald);
CLMF.device->gpu_barrier();
CLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -105,7 +105,7 @@ void cl_gpu_reinit(const int ntypes, double **host_scale) {
if (gpu_rank==i && world_me!=0)
CLMF.reinit(ntypes, host_scale);
CLMF.device->gpu_barrier();
CLMF.device->serialize_init();
}
}

View File

@ -328,7 +328,7 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
for (int i=0; i<_procs_per_gpu; i++) {
if (_gpu_rank==i)
flag=compile_kernels();
gpu_barrier();
serialize_init();
}
// check if double precision support is available
@ -609,6 +609,10 @@ void DeviceT::init_message(FILE *screen, const char *name,
int last=last_gpu+1;
if (last>gpu->num_devices())
last=gpu->num_devices();
if (gpu->num_platforms()>1) {
std::string pname=gpu->platform_name();
fprintf(screen,"Platform: %s\n",pname.c_str());
}
for (int i=first_gpu; i<last; i++) {
std::string sname;
if (i==first_gpu)

View File

@ -217,6 +217,12 @@ class Device {
inline int gpu_rank() const {
  // Plain accessor: hands back the stored _gpu_rank value unchanged
  return _gpu_rank;
}
/// Barrier across the _comm_gpu MPI communicator
inline void gpu_barrier() {
  MPI_Barrier(_comm_gpu);
}
/// Optional barrier used to serialize GPU initialization and JIT
/** Only does anything when LAL_SERIALIZE_INIT is defined, in which case
  * it issues gpu_barrier(); otherwise the body is empty. **/
inline void serialize_init() {
#if defined(LAL_SERIALIZE_INIT)
  gpu_barrier();
#endif
}
/// Acceleration 'mode' accessor: GPU_FORCE, GPU_NEIGH or GPU_HYB_NEIGH
inline int gpu_mode() const {
  return _gpu_mode;
}
/// Index of first device used by a node

View File

@ -80,7 +80,7 @@ int dpl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e);
DPLMF.device->gpu_barrier();
DPLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -80,7 +80,7 @@ int dplsf_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e);
DPLSFMF.device->gpu_barrier();
DPLSFMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -81,7 +81,7 @@ int dplj_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
DPLJMF.device->gpu_barrier();
DPLJMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -76,7 +76,7 @@ int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0,
host_cut, special_lj, false, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen);
DPDMF.device->gpu_barrier();
DPDMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -76,7 +76,7 @@ int dpd_tstat_gpu_init(const int ntypes, double **cutsq, double **host_a0,
host_cut, special_lj, true, inum, nall, 300,
maxspecial, cell_size, gpu_split, screen);
DPDTMF.device->gpu_barrier();
DPDTMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -90,7 +90,7 @@ int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq,
nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
EAMALMF.device->gpu_barrier();
EAMALMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -90,7 +90,7 @@ int eam_gpu_init(const int ntypes, double host_cutforcesq,
nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
EAMMF.device->gpu_barrier();
EAMMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -90,7 +90,7 @@ int eam_fs_gpu_init(const int ntypes, double host_cutforcesq,
nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
EAMFSMF.device->gpu_barrier();
EAMFSMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -76,7 +76,7 @@ int gauss_gpu_init(const int ntypes, double **cutsq, double **host_a,
offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
GLMF.device->gpu_barrier();
GLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -106,7 +106,7 @@ void gauss_gpu_reinit(const int ntypes, double **cutsq, double **host_a,
if (gpu_rank==i && world_me!=0)
GLMF.reinit(ntypes, cutsq, host_a, host_b, offset);
GLMF.device->gpu_barrier();
GLMF.device->serialize_init();
}
}

View File

@ -83,7 +83,7 @@ int gb_gpu_init(const int ntypes, const double gamma,
host_lj3, host_lj4, offset, special_lj, inum, nall,
max_nbors, maxspecial, cell_size, gpu_split, screen);
GBMF.device->gpu_barrier();
GBMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -76,7 +76,7 @@ int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
LJ96MF.device->gpu_barrier();
LJ96MF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -81,7 +81,7 @@ int c2cl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
C2CLMF.device->gpu_barrier();
C2CLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -81,7 +81,7 @@ int ljcd_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, kappa);
LJCDMF.device->gpu_barrier();
LJCDMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -80,7 +80,7 @@ int ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e);
LJCMF.device->gpu_barrier();
LJCMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -81,7 +81,7 @@ int ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
LJCLMF.device->gpu_barrier();
LJCLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -112,7 +112,7 @@ void ljcl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
if (gpu_rank==i && world_me!=0)
LJCLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
offset, host_cut_ljsq);
LJCLMF.device->gpu_barrier();
LJCLMF.device->serialize_init();
}
}

View File

@ -83,7 +83,7 @@ int ljcm_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, order, qqrd2e);
LJCMLMF.device->gpu_barrier();
LJCMLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -80,7 +80,7 @@ int ljcb_gpu_init(const int ntypes, double **cutsq, double **cut_inner_sq,
special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
LJCubicLMF.device->gpu_barrier();
LJCubicLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -84,7 +84,7 @@ int ljd_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
host_cut_coulsq, host_special_coul, qqrd2e, e_shift,
f_shift, alpha);
LJDMF.device->gpu_barrier();
LJDMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -81,7 +81,7 @@ int ljecl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
LJECLMF.device->gpu_barrier();
LJECLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -112,7 +112,7 @@ void ljecl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
if (gpu_rank==i && world_me!=0)
LJECLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
offset, shift, host_cut_ljsq);
LJECLMF.device->gpu_barrier();
LJECLMF.device->serialize_init();
}
}

View File

@ -108,7 +108,7 @@ void lje_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
if (gpu_rank==i && world_me!=0)
LJEMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
offset, shift);
LJEMF.device->gpu_barrier();
LJEMF.device->serialize_init();
}
}

View File

@ -76,7 +76,7 @@ int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
LJLMF.device->gpu_barrier();
LJLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -105,7 +105,7 @@ void ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
for (int i=0; i<procs_per_gpu; i++) {
if (gpu_rank==i && world_me!=0)
LJLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset);
LJLMF.device->gpu_barrier();
LJLMF.device->serialize_init();
}
}

View File

@ -81,7 +81,7 @@ int ljgrm_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
gpu_split, screen, host_ljsw1, host_ljsw2, host_ljsw3,
host_ljsw4, host_ljsw5, cut_inner, cut_inner_sq);
LJGRMMF.device->gpu_barrier();
LJGRMMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -80,7 +80,7 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3,
host_ljsw4, cut_inner, cut_inner_sq);
LJSMTMF.device->gpu_barrier();
LJSMTMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -110,7 +110,7 @@ void ljsmt_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
for (int i=0; i<procs_per_gpu; i++) {
if (gpu_rank==i && world_me!=0)
LJSMTMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq);
LJSMTMF.device->gpu_barrier();
LJSMTMF.device->serialize_init();
}
}

View File

@ -77,7 +77,7 @@ int spica_gpu_init(const int ntypes, double **cutsq, int **cg_types,
host_lj4, offset, special_lj, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen);
CMMMF.device->gpu_barrier();
CMMMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -81,7 +81,7 @@ int spical_gpu_init(const int ntypes, double **cutsq, int **cg_type,
maxspecial, cell_size, gpu_split, screen,
host_cut_ljsq, host_cut_coulsq, host_special_coul,
qqrd2e, g_ewald);
CMMLMF.device->gpu_barrier();
CMMLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -89,7 +89,7 @@ int ljtip4p_long_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
host_special_coul, qqrd2e,
g_ewald, map_size, max_same);
LJTIP4PLMF.device->gpu_barrier();
LJTIP4PLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -80,7 +80,7 @@ int mie_gpu_init(const int ntypes, double **cutsq, double **host_mie1,
offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
MLMF.device->gpu_barrier();
MLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -77,7 +77,7 @@ int mor_gpu_init(const int ntypes, double **cutsq,
offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
MORMF.device->gpu_barrier();
MORMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -81,7 +81,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
split,success);
pppm.device->gpu_barrier();
pppm.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -80,7 +80,7 @@ int re_gpu_init(const int ntypes, double **shape, double **well, double **cutsq,
host_lj4, offset, special_lj, inum, nall,
max_nbors, maxspecial, cell_size, gpu_split, screen);
REMF.device->gpu_barrier();
REMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -76,7 +76,7 @@ int soft_gpu_init(const int ntypes, double **cutsq, double **host_prefactor,
special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
SLMF.device->gpu_barrier();
SLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -106,7 +106,7 @@ void soft_gpu_reinit(const int ntypes, double **cutsq, double **host_prefactor,
if (gpu_rank==i && world_me!=0)
SLMF.reinit(ntypes, cutsq, host_prefactor, host_cut);
SLMF.device->gpu_barrier();
SLMF.device->serialize_init();
}
}

View File

@ -84,7 +84,7 @@ int sw_gpu_init(const int ntypes, const int inum, const int nall,
sigma_gamma, c1, c2, c3, c4, c5, c6, lambda_epsilon,
costheta, map, e2param);
SWMF.device->gpu_barrier();
SWMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -76,7 +76,7 @@ int table_gpu_init(const int ntypes, double **cutsq, double ***table_coeffs,
special_lj, inum, nall, max_nbors, maxspecial, cell_size,
gpu_split, screen, tabstyle, ntables, tablength);
TBMF.device->gpu_barrier();
TBMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -91,7 +91,7 @@ int tersoff_gpu_init(const int ntypes, const int inum, const int nall, const int
ts_c1, ts_c2, ts_c3, ts_c4, ts_c, ts_d, ts_h,
ts_gamma, ts_beta, ts_powern, ts_cutsq);
TSMF.device->gpu_barrier();
TSMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -91,7 +91,7 @@ int tersoff_mod_gpu_init(const int ntypes, const int inum, const int nall,
ts_c3, ts_c4, ts_c5, ts_h, ts_beta, ts_powern,
ts_powern_del, ts_ca1, ts_cutsq);
TSMMF.device->gpu_barrier();
TSMMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -102,7 +102,7 @@ int tersoff_zbl_gpu_init(const int ntypes, const int inum, const int nall,
ts_ZBLcut, ts_ZBLexpscale, global_e, global_a_0,
global_epsilon_0, ts_cutsq);
TSZMF.device->gpu_barrier();
TSZMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -78,7 +78,7 @@ int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1,
offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
UFMLMF.device->gpu_barrier();
UFMLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -106,7 +106,7 @@ void ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1,
for (int i=0; i<procs_per_gpu; i++) {
if (gpu_rank==i && world_me!=0)
UFMLMF.reinit(ntypes, cutsq, host_uf1, host_uf2, host_uf3, offset);
UFMLMF.device->gpu_barrier();
UFMLMF.device->serialize_init();
}
}

View File

@ -89,7 +89,7 @@ int vashishta_gpu_init(const int ntypes, const int inum, const int nall, const i
lam4inv, zizj, mbigd, dvrc, big6w, heta, bigh, bigw,
c0, costheta, bigb, big2b, bigc);
VashishtaMF.device->gpu_barrier();
VashishtaMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -76,7 +76,7 @@ int ykcolloid_gpu_init(const int ntypes, double **cutsq, double **host_a,
inum, nall, max_nbors, maxspecial, cell_size, gpu_split,
screen, kappa);
YKCOLLMF.device->gpu_barrier();
YKCOLLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -76,7 +76,7 @@ int yukawa_gpu_init(const int ntypes, double **cutsq, double kappa,
inum, nall, max_nbors, maxspecial, cell_size,
gpu_split, screen);
YKMF.device->gpu_barrier();
YKMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -79,7 +79,7 @@ int zbl_gpu_init(const int ntypes, double **cutsq, double **host_sw1,
cut_globalsq, cut_innersq, cut_inner,
inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen);
ZBLMF.device->gpu_barrier();
ZBLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -27,7 +27,7 @@ When configuring your project just set:
-DKokkos_ROOT=${kokkos_install_prefix} \
-DCMAKE_CXX_COMPILER=${compiler_used_to_build_kokkos}
````
Note: You may need the following if using some versions of CMake (e.g. 3.12):
Note: You may need the following if your project requires a minimum CMake version older than 3.12:
````cmake
cmake_policy(SET CMP0074 NEW)
````
@ -171,6 +171,9 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`.
* Kokkos_ENABLE_HPX_ASYNC_DISPATCH
* Whether HPX supports asynchronous dispatch
* BOOL Default: OFF
* Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC
* Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2). This is an experimental performance feature and currently has issues when used with UCX. See https://github.com/kokkos/kokkos/issues/4228 for more details.
* BOOL Default: OFF
* Kokkos_ENABLE_LARGE_MEM_TESTS
* Whether to perform extra large memory tests
* BOOL Default: OFF
@ -235,6 +238,9 @@ The following options control `find_package` paths for CMake-based TPLs:
## Architecture Keywords
Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_X`.
* Kokkos_ARCH_NATIVE
* Whether to optimize for the local CPU architecture
* BOOL Default: OFF
* Kokkos_ARCH_AMDAVX
* Whether to optimize for the AMDAVX architecture
* BOOL Default: OFF
@ -310,12 +316,24 @@ Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_
* Kokkos_ARCH_POWER9
* Whether to optimize for the POWER9 architecture
* BOOL Default: OFF
* Kokkos_ARCH_ICL
* Whether to optimize for the ICL architecture
* BOOL Default: OFF
* Kokkos_ARCH_ICX
* Whether to optimize for the ICX architecture
* BOOL Default: OFF
* Kokkos_ARCH_SKL
* Whether to optimize for the SKL architecture
* BOOL Default: OFF
* Kokkos_ARCH_SKX
* Whether to optimize for the SKX architecture
* BOOL Default: OFF
* Kokkos_ARCH_SNB
* Whether to optimize for the SNB architecture
* BOOL Default: OFF
* Kokkos_ARCH_SPR
* Whether to optimize for the SPR architecture
* BOOL Default: OFF
* Kokkos_ARCH_TURING75
* Whether to optimize for the TURING75 architecture
* BOOL Default: OFF

View File

@ -1,5 +1,157 @@
# Change Log
## [3.7.00](https://github.com/kokkos/kokkos/tree/3.7.00) (2022-08-22)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.6.01...3.7.00)
### Features:
- Use non-volatile `join()` member functions and `operator+=` in `parallel_reduce/scan` [\#4931](https://github.com/kokkos/kokkos/pull/4931) [\#4954](https://github.com/kokkos/kokkos/pull/4954) [\#4951](https://github.com/kokkos/kokkos/pull/4951)
- Add `SIMD` sub package (requires C++17) [\#5016](https://github.com/kokkos/kokkos/pull/5016)
- Add `is_finalized()` [\#5247](https://github.com/kokkos/kokkos/pull/5247)
- Promote mathematical functions from `namespace Kokkos::Experimental` to `namespace Kokkos` [\#4791](https://github.com/kokkos/kokkos/pull/4791)
- Promote `min`, `max`, `clamp`, `minmax` functions from `namespace Kokkos::Experimental` to `namespace Kokkos` [\#5170](https://github.com/kokkos/kokkos/pull/5170)
- Add `round`, `logb`, `nextafter`, `copysign`, and `signbit` math functions [\#4768](https://github.com/kokkos/kokkos/pull/4768)
- Add `HIPManagedSpace`, similar to `CudaUVMSpace` [\#5112](https://github.com/kokkos/kokkos/pull/5112)
- Accept view construction allocation properties in `create_mirror[_view,_view_and_copy]` and `resize/realloc` [\#5125](https://github.com/kokkos/kokkos/pull/5125) [\#5095](https://github.com/kokkos/kokkos/pull/5095) [\#5035](https://github.com/kokkos/kokkos/pull/5035) [\#4805](https://github.com/kokkos/kokkos/pull/4805) [\#4844](https://github.com/kokkos/kokkos/pull/4844)
- Allow `MemorySpace::allocate()` to be called with execution space [\#4826](https://github.com/kokkos/kokkos/pull/4826)
- Experimental: Compile time view subscriber [\#4197](https://github.com/kokkos/kokkos/pull/4197)
### Backends and Archs Enhancements:
- Add support for Sapphire Rapids Intel architecture [\#5015](https://github.com/kokkos/kokkos/pull/5015)
- Add support for ICX, SKL and ICL Intel architectures [\#5013](https://github.com/kokkos/kokkos/pull/5013) [\#4929](https://github.com/kokkos/kokkos/pull/4929)
- Add arch flags for Intel GPU Ponte Vecchio [\#4932](https://github.com/kokkos/kokkos/pull/4932)
- SYCL: require GPU if GPU architecture was set at configuration time (i.e. do not allow fallback to CPU device) [\#5264](https://github.com/kokkos/kokkos/pull/5264) [\#5222](https://github.com/kokkos/kokkos/pull/5222)
- SYCL: Add `SYCL::sycl_queue()` for interoperability [\#5241](https://github.com/kokkos/kokkos/pull/5241)
- SYCL: Loosen restriction for using built-in `sycl::group_broadcast` [\#4552](https://github.com/kokkos/kokkos/pull/4552)
- SYCL: preserve address space [\#4396](https://github.com/kokkos/kokkos/pull/4396)
- OpenMPTarget: Adding a workaround for team scan [\#5219](https://github.com/kokkos/kokkos/pull/5219)
- OpenMPTarget: Adding logic to skip the kernel launch if `league_size=0` [\#5067](https://github.com/kokkos/kokkos/pull/5067)
- OpenMPTarget: Make sure `Kokkos::abort()` causes abnormal program termination when called on the host-side [\#4808](https://github.com/kokkos/kokkos/pull/4808)
- HIP: Make HIPHostPinnedSpace coarse-grained [\#5152](https://github.com/kokkos/kokkos/pull/5152)
- Refactor OpenMP `parallel_for` implementation to use more native OpenMP constructs [\#4664](https://github.com/kokkos/kokkos/pull/4664)
- Add option to optimize for local CPU architecture `Kokkos_ARCH_NATIVE` [\#4930](https://github.com/kokkos/kokkos/pull/4930)
### Implemented enhancements
- Add command line argument/environment variable to print the configuration [\#5233](https://github.com/kokkos/kokkos/pull/5233)
- Improve error message in view memory access violations [\#4950](https://github.com/kokkos/kokkos/pull/4950)
- Remove unnecessary fences in View initialization [\#4823](https://github.com/kokkos/kokkos/pull/4823)
- Make `View::shmem_size()` device-callable [\#4936](https://github.com/kokkos/kokkos/pull/4936)
- Update numerics support for `__float128` [\#5081](https://github.com/kokkos/kokkos/pull/5081)
- Add `log10` overload for `Kokkos::complex` [\#5009](https://github.com/kokkos/kokkos/pull/5009)
- Add `[[nodiscard]]` to `ScopeGuard` [\#5224](https://github.com/kokkos/kokkos/pull/5224)
- Add structured binding support for `Kokkos::Array` [\#4962](https://github.com/kokkos/kokkos/pull/4962)
- Enable accessing `Kokkos::Array` elements in constant expressions [\#4916](https://github.com/kokkos/kokkos/pull/4916)
- Mark `as_view_of_rank_n` as KOKKOS_FUNCTION [\#5248](https://github.com/kokkos/kokkos/pull/5248)
- Cleanup/rework fence overloads [\#5148](https://github.com/kokkos/kokkos/pull/5148)
- Assert that `Layout` construction from extents is valid in functions taking integer extents [\#5209](https://github.com/kokkos/kokkos/pull/5209)
- Add `fill_random` overload that takes an execution space as first argument [\#5181](https://github.com/kokkos/kokkos/pull/5181)
- Avoid some unnecessary fences in `parallel_reduce/scan` [\#5154](https://github.com/kokkos/kokkos/pull/5154)
- Include `KOKKOS_ENABLE_LIBDL` in options when printing configuration [\#5086](https://github.com/kokkos/kokkos/pull/5086)
- DynRankView: make `layout()` return the same as a corresponding static View [\#5026](https://github.com/kokkos/kokkos/pull/5026)
- Use `_mm_malloc` for icpx [\#5012](https://github.com/kokkos/kokkos/pull/5012)
- Avoid forcing matching execution spaces in `BinSort` constructor and `sort()` [\#4919](https://github.com/kokkos/kokkos/pull/4919)
- Check number of bins in `BinSort` [\#4890](https://github.com/kokkos/kokkos/pull/4890)
- Improve performance in parallel STL-like algorithms [\#4887](https://github.com/kokkos/kokkos/pull/4887) [\#4886](https://github.com/kokkos/kokkos/pull/4886)
- Disable `memset` on A64FX and launch `parallel_for` instead (performance) [\#4884](https://github.com/kokkos/kokkos/pull/4884)
- Allow non-power-of-two team sizes for team reductions and scans [\#4809](https://github.com/kokkos/kokkos/pull/4809)
#### Harmonization of Kokkos execution environment initialization:
- Warn when unable to detect local MPI rank and user explicitly asked for it [\#5263](https://github.com/kokkos/kokkos/pull/5263)
- Refactor parsing of command line arguments and environment variables [\#5221](https://github.com/kokkos/kokkos/pull/5221)
- Refactor device selection at initialization [\#5211](https://github.com/kokkos/kokkos/pull/5211)
- Rename tools settings for consistency [\#5201](https://github.com/kokkos/kokkos/pull/5201)
- Print help only once [\#5128](https://github.com/kokkos/kokkos/pull/5128)
- Update precedence rule in initialization [\#5130](https://github.com/kokkos/kokkos/pull/5130)
- Warn instead of just ignoring user settings when kokkos-tools is disabled [\#5088](https://github.com/kokkos/kokkos/pull/5088)
- Drop numa args in threads backend initialization [\#5127](https://github.com/kokkos/kokkos/pull/5127)
- Warn users when a flag prefixed with -[-]kokkos is not recognized and do not remove it [\#5256](https://github.com/kokkos/kokkos/pull/5256)
- Give back to Core what belongs to Core (aka moving tune_internals option from Tools back to Core) [\#5202](https://github.com/kokkos/kokkos/pull/5202)
#### Build system updates:
- `nvcc_wrapper`: filter out -pedantic-errors from nvcc options [\#5235](https://github.com/kokkos/kokkos/pull/5235)
- `nvcc_wrapper`: add known nvcc option --source-in-ptx [\#5052](https://github.com/kokkos/kokkos/pull/5052)
- Link libdl as interface library [\#5179](https://github.com/kokkos/kokkos/pull/5179)
- Only show GPU architectures with enabled corresponding backend [\#5119](https://github.com/kokkos/kokkos/pull/5119)
- Enable optional external desul build [\#5021](https://github.com/kokkos/kokkos/pull/5021) [\#5132](https://github.com/kokkos/kokkos/pull/5132)
- Export `Kokkos_CXX_STANDARD` variable with CMake [\#5068](https://github.com/kokkos/kokkos/pull/5068)
- Suppress warnings with nvc++ [\#5031](https://github.com/kokkos/kokkos/pull/5031)
- Disallow multiple host architectures in CMake [\#4996](https://github.com/kokkos/kokkos/pull/4996)
- Do not include compiler warning flags in the compile option of the cmake target [\#4989](https://github.com/kokkos/kokkos/pull/4989)
- AOT flags for OpenMPTarget targeting Intel GPUs [\#4915](https://github.com/kokkos/kokkos/pull/4915)
- Repurpose `Kokkos_ARCH_INTEL_GEN` for SYCL to mean JIT to be conforming with OMPT [\#4894](https://github.com/kokkos/kokkos/pull/4894)
- Replace amdgpu-target with offload-arch [\#4874](https://github.com/kokkos/kokkos/pull/4874)
- Do not enable `kokkos_launch_compiler` when `CMAKE_CXX_COMPILER_LAUNCHER` is set [\#4870](https://github.com/kokkos/kokkos/pull/4870)
- Move CMake version check up [\#4797](https://github.com/kokkos/kokkos/pull/4797)
### Incompatibilities:
- Remove `KOKKOS_THREAD_LOCAL` [\#5064](https://github.com/kokkos/kokkos/pull/5064)
- Remove `KOKKOS_ENABLE_POSIX_MEMALIGN` [\#5011](https://github.com/kokkos/kokkos/pull/5011)
- Remove unused `KOKKOS_ENABLE_TM` [\#4995](https://github.com/kokkos/kokkos/pull/4995)
- Remove unused cmakedefine `KOKKOS_ENABLE_COMPILER_WARNINGS` [\#4883](https://github.com/kokkos/kokkos/pull/4883)
- Remove unused `KOKKOS_ENABLE_DUALVIEW_MODIFY_CHECK` [\#4882](https://github.com/kokkos/kokkos/pull/4882)
- Drop Instruction Set Architecture (ISA) macros [\#4981](https://github.com/kokkos/kokkos/pull/4981)
- Warn in `ScopeGuard` about illegal usage [\#5250](https://github.com/kokkos/kokkos/pull/5250)
### Deprecations:
- Guard against non-public header inclusion [\#5178](https://github.com/kokkos/kokkos/pull/5178)
- Raise deprecation warnings if non empty WorkTag class is used [\#5230](https://github.com/kokkos/kokkos/pull/5230)
- Deprecate `parallel_*` overloads taking the label as trailing argument [\#5141](https://github.com/kokkos/kokkos/pull/5141)
- Deprecate nested types in functional [\#5185](https://github.com/kokkos/kokkos/pull/5185)
- Deprecate `InitArguments` struct and replace it with `InitializationSettings` [\#5135](https://github.com/kokkos/kokkos/pull/5135)
- Deprecate `finalize_all()` [\#5134](https://github.com/kokkos/kokkos/pull/5134)
- Deprecate command line arguments (other than `--help`) that are not prefixed with `kokkos-*` [\#5120](https://github.com/kokkos/kokkos/pull/5120)
- Deprecate `--[kokkos-]numa` cmdline arg and `KOKKOS_NUMA` env var [\#5117](https://github.com/kokkos/kokkos/pull/5117)
- Deprecate `--[kokkos-]threads` command line argument in favor of `--[kokkos-]num-threads` [\#5111](https://github.com/kokkos/kokkos/pull/5111)
- Deprecate `Kokkos::common_view_alloc_prop` [\#5059](https://github.com/kokkos/kokkos/pull/5059)
- Deprecate `Kokkos::is_reducer_type` [\#4957](https://github.com/kokkos/kokkos/pull/4957)
- Deprecate `OffsetView` constructors taking `index_list_type` [\#4810](https://github.com/kokkos/kokkos/pull/4810)
- Deprecate overloads of `Kokkos::sort` taking a parameter `bool always_use_kokkos_sort` [\#5382](https://github.com/kokkos/kokkos/issues/5382)
- Warn about `parallel_reduce` cases that call `join()` with volatile-qualified arguments [\#5215](https://github.com/kokkos/kokkos/pull/5215)
### Bug Fixes:
- CUDA Reductions: Fix data races reported by Nvidia `compute-sanitizer` [\#4855](https://github.com/kokkos/kokkos/pull/4855)
- Work around Intel compiler bug [\#5301](https://github.com/kokkos/kokkos/pull/5301)
- Avoid allocating memory for UniqueToken [\#5300](https://github.com/kokkos/kokkos/pull/5300)
- DynamicView: Properly resize mirror instances after construction [\#5276](https://github.com/kokkos/kokkos/pull/5276)
- Remove Kokkos::Rank limit of 6 ranks [\#5271](https://github.com/kokkos/kokkos/pull/5271)
- Do not forget to set last element to nullptr when removing a flag in `Kokkos::initialize` [\#5272](https://github.com/kokkos/kokkos/pull/5272)
- Fix CUDA+MSVC build issue [\#5261](https://github.com/kokkos/kokkos/pull/5261)
- Fix `DynamicView::resize_serial` [\#5220](https://github.com/kokkos/kokkos/pull/5220)
- Fix cmake default compiler flags for unknown compiler [\#5217](https://github.com/kokkos/kokkos/pull/5217)
- Fix `move_backward` [\#5191](https://github.com/kokkos/kokkos/pull/5191)
- Fixing issue 5196 - missing symbol with intel compiler [\#5207](https://github.com/kokkos/kokkos/pull/5207)
- Preserve `KOKKOS_INVALID_INDEX` in ViewDimension and ArrayLayout construction [\#5188](https://github.com/kokkos/kokkos/pull/5188)
- Finalize `deep_copy_space` early avoiding printing to `std::cerr` for Cuda [\#5151](https://github.com/kokkos/kokkos/pull/5151)
- Use correct policy in Threads MDRange `parallel_reduce` [\#5123](https://github.com/kokkos/kokkos/pull/5123)
- Fix building with NVCC as the CXX compiler while the CUDA backend is not enabled [\#5115](https://github.com/kokkos/kokkos/pull/5115)
- OpenMPTarget Index range fix for MDRange. [\#5089](https://github.com/kokkos/kokkos/pull/5089)
- Fix bug with CUDA's team reduction for empty ranges [\#5079](https://github.com/kokkos/kokkos/pull/5079)
- Fix using `ZeroMemset` for Serial [\#5077](https://github.com/kokkos/kokkos/pull/5077)
- Fix `Kokkos::Vector::push_back` for default execution space [\#5047](https://github.com/kokkos/kokkos/pull/5047)
- ScatterView: Fix ScatterMin/ScatterMax to use proper atomics [\#5045](https://github.com/kokkos/kokkos/pull/5045)
- Fix calling `ZeroMemset` in `deep_copy` [\#5040](https://github.com/kokkos/kokkos/pull/5040)
- Make View self-assignment not produce double-free [\#5024](https://github.com/kokkos/kokkos/pull/5024)
- Guard against unrecognized pragma with intel compilers [\#5019](https://github.com/kokkos/kokkos/pull/5019)
- Fix race condition in `HIPParallelLaunch` [\#5008](https://github.com/kokkos/kokkos/pull/5008)
- KokkosP: Fix `device_id` in profiling [\#4997](https://github.com/kokkos/kokkos/pull/4997)
- Fix for `Kokkos::vector::insert` into empty vector with begin and end iterators [\#4988](https://github.com/kokkos/kokkos/pull/4988)
- Fix Core header files installation [\#4984](https://github.com/kokkos/kokkos/pull/4984)
- Fix bounds errors with `Kokkos::sort` [\#4980](https://github.com/kokkos/kokkos/pull/4980)
- Fixup let `RangePolicy::set_chunk_size` return a reference to self [\#4918](https://github.com/kokkos/kokkos/pull/4918)
- Fix allocating large Views [\#4907](https://github.com/kokkos/kokkos/pull/4907)
- Fix combined reductions with `Kokkos::View` [\#4896](https://github.com/kokkos/kokkos/pull/4896)
- Fixed `_CUDA_ARCH__` to `__CUDA_ARCH__` for CUDA LDG [\#4893](https://github.com/kokkos/kokkos/pull/4893)
- Fixup `View::access()` truncate parameter pack [\#4876](https://github.com/kokkos/kokkos/pull/4876)
- Fix `abort` with HIP backend for ROCm 5.0.2 and beyond [\#4873](https://github.com/kokkos/kokkos/pull/4873)
- Fix HIP version when printing the configuration [\#4872](https://github.com/kokkos/kokkos/pull/4872)
- Fix scratch lock array when using scratch level 1 [\#4871](https://github.com/kokkos/kokkos/pull/4871)
- Fix Makefile.kokkos to work with fujitsu compiler [\#4867](https://github.com/kokkos/kokkos/pull/4867)
- cmake: Correct link THREADS link option [\#4854](https://github.com/kokkos/kokkos/pull/4854)
- UniqueToken `impl_acquire` function should be device only [\#4819](https://github.com/kokkos/kokkos/pull/4819)
- Fix example calls to non existing static `print_configuration` [\#4806](https://github.com/kokkos/kokkos/pull/4806)
- Fix requests for large team scratch sizes [\#4728](https://github.com/kokkos/kokkos/pull/4728)
## [3.6.01](https://github.com/kokkos/kokkos/tree/3.6.01) (2022-05-23)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.6.00...3.6.01)

View File

@ -1,3 +1,4 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
# Disable in-source builds to prevent source tree corruption.
if( "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}" )
@ -28,11 +29,6 @@ SET(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR})
SET(KOKKOS_PATH ${Kokkos_SOURCE_DIR})
SET(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR})
# Needed to simplify syntax of if statements
CMAKE_POLICY(SET CMP0054 NEW)
# Needed to make IN_LIST a valid operator
CMAKE_POLICY(SET CMP0057 NEW)
# Is this a build as part of Trilinos?
IF(COMMAND TRIBITS_PACKAGE_DECL)
SET(KOKKOS_HAS_TRILINOS ON)
@ -72,7 +68,6 @@ ENDFUNCTION()
LIST(APPEND CMAKE_MODULE_PATH cmake/Modules)
IF(NOT KOKKOS_HAS_TRILINOS)
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
set(CMAKE_DISABLE_SOURCE_CHANGES ON)
set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
@ -80,7 +75,7 @@ IF(NOT KOKKOS_HAS_TRILINOS)
# downstream dependencies need to match this!
SET(KOKKOS_COMPILE_LANGUAGE CXX)
# use lower case here since we didn't parse options yet
IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE AND Kokkos_ENABLE_CUDA)
# Without this as a language for the package we would get a C++ compiler enabled.
# but we still need a C++ compiler even if we build all our cpp files as CUDA only
@ -90,9 +85,7 @@ IF(NOT KOKKOS_HAS_TRILINOS)
# days.
SET(KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE CXX)
IF (Kokkos_ENABLE_CUDA)
SET(KOKKOS_COMPILE_LANGUAGE CUDA)
ENDIF()
SET(KOKKOS_COMPILE_LANGUAGE CUDA)
ENDIF()
IF (Spack_WORKAROUND)
@ -135,14 +128,11 @@ ENDIF()
set(Kokkos_VERSION_MAJOR 3)
set(Kokkos_VERSION_MINOR 6)
set(Kokkos_VERSION_PATCH 01)
set(Kokkos_VERSION_MINOR 7)
set(Kokkos_VERSION_PATCH 00)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
MESSAGE(STATUS "Setting policy CMP0074 to use <Package>_ROOT variables")
CMAKE_POLICY(SET CMP0074 NEW)
# Load either the real TriBITS or a TriBITS wrapper
# for certain utility functions that are universal (like GLOBAL_SET)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake)
@ -204,11 +194,16 @@ KOKKOS_SETUP_BUILD_ENVIRONMENT()
OPTION(BUILD_SHARED_LIBS "Build shared libraries" OFF)
SET(KOKKOS_EXT_LIBRARIES Kokkos::kokkos Kokkos::kokkoscore Kokkos::kokkoscontainers Kokkos::kokkosalgorithms)
SET(KOKKOS_INT_LIBRARIES kokkos kokkoscore kokkoscontainers kokkosalgorithms)
SET(KOKKOS_SUB_LIBRARIES kokkoscore kokkoscontainers kokkosalgorithms)
# The SIMD sub-library is only added when building with C++17 or newer, so it
# is appended to both the exported-target list and the sub-library list here
# instead of being part of their initial values.
IF (KOKKOS_CXX_STANDARD GREATER_EQUAL 17)
LIST(APPEND KOKKOS_EXT_LIBRARIES Kokkos::kokkossimd)
LIST(APPEND KOKKOS_SUB_LIBRARIES kokkossimd)
ENDIF()
SET(KOKKOS_INT_LIBRARIES kokkos ${KOKKOS_SUB_LIBRARIES})
SET_PROPERTY(GLOBAL PROPERTY KOKKOS_INT_LIBRARIES ${KOKKOS_INT_LIBRARIES})
IF (KOKKOS_HAS_TRILINOS)
SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})
SET(TRILINOS_INCDIR ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})
SET(KOKKOS_HEADER_DIR ${TRILINOS_INCDIR})
SET(KOKKOS_IS_SUBDIRECTORY TRUE)
ELSEIF(HAS_PARENT)
@ -296,7 +291,7 @@ IF (NOT KOKKOS_HAS_TRILINOS AND NOT Kokkos_INSTALL_TESTING)
#Make sure in-tree projects can reference this as Kokkos::
#to match the installed target names
ADD_LIBRARY(Kokkos::kokkos ALIAS kokkos)
TARGET_LINK_LIBRARIES(kokkos INTERFACE kokkoscore kokkoscontainers kokkosalgorithms)
TARGET_LINK_LIBRARIES(kokkos INTERFACE ${KOKKOS_SUB_LIBRARIES})
KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(kokkos)
ENDIF()
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake)

View File

@ -11,21 +11,21 @@ CXXFLAGS += $(SHFLAGS)
endif
KOKKOS_VERSION_MAJOR = 3
KOKKOS_VERSION_MINOR = 6
KOKKOS_VERSION_PATCH = 01
KOKKOS_VERSION_MINOR = 7
KOKKOS_VERSION_PATCH = 00
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Threads"
# Options:
# Intel: KNC,KNL,SNB,HSW,BDW,SKX
# Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
# IBM: BGQ,Power7,Power8,Power9
# AMD-GPUS: Vega900,Vega906,Vega908,Vega90A
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP
# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC
KOKKOS_ARCH ?= ""
# Options: yes,no
KOKKOS_DEBUG ?= "no"
@ -193,6 +193,8 @@ KOKKOS_INTERNAL_COMPILER_INTEL_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VE
KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple clang)
KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC)
KOKKOS_INTERNAL_COMPILER_GCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),GCC)
# TODO fujitsu can emulate gcc or clang. Only clang mode works at the moment.
KOKKOS_INTERNAL_COMPILER_FUJITSU := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),FUJITSU)
# Check Host Compiler if using NVCC through nvcc_wrapper
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
@ -221,8 +223,23 @@ endif
# When the compiler is detected as HCC, clear the clang flag so the
# clang-specific branches below are not taken as well (presumably HCC's version
# string also matches the clang check - confirm against the detection code).
# The variable name was previously misspelled (KOKKOS_INTENAL_...), so the
# assignment had no effect on KOKKOS_INTERNAL_COMPILER_CLANG.
ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
KOKKOS_INTERNAL_COMPILER_CLANG = 0
endif
# Fujitsu passes also as clang and gcc respectively
ifeq ($(KOKKOS_INTERNAL_COMPILER_FUJITSU), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1)
# TODO handle gcc flags and workaround for bug?
# fujitsu (gcc mode) is bugged, see https://github.com/kokkos/kokkos/issues/4730
$(warning Warning: ${CXX} in Trad Mode '-Nnoclang' (default) is not recommended. Use 'CXX = ${CXX} -Nclang' instead.)
# HACK since fujitsu only accepts some gcc flags, disable gcc here?
# KOKKOS_INTERNAL_COMPILER_GCC = 0
endif
# TODO handle clang flags
# warnings: works fine as is
# openmp: handled
#KOKKOS_INTERNAL_COMPILER_CLANG = 0
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
# TODO empty variable if fujitsu (clang mode) passes as clang
KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell $(CXX) --version | grep version | cut -d ' ' -f3 | tr -d '.')
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
@ -272,7 +289,12 @@ else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 1)
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
else
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
ifeq ($(KOKKOS_INTERNAL_COMPILER_FUJITSU), 1)
# fujitsu (clang mode) fails with `=libomp`
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
else
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
endif
endif
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
@ -300,11 +322,15 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
#KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_WORKAROUND_OPENMPTARGET_CLANG -fopenmp -fopenmp=libomp
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_WORKAROUND_OPENMPTARGET_CLANG -fopenmp -fopenmp=libomp -Wno-openmp-mapping
KOKKOS_INTERNAL_OPENMPTARGET_LIB := -lomptarget
else
#Assume GCC
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL_CLANG), 1)
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fiopenmp -Wno-openmp-mapping
else
#Assume GCC
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none
endif
endif
endif
@ -344,8 +370,12 @@ KOKKOS_INTERNAL_USE_ARCH_WSM := $(call kokkos_has_string,$(KOKKOS_ARCH),WSM)
KOKKOS_INTERNAL_USE_ARCH_SNB := $(call kokkos_has_string,$(KOKKOS_ARCH),SNB)
KOKKOS_INTERNAL_USE_ARCH_HSW := $(call kokkos_has_string,$(KOKKOS_ARCH),HSW)
KOKKOS_INTERNAL_USE_ARCH_BDW := $(call kokkos_has_string,$(KOKKOS_ARCH),BDW)
KOKKOS_INTERNAL_USE_ARCH_SKL := $(call kokkos_has_string,$(KOKKOS_ARCH),SKL)
KOKKOS_INTERNAL_USE_ARCH_SKX := $(call kokkos_has_string,$(KOKKOS_ARCH),SKX)
KOKKOS_INTERNAL_USE_ARCH_KNL := $(call kokkos_has_string,$(KOKKOS_ARCH),KNL)
KOKKOS_INTERNAL_USE_ARCH_ICL := $(call kokkos_has_string,$(KOKKOS_ARCH),ICL)
KOKKOS_INTERNAL_USE_ARCH_ICX := $(call kokkos_has_string,$(KOKKOS_ARCH),ICX)
KOKKOS_INTERNAL_USE_ARCH_SPR := $(call kokkos_has_string,$(KOKKOS_ARCH),SPR)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9)
@ -353,6 +383,7 @@ KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen12LP)
KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelDG1)
KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelXeHP)
KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC := $(call kokkos_has_string,$(KOKKOS_ARCH),PVC)
# NVIDIA based.
NVCC_WRAPPER := $(KOKKOS_PATH)/bin/nvcc_wrapper
@ -436,19 +467,9 @@ KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_W
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX))
# Decide what ISA level we are able to support.
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
KOKKOS_INTERNAL_USE_ISA_KNC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9))
KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7))
# Decide whether we can support transactional memory
KOKKOS_INTERNAL_USE_TM := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_SKX))
# Incompatible flags?
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc)
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_SKL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX)+$(KOKKOS_INTERNAL_USE_ARCH_ICL)+$(KOKKOS_INTERNAL_USE_ARCH_ICX)+$(KOKKOS_INTERNAL_USE_ARCH_SPR)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc)
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1") | bc)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
@ -551,36 +572,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_SERIAL")
endif
ifeq ($(KOKKOS_INTERNAL_USE_TM), 1)
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_TM")
tmp := $(call kokkos_append_header,"$H""endif")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_X86_64")
tmp := $(call kokkos_append_header,"$H""endif")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1)
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_KNC")
tmp := $(call kokkos_append_header,"$H""endif")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCLE")
tmp := $(call kokkos_append_header,"$H""endif")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1)
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCBE")
tmp := $(call kokkos_append_header,"$H""endif")
endif
#only add the c++ standard flags if this is not CMake
tmp := $(call kokkos_append_header,"/* General Settings */")
ifneq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1)
@ -1041,7 +1032,28 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
# SKL architecture: append the KOKKOS_ARCH_AVX512XEON define to the generated
# header and select an architecture flag based on the detected compiler.
# NOTE(review): GCC's -march=skylake targets the client Skylake part, which
# does not enable AVX-512 - confirm that defining KOKKOS_ARCH_AVX512XEON here
# is intended.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SKL), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
# Intel compiler: uses its -x architecture switch for compile and link.
KOKKOS_CXXFLAGS += -xSKYLAKE
KOKKOS_LDFLAGS += -xSKYLAKE
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
# Cray compiler: no architecture flag is added.
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
# PGI compiler: no architecture flag is added.
else
# Any other compiler: pass a -march flag for compile and link.
KOKKOS_CXXFLAGS += -march=skylake
KOKKOS_LDFLAGS += -march=skylake
endif
endif
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SKX), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
@ -1055,13 +1067,31 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
else
# Nothing here yet.
KOKKOS_CXXFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm
KOKKOS_LDFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm
KOKKOS_CXXFLAGS += -march=skylake-avx512 -mtune=skylake-avx512
KOKKOS_LDFLAGS += -march=skylake-avx512 -mtune=skylake-avx512
endif
endif
endif
endif
# ICL architecture: append the KOKKOS_ARCH_AVX512XEON define to the generated
# header and add icelake-client -march/-mtune flags for compile and link.
# The flags are added without any per-compiler branching.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ICL), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
KOKKOS_CXXFLAGS += -march=icelake-client -mtune=icelake-client
KOKKOS_LDFLAGS += -march=icelake-client -mtune=icelake-client
endif
# ICX architecture: append the KOKKOS_ARCH_AVX512XEON define to the generated
# header and add icelake-server -march/-mtune flags for compile and link.
# The flags are added without any per-compiler branching.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ICX), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
KOKKOS_CXXFLAGS += -march=icelake-server -mtune=icelake-server
KOKKOS_LDFLAGS += -march=icelake-server -mtune=icelake-server
endif
# SPR architecture: append the KOKKOS_ARCH_AVX512XEON define to the generated
# header and add sapphirerapids -march/-mtune flags for compile and link.
# The flags are added without any per-compiler branching.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SPR), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
KOKKOS_CXXFLAGS += -march=sapphirerapids -mtune=sapphirerapids
KOKKOS_LDFLAGS += -march=sapphirerapids -mtune=sapphirerapids
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KNC")
KOKKOS_CXXFLAGS += -mmic
@ -1091,7 +1121,7 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp-targets=nvptx64 -Xopenmp-target -march
endif
KOKKOS_INTERNAL_USE_CUDA_ARCH = 1
endif
@ -1192,29 +1222,29 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA900), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA900")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx900
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx900
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx906
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx906
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA908), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA908")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx908
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx908
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA90A), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA90A")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx90a
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a
endif
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp)
ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0)
KOKKOS_SRC += $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_HIP.cpp
KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp
endif
KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG)
@ -1230,51 +1260,67 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
endif
endif
# Figure out the architecture flag for SYCL.
# Figure out Intel architecture flags.
ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
# Lets start with adding architecture defines
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN")
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen9-"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN9")
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen9"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN11")
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen11"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN12LP")
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen12lp"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_DG1")
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device dg1"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_XEHP")
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device xehp"
endif
KOKKOS_INTERNAL_LC_BACKEND := sycl
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_INTERNAL_LC_BACKEND := openmp
endif
# Intel GPU architecture selection.
# For each enabled KOKKOS_INTERNAL_USE_ARCH_INTEL_* option, two defines are
# passed to kokkos_append_header (the common KOKKOS_ARCH_INTEL_GPU plus the
# architecture-specific one) and KOKKOS_INTERNAL_INTEL_ARCH_FLAG is set.
# The flag text is built from KOKKOS_INTERNAL_LC_BACKEND, so the same entries
# expand to -fsycl-targets=.../-Xsycl-target-backend or to
# -fopenmp-targets=.../-Xopenmp-target-backend depending on that variable.
# Every entry assigns with ':=', so if more than one option is enabled the
# last matching entry below determines the final flag.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN")
# Generic entry: only a spir64 target is requested, no -device backend option.
KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN9")
KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen9"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN11")
KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen11"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN12LP")
KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen12lp"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_DG1")
KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device dg1"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_XEHP")
KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device xehp"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_PVC")
# NOTE(review): unlike the named devices above, this entry passes a numeric
# value ("12.4.0") to -device; presumably a device revision id -- confirm.
KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device 12.4.0"
endif
ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/SYCL/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/SYCL/*.hpp)
KOKKOS_CXXFLAGS+=-fsycl -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda
KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_SYCL_ARCH_FLAG)
KOKKOS_CXXFLAGS+=-fsycl -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda -fsycl-dead-args-optimization
KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG)
KOKKOS_LDFLAGS+=-fsycl
KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_SYCL_ARCH_FLAG)
KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG)
endif
# OpenMPTarget backend: add the Intel architecture flag to both the compile
# and the link flags. -D__STRICT_ANSI__ is added to the compile flags
# whenever this backend is enabled, whether or not the architecture flag is set.
# NOTE(review): the reason for defining __STRICT_ANSI__ is not visible here -- confirm.
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG) -D__STRICT_ANSI__
KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG)
endif
ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_DESUL_ATOMICS")
KOKKOS_CPPFLAGS+=-I$(KOKKOS_PATH)/tpls/desul/include
else
ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1)
$(error Contradictory Desul atomics options: KOKKOS_OPTIONS=$(KOKKOS_OPTIONS) )
@ -1359,7 +1405,7 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0)
KOKKOS_SRC += $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_CUDA.cpp
KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
ifneq ($(CUDA_PATH),)
@ -1417,6 +1463,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
KOKKOS_TPL_LIBRARY_NAMES += pthread
endif
# Serial backend: when enabled, add every .cpp and .hpp found under
# core/src/Serial to the source and header lists.
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Serial/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Serial/*.hpp)
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.hpp)
@ -1449,15 +1500,6 @@ ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC))
endif
# Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial
# device to avoid a link warning.
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
endif
ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp,$(KOKKOS_SRC))
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp,$(KOKKOS_SRC))
endif
# With Cygwin functions such as fdopen and fileno are not defined
# when strict ansi is enabled. strict ansi gets enabled with -std=c++14
# though. So we hard undefine it here. Not sure if that has any bad side effects

View File

@ -16,10 +16,6 @@ Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Ho
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp
Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
@ -41,6 +37,13 @@ Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/
Kokkos_NumericTraits.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp
Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial_Task.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
@ -50,8 +53,8 @@ Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
Lock_Array_CUDA.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_CUDA.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_CUDA.cpp
Lock_Array_CUDA.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
@ -70,20 +73,18 @@ Kokkos_HIP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp
Kokkos_HIP_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp
Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_HIP.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_HIP.cpp
Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
Kokkos_OpenMP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp
Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
endif

View File

@ -10,270 +10,48 @@ hierarchies and multiple types of execution resources. It currently can use
CUDA, HIP, SYCL, HPX, OpenMP and C++ threads as backend programming models with several other
backends in development.
Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem,
which also provides math kernels (https://github.com/kokkos/kokkos-kernels), as well as
profiling and debugging tools (https://github.com/kokkos/kokkos-tools).
**Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem.**
For the complete documentation, click below:
# [kokkos.github.io/kokkos-core-wiki](https://kokkos.github.io/kokkos-core-wiki)
# Learning about Kokkos
The best way to start learning about Kokkos is going through the Kokkos Lectures.
They are available online at https://kokkos.link/the-lectures and contain a mix
of lecture videos and hands-on exercises covering all the important Kokkos Ecosystem
capabilities.
To start learning about Kokkos:
A programming guide and API reference can be found on the Wiki
(https://github.com/kokkos/kokkos/wiki).
- [Kokkos Lectures](https://kokkos.github.io/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important Kokkos Ecosystem capabilities.
- [Programming guide](https://kokkos.github.io/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch.
- [API reference](https://kokkos.github.io/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.github.io/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.github.io/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.github.io/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.github.io/kokkos-core-wiki/API/alphabetical.html).
- [Use cases and Examples](https://kokkos.github.io/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability.
For questions find us on Slack: https://kokkosteam.slack.com or open a github issue.
For non-public questions send an email to
crtrott(at)sandia.gov
For non-public questions send an email to: *crtrott(at)sandia.gov*
# Contributing to Kokkos
We are open and try to encourage contributions from external developers.
To do so please first open an issue describing the contribution and then issue
a pull request against the develop branch. For larger features it may be good
to get guidance from the core development team first through the github issue.
Please see [this page](https://kokkos.github.io/kokkos-core-wiki/contributing.html) for details on how to contribute.
Note that Kokkos Core is licensed under standard 3-clause BSD terms of use,
which means that contributing to Kokkos allows anyone else to use your contributions
not just for public purposes but also for closed source commercial projects.
For specifics see the LICENSE file contained in the repository or distribution.
# Requirements, Building and Installing
# Requirements
### Minimum Compiler Versions
Generally Kokkos should work with all compiler versions newer than the minimum.
However, as with all sufficiently complex code, we have to work around compiler
bugs with almost all compilers. Compiler versions we don't test may therefore have issues
we are unaware of.
* GCC: 5.3.0
* Clang: 4.0.0
* Intel: 17.0.1
* NVCC: 9.2.88
* NVC++: 21.5
* ROCm: 4.3
* MSVC: 19.29
* IBM XL: 16.1.1
* Fujitsu: 4.5.0
* ARM/Clang 20.1
### Primary Tested Compilers
* GCC: 5.3.0, 6.1.0, 7.3.0, 8.3, 9.2, 10.0
* NVCC: 9.2.88, 10.1, 11.0
* Clang: 8.0.0, 9.0.0, 10.0.0, 12.0.0
* Intel 17.4, 18.1, 19.5
* MSVC: 19.29
* ARM/Clang: 20.1
* IBM XL: 16.1.1
* ROCm: 4.3.0
### Build system:
* CMake >= 3.16: required
* CMake >= 3.18: Fortran linkage. This does not affect most mixed Fortran/Kokkos builds. See [build issues](BUILD.md#KnownIssues).
* CMake >= 3.21.1 for NVC++
Primary tested compilers pass in release mode
with warnings as errors. They are also tested with a comprehensive set of
backend combinations (e.g. OpenMP, Threads, Serial, OpenMP+Serial, ...).
We are using the following set of flags:
* GCC:
````
-Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wignored-qualifiers -Wempty-body
-Wclobbered -Wuninitialized
````
* Intel:
````
-Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wuninitialized
````
* Clang:
````
-Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wuninitialized
````
* NVCC:
````
-Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wuninitialized
````
Other compilers are tested occasionally, in particular when pushing from develop to
master branch. These are tested less rigorously without `-Werror` and only for a select set of backends.
# Building and Installing Kokkos
Kokkos provides a CMake build system and a raw Makefile build system.
The CMake build system is strongly encouraged and will be the most rigorously supported in future releases.
Full details are given in the [build instructions](BUILD.md). Basic setups are shown here:
## CMake
The best way to install Kokkos is using the CMake build system. Assuming Kokkos lives in `$srcdir`:
````bash
cmake $srcdir \
-DCMAKE_CXX_COMPILER=$path_to_compiler \
-DCMAKE_INSTALL_PREFIX=$path_to_install \
-DKokkos_ENABLE_OPENMP=On \
-DKokkos_ARCH_HSW=On \
-DKokkos_HWLOC_DIR=$path_to_hwloc
````
then simply type `make install`. The Kokkos CMake package will then be installed in `$path_to_install` to be used by downstream packages.
To validate the Kokkos build, configure with
````
-DKokkos_ENABLE_TESTS=On
````
and run `make test` after completing the build.
For your CMake project using Kokkos, code such as the following:
````cmake
find_package(Kokkos)
...
target_link_libraries(myTarget Kokkos::kokkos)
````
should be added to your CMakeLists.txt. Your configure should additionally include
````
-DKokkos_DIR=$path_to_install/cmake/lib/Kokkos
````
or
````
-DKokkos_ROOT=$path_to_install
````
for the install location given above.
## Spack
An alternative to manually building with CMake is to use the Spack package manager.
To get started, download the Spack [repo](https://github.com/spack/spack).
A basic installation would be done as:
````bash
> spack install kokkos
````
Spack allows options and compilers to be tuned in the install command.
````bash
> spack install kokkos@3.0 %gcc@7.3.0 +openmp
````
This example illustrates the three most common parameters to Spack:
* Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options.
* Version: immediately following `kokkos`, the `@version` can specify a particular Kokkos version to build.
* Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%` option.
For a complete list of Kokkos options, run:
````bash
> spack info kokkos
````
Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable".
Generally, Spack usage should never really require you to reference the computer-generated unique install folder.
More details are given in the [build instructions](BUILD.md). If you must know, you can locate Spack Kokkos installations with:
````bash
> spack find -p kokkos ...
````
where `...` is the unique spec identifying the particular Kokkos configuration and version.
Some more details can be found in the Kokkos Spack [documentation](Spack.md) or the Spack [website](https://spack.readthedocs.io/en/latest).
## Raw Makefile
Raw Makefiles are only supported via inline builds. See below.
## Inline Builds vs. Installed Package
For individual projects, it may be preferable to build Kokkos inline rather than link to an installed package.
The main reason is that you may otherwise need many different
configurations of Kokkos installed depending on the required compile time
features an application needs. For example there is only one default
execution space, which means you need different installations to have OpenMP
or C++ threads as the default space. Also for the CUDA backend there are certain
choices, such as allowing relocatable device code, which must be made at
installation time. Building Kokkos inline uses largely the same process
as compiling an application against an installed Kokkos library.
For CMake, this means copying over the Kokkos source code into your project and adding `add_subdirectory(kokkos)` to your CMakeLists.txt.
For raw Makefiles, see the example benchmarks/bytes_and_flops/Makefile which can be used with an installed library and/or an inline build.
# Kokkos and CUDA UVM
Kokkos does support UVM as a specific memory space called CudaUVMSpace.
Allocations made with that space are accessible from host and device.
You can tell Kokkos to use that as the default space for Cuda allocations.
In either case UVM comes with a number of restrictions:
* You can't access allocations on the host while a kernel is potentially
running. This will lead to segfaults. To avoid that you either need to
call Kokkos::Cuda::fence() (or just Kokkos::fence()), after kernels, or
you can set the environment variable CUDA_LAUNCH_BLOCKING=1.
* In multi socket multi GPU machines without NVLINK, UVM defaults
to using zero copy allocations for technical reasons related to using multiple
GPUs from the same process. If an executable doesn't do that (e.g. each
MPI rank of an application uses a single GPU [can be the same GPU for
multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1.
This will enforce proper UVM allocations, but can lead to errors if
more than a single GPU is used by a single process.
All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.github.io/kokkos-core-wiki/requirements.html).
Building and installation instructions are described [here](https://kokkos.github.io/kokkos-core-wiki/building.html).
# Citing Kokkos
If you publish work which mentions Kokkos, please cite the following paper:
Please see the [following page](https://kokkos.github.io/kokkos-core-wiki/citation.html).
````BibTex
@ARTICLE{9485033,
author={Trott, Christian R. and Lebrun-Grandié, Damien and Arndt, Daniel and Ciesko, Jan and Dang, Vinh and Ellingwood, Nathan and Gayatri, Rahulkumar and Harvey, Evan and Hollman, Daisy S. and Ibanez, Dan and Liber, Nevin and Madsen, Jonathan and Miles, Jeff and Poliakoff, David and Powell, Amy and Rajamanickam, Sivasankaran and Simberg, Mikael and Sunderland, Dan and Turcksin, Bruno and Wilke, Jeremiah},
journal={IEEE Transactions on Parallel and Distributed Systems},
title={Kokkos 3: Programming Model Extensions for the Exascale Era},
year={2022},
volume={33},
number={4},
pages={805-817},
doi={10.1109/TPDS.2021.3097283}}
````
If you use more than one Kokkos EcoSystem package, please also cite:
````BibTex
@ARTICLE{9502936,
author={Trott, Christian and Berger-Vergiat, Luc and Poliakoff, David and Rajamanickam, Sivasankaran and Lebrun-Grandie, Damien and Madsen, Jonathan and Al Awar, Nader and Gligoric, Milos and Shipman, Galen and Womeldorff, Geoff},
journal={Computing in Science Engineering},
title={The Kokkos EcoSystem: Comprehensive Performance Portability for High Performance Computing},
year={2021},
volume={23},
number={5},
pages={10-18},
doi={10.1109/MCSE.2021.3098509}}
````
And if you feel generous: feel free to cite the original Kokkos paper which describes most of the basic Kokkos concepts:
````BibTeX
@article{CarterEdwards20143202,
title = "Kokkos: Enabling manycore performance portability through polymorphic memory access patterns ",
journal = "Journal of Parallel and Distributed Computing ",
volume = "74",
number = "12",
pages = "3202 - 3216",
year = "2014",
note = "Domain-Specific Languages and High-Level Frameworks for High-Performance Computing ",
issn = "0743-7315",
doi = "https://doi.org/10.1016/j.jpdc.2014.07.003",
url = "http://www.sciencedirect.com/science/article/pii/S0743731514001257",
author = "H. Carter Edwards and Christian R. Trott and Daniel Sunderland"
}
````
##### [LICENSE](https://github.com/kokkos/kokkos/blob/master/LICENSE)
# License
[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause)
Under the terms of Contract DE-NA0003525 with NTESS,
the U.S. Government retains certain rights in this software.
The full license statement used in all headers is available [here](https://kokkos.github.io/kokkos-core-wiki/license.html) or
[here](https://github.com/kokkos/kokkos/blob/master/LICENSE).

View File

@ -11,6 +11,7 @@ FILE(GLOB ALGO_HEADERS *.hpp)
FILE(GLOB ALGO_SOURCES *.cpp)
LIST(APPEND ALGO_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/*.hpp)
APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/impl/*.hpp)
INSTALL (
DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"

View File

@ -44,6 +44,10 @@
#ifndef KOKKOS_RANDOM_HPP
#define KOKKOS_RANDOM_HPP
#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_RANDOM
#endif
#include <Kokkos_Core.hpp>
#include <Kokkos_Complex.hpp>
@ -648,63 +652,44 @@ struct Random_UniqueIndex {
}
};
#ifdef KOKKOS_ENABLE_CUDA
// Specialization of Random_UniqueIndex for Kokkos::Device<Kokkos::Cuda, ...>.
// get_state_idx() picks a row index of the `locks_` view for the calling
// thread by claiming a free entry in column 0.
template <class MemorySpace>
struct Random_UniqueIndex<Kokkos::Device<Kokkos::Cuda, MemorySpace>> {
using locks_view_type =
View<int**, Kokkos::Device<Kokkos::Cuda, MemorySpace>>;
// Returns the index of the row whose lock entry (column 0) was switched from
// 0 to 1 by this call. The entry is left at 1; nothing in this function
// resets it. When not compiled for the device, always returns 0.
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type& locks_) {
#ifdef __CUDA_ARCH__
// Linearized position of this thread within its block.
const int i_offset =
(threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
// First candidate: linearized block id times threads-per-block plus the
// in-block offset, reduced modulo the number of rows in locks_.
int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
blockDim.x * blockDim.y * blockDim.z +
i_offset) %
locks_.extent(0);
// Try to swap locks_(i, 0) from 0 to 1. The loop keeps going while the call
// returns non-zero (per the Kokkos atomics API it returns the previous
// value, i.e. the entry was already taken).
while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) {
// Advance by one block's worth of threads; once past the last row,
// restart from this thread's in-block offset.
i += blockDim.x * blockDim.y * blockDim.z;
if (i >= static_cast<int>(locks_.extent(0))) {
i = i_offset;
}
}
return i;
#else
// __CUDA_ARCH__ is not defined: the view is unused and index 0 is returned.
(void)locks_;
return 0;
#endif
}
};
#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
#if defined(KOKKOS_ENABLE_CUDA)
#define KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP Kokkos::Cuda
#elif defined(KOKKOS_ENABLE_HIP)
#define KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP Kokkos::Experimental::HIP
#endif
#ifdef KOKKOS_ENABLE_HIP
template <class MemorySpace>
struct Random_UniqueIndex<
Kokkos::Device<Kokkos::Experimental::HIP, MemorySpace>> {
Kokkos::Device<KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP, MemorySpace>> {
using locks_view_type =
View<int**, Kokkos::Device<Kokkos::Experimental::HIP, MemorySpace>>;
View<int**, Kokkos::Device<KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP,
MemorySpace>>;
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type& locks_) {
#ifdef __HIP_DEVICE_COMPILE__
const int i_offset =
(threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
KOKKOS_IF_ON_DEVICE((
const int i_offset =
(threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
int i =
(((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
blockDim.x * blockDim.y * blockDim.z +
i_offset) %
locks_.extent(0);
while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) {
i += blockDim.x * blockDim.y * blockDim.z;
if (i >= static_cast<int>(locks_.extent(0))) {
i = i_offset;
}
}
return i;
#else
(void)locks_;
return 0;
#endif
while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) {
i += blockDim.x * blockDim.y * blockDim.z;
if (i >= static_cast<int>(locks_.extent(0))) {
i = i_offset;
}
}
return i;))
KOKKOS_IF_ON_HOST(((void)locks_; return 0;))
}
};
#undef KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP
#endif
#ifdef KOKKOS_ENABLE_SYCL
@ -1279,7 +1264,6 @@ struct fill_random_functor_begin_end;
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 0,
IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin, end;
@ -1303,7 +1287,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 0,
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1,
IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin, end;
@ -1331,7 +1314,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1,
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2,
IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin, end;
@ -1361,7 +1343,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2,
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3,
IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin, end;
@ -1392,7 +1373,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3,
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4,
IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin, end;
@ -1424,7 +1404,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4,
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5,
IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin, end;
@ -1458,7 +1437,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5,
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6,
IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin, end;
@ -1494,7 +1472,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6,
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7,
IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin, end;
@ -1532,7 +1509,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7,
template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 8,
IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a;
RandomPool rand_pool;
typename ViewType::const_value_type begin, end;
@ -1569,34 +1545,57 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 8,
}
};
template <class ViewType, class RandomPool, class IndexType = int64_t>
void fill_random(ViewType a, RandomPool g,
template <class ExecutionSpace, class ViewType, class RandomPool,
class IndexType = int64_t>
void fill_random(const ExecutionSpace& exec, ViewType a, RandomPool g,
typename ViewType::const_value_type begin,
typename ViewType::const_value_type end) {
int64_t LDA = a.extent(0);
if (LDA > 0)
parallel_for("Kokkos::fill_random", (LDA + 127) / 128,
Impl::fill_random_functor_begin_end<ViewType, RandomPool, 128,
ViewType::Rank, IndexType>(
a, g, begin, end));
parallel_for(
"Kokkos::fill_random",
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, (LDA + 127) / 128),
Impl::fill_random_functor_begin_end<ViewType, RandomPool, 128,
ViewType::Rank, IndexType>(
a, g, begin, end));
}
} // namespace Impl
// Public fill_random overload that takes an explicit execution space instance.
// Hands the view `a` to Impl::apply_to_view_of_static_rank together with a
// generic lambda that calls Kokkos::Impl::fill_random(exec, dst, g, begin, end)
// on whatever view the helper passes to it.
// NOTE(review): IndexType is not named in the Impl call below, so that call
// appears to fall back to Impl::fill_random's own default (int64_t) no matter
// which IndexType was chosen here -- confirm whether it should be forwarded.
template <class ExecutionSpace, class ViewType, class RandomPool,
class IndexType = int64_t>
void fill_random(const ExecutionSpace& exec, ViewType a, RandomPool g,
typename ViewType::const_value_type begin,
typename ViewType::const_value_type end) {
Impl::apply_to_view_of_static_rank(
// The lambda captures exec, g, begin and end by reference.
[&](auto dst) { Kokkos::Impl::fill_random(exec, dst, g, begin, end); },
a);
}
// Convenience overload taking a single bound: delegates to the
// (begin, end) overload with 0 as `begin` and `range` as `end`.
template <class ExecutionSpace, class ViewType, class RandomPool,
          class IndexType = int64_t>
void fill_random(const ExecutionSpace& exec, ViewType a, RandomPool g,
                 typename ViewType::const_value_type range) {
  // Same copy-initialization from 0 that passing the literal would perform.
  typename ViewType::const_value_type lower_bound = 0;
  fill_random(exec, a, g, lower_bound, range);
}
template <class ViewType, class RandomPool, class IndexType = int64_t>
void fill_random(ViewType a, RandomPool g,
typename ViewType::const_value_type begin,
typename ViewType::const_value_type end) {
Impl::apply_to_view_of_static_rank(
[&](auto dst) { Kokkos::Impl::fill_random(dst, g, begin, end); }, a);
fill_random(typename ViewType::execution_space{}, a, g, begin, end);
}
template <class ViewType, class RandomPool, class IndexType = int64_t>
void fill_random(ViewType a, RandomPool g,
typename ViewType::const_value_type range) {
fill_random(a, g, 0, range);
fill_random(typename ViewType::execution_space{}, a, g, 0, range);
}
} // namespace Kokkos
#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_RANDOM
#undef KOKKOS_IMPL_PUBLIC_INCLUDE
#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_RANDOM
#endif
#endif

View File

@ -44,6 +44,10 @@
#ifndef KOKKOS_SORT_HPP_
#define KOKKOS_SORT_HPP_
#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT
#endif
#include <Kokkos_Core.hpp>
@ -120,13 +124,13 @@ class BinSort {
// If a Kokkos::View then can generate constant random access
// otherwise can only use the constant type.
using src_view_type = typename std::conditional<
using src_view_type = std::conditional_t<
Kokkos::is_view<SrcViewType>::value,
Kokkos::View<typename SrcViewType::const_data_type,
typename SrcViewType::array_layout,
typename SrcViewType::device_type,
Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
typename SrcViewType::const_type>::type;
typename SrcViewType::const_type>;
using perm_view_type = typename PermuteViewType::const_type;
@ -151,8 +155,11 @@ class BinSort {
}
};
using execution_space = typename Space::execution_space;
using bin_op_type = BinSortOp;
// Naming this alias "execution_space" would be problematic since it would be
// considered as execution space for the various functors which might use
// another execution space through sort() or create_permute_vector().
using exec_space = typename Space::execution_space;
using bin_op_type = BinSortOp;
struct bin_count_tag {};
struct bin_offset_tag {};
@ -171,13 +178,13 @@ class BinSort {
// If a Kokkos::View then can generate constant random access
// otherwise can only use the constant type.
using const_rnd_key_view_type = typename std::conditional<
using const_rnd_key_view_type = std::conditional_t<
Kokkos::is_view<KeyViewType>::value,
Kokkos::View<typename KeyViewType::const_data_type,
typename KeyViewType::array_layout,
typename KeyViewType::device_type,
Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
const_key_view_type>::type;
const_key_view_type>;
using non_const_key_scalar = typename KeyViewType::non_const_value_type;
using const_key_scalar = typename KeyViewType::const_value_type;
@ -220,6 +227,14 @@ class BinSort {
range_begin(range_begin_),
range_end(range_end_),
sort_within_bins(sort_within_bins_) {
static_assert(
Kokkos::SpaceAccessibility<ExecutionSpace,
typename Space::memory_space>::accessible,
"The provided execution space must be able to access the memory space "
"BinSort was initialized with!");
if (bin_op.max_bins() <= 0)
Kokkos::abort(
"The number of bins in the BinSortOp object must be greater than 0!");
bin_count_atomic = Kokkos::View<int*, Space>(
"Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins());
bin_count_const = bin_count_atomic;
@ -235,7 +250,7 @@ class BinSort {
BinSort(const_key_view_type keys_, int range_begin_, int range_end_,
BinSortOp bin_op_, bool sort_within_bins_ = false)
: BinSort(execution_space{}, keys_, range_begin_, range_end_, bin_op_,
: BinSort(exec_space{}, keys_, range_begin_, range_end_, bin_op_,
sort_within_bins_) {}
template <typename ExecutionSpace>
@ -245,13 +260,19 @@ class BinSort {
BinSort(const_key_view_type keys_, BinSortOp bin_op_,
bool sort_within_bins_ = false)
: BinSort(execution_space{}, keys_, bin_op_, sort_within_bins_) {}
: BinSort(exec_space{}, keys_, bin_op_, sort_within_bins_) {}
//----------------------------------------
// Create the permutation vector, the bin_offset array and the bin_count
// array. Can be called again if keys changed
template <class ExecutionSpace = execution_space>
void create_permute_vector(const ExecutionSpace& exec = execution_space{}) {
template <class ExecutionSpace = exec_space>
void create_permute_vector(const ExecutionSpace& exec = exec_space{}) {
static_assert(
Kokkos::SpaceAccessibility<ExecutionSpace,
typename Space::memory_space>::accessible,
"The provided execution space must be able to access the memory space "
"BinSort was initialized with!");
const size_t len = range_end - range_begin;
Kokkos::parallel_for(
"Kokkos::Sort::BinCount",
@ -281,6 +302,17 @@ class BinSort {
template <class ExecutionSpace, class ValuesViewType>
void sort(const ExecutionSpace& exec, ValuesViewType const& values,
int values_range_begin, int values_range_end) const {
static_assert(
Kokkos::SpaceAccessibility<ExecutionSpace,
typename Space::memory_space>::accessible,
"The provided execution space must be able to access the memory space "
"BinSort was initialized with!");
static_assert(
Kokkos::SpaceAccessibility<
ExecutionSpace, typename ValuesViewType::memory_space>::accessible,
"The provided execution space must be able to access the memory space "
"of the View argument!");
using scratch_view_type =
Kokkos::View<typename ValuesViewType::data_type,
typename ValuesViewType::array_layout,
@ -340,7 +372,7 @@ class BinSort {
template <class ValuesViewType>
void sort(ValuesViewType const& values, int values_range_begin,
int values_range_end) const {
execution_space exec;
exec_space exec;
sort(exec, values, values_range_begin, values_range_end);
exec.fence("Kokkos::Sort: fence after sorting");
}
@ -428,7 +460,7 @@ struct BinOp1D {
BinOp1D() = default;
// Construct BinOp with number of bins, minimum value and maxuimum value
// Construct BinOp with number of bins, minimum value and maximum value
BinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
typename KeyViewType::const_value_type max)
: max_bins_(max_bins__ + 1),
@ -554,11 +586,7 @@ struct min_max_functor {
template <class ExecutionSpace, class ViewType>
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort(
const ExecutionSpace& exec, ViewType const& view,
bool const always_use_kokkos_sort = false) {
if (!always_use_kokkos_sort) {
if (Impl::try_std_sort(view, exec)) return;
}
const ExecutionSpace& exec, ViewType const& view) {
using CompType = BinOp1D<ViewType>;
Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
@ -596,12 +624,38 @@ std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort(
bin_sort.sort(exec, view);
}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
// Deprecated overload kept for callers that still pass always_use_kokkos_sort.
// Unless the flag is set, Impl::try_std_sort(view, exec) is attempted first;
// the flag-less sort(exec, view) runs only when that attempt is skipped or
// returns false.
template <class ExecutionSpace, class ViewType>
KOKKOS_DEPRECATED_WITH_COMMENT(
    "Use the overload not taking bool always_use_kokkos_sort")
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort(
    const ExecutionSpace& exec, ViewType const& view,
    bool const always_use_kokkos_sort) {
  // Short-circuit evaluation keeps try_std_sort from being called when the
  // flag is set.
  const bool needs_kokkos_sort =
      always_use_kokkos_sort || !Impl::try_std_sort(view, exec);
  if (needs_kokkos_sort) {
    sort(exec, view);
  }
}
#endif
template <class ViewType>
void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
void sort(ViewType const& view) {
typename ViewType::execution_space exec;
sort(exec, view);
exec.fence("Kokkos::Sort: fence after sorting");
}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
// Deprecated overload without an execution space argument: default-constructs
// the view's execution space, forwards to the deprecated (exec, view, bool)
// overload, then fences that execution space instance.
template <class ViewType>
KOKKOS_DEPRECATED_WITH_COMMENT(
    "Use the overload not taking bool always_use_kokkos_sort")
void sort(ViewType const& view, bool const always_use_kokkos_sort) {
  using space_type = typename ViewType::execution_space;
  space_type default_space;
  sort(default_space, view, always_use_kokkos_sort);
  default_space.fence("Kokkos::Sort: fence after sorting");
}
#endif
template <class ExecutionSpace, class ViewType>
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort(
@ -635,4 +689,8 @@ void sort(ViewType view, size_t const begin, size_t const end) {
} // namespace Kokkos
#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT
#undef KOKKOS_IMPL_PUBLIC_INCLUDE
#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT
#endif
#endif

View File

@ -44,59 +44,103 @@
#ifndef KOKKOS_STD_ALGORITHMS_HPP
#define KOKKOS_STD_ALGORITHMS_HPP
#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STD_ALGORITHMS
#endif
/// \file Kokkos_StdAlgorithms.hpp
/// \brief Kokkos counterparts for Standard C++ Library algorithms
#include <std_algorithms/Kokkos_Constraints.hpp>
#include <std_algorithms/Kokkos_RandomAccessIterator.hpp>
#include <std_algorithms/Kokkos_BeginEnd.hpp>
#include "std_algorithms/impl/Kokkos_Constraints.hpp"
#include "std_algorithms/impl/Kokkos_RandomAccessIterator.hpp"
#include "std_algorithms/Kokkos_BeginEnd.hpp"
// distance
#include <std_algorithms/Kokkos_Distance.hpp>
#include "std_algorithms/Kokkos_Distance.hpp"
// move, swap, iter_swap
#include "std_algorithms/Kokkos_ModifyingOperations.hpp"
// note that we categorize below the headers
// following the std classification.
// find, find_if, find_if_not
// for_each, for_each_n
// mismatch
// equal
// count_if, count
// all_of, any_of, none_of
// adjacent_find
// lexicographical_compare
// search, search_n
// find_first_of, find_end
#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp>
// modifying ops
#include "std_algorithms/Kokkos_Swap.hpp"
#include "std_algorithms/Kokkos_IterSwap.hpp"
// replace, replace_copy_if, replace_copy, replace_if
// copy, copy_n, copy_backward, copy_if
// fill, fill_n
// transform
// generate, generate_n
// reverse, reverse_copy
// move, move_backward
// swap_ranges
// unique, unique_copy
// rotate, rotate_copy
// remove, remove_if, remove_copy, remove_copy_if
// shift_left, shift_right
#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp>
// non-modifying sequence
#include "std_algorithms/Kokkos_AdjacentFind.hpp"
#include "std_algorithms/Kokkos_Count.hpp"
#include "std_algorithms/Kokkos_CountIf.hpp"
#include "std_algorithms/Kokkos_AllOf.hpp"
#include "std_algorithms/Kokkos_AnyOf.hpp"
#include "std_algorithms/Kokkos_NoneOf.hpp"
#include "std_algorithms/Kokkos_Equal.hpp"
#include "std_algorithms/Kokkos_Find.hpp"
#include "std_algorithms/Kokkos_FindIf.hpp"
#include "std_algorithms/Kokkos_FindIfNot.hpp"
#include "std_algorithms/Kokkos_FindEnd.hpp"
#include "std_algorithms/Kokkos_FindFirstOf.hpp"
#include "std_algorithms/Kokkos_ForEach.hpp"
#include "std_algorithms/Kokkos_ForEachN.hpp"
#include "std_algorithms/Kokkos_LexicographicalCompare.hpp"
#include "std_algorithms/Kokkos_Mismatch.hpp"
#include "std_algorithms/Kokkos_Search.hpp"
#include "std_algorithms/Kokkos_SearchN.hpp"
// is_sorted_until, is_sorted
#include <std_algorithms/Kokkos_SortingOperations.hpp>
// modifying sequence
#include "std_algorithms/Kokkos_Fill.hpp"
#include "std_algorithms/Kokkos_FillN.hpp"
#include "std_algorithms/Kokkos_Replace.hpp"
#include "std_algorithms/Kokkos_ReplaceIf.hpp"
#include "std_algorithms/Kokkos_ReplaceCopyIf.hpp"
#include "std_algorithms/Kokkos_ReplaceCopy.hpp"
#include "std_algorithms/Kokkos_Copy.hpp"
#include "std_algorithms/Kokkos_CopyN.hpp"
#include "std_algorithms/Kokkos_CopyBackward.hpp"
#include "std_algorithms/Kokkos_CopyIf.hpp"
#include "std_algorithms/Kokkos_Transform.hpp"
#include "std_algorithms/Kokkos_Generate.hpp"
#include "std_algorithms/Kokkos_GenerateN.hpp"
#include "std_algorithms/Kokkos_Reverse.hpp"
#include "std_algorithms/Kokkos_ReverseCopy.hpp"
#include "std_algorithms/Kokkos_Move.hpp"
#include "std_algorithms/Kokkos_MoveBackward.hpp"
#include "std_algorithms/Kokkos_SwapRanges.hpp"
#include "std_algorithms/Kokkos_Unique.hpp"
#include "std_algorithms/Kokkos_UniqueCopy.hpp"
#include "std_algorithms/Kokkos_Rotate.hpp"
#include "std_algorithms/Kokkos_RotateCopy.hpp"
#include "std_algorithms/Kokkos_Remove.hpp"
#include "std_algorithms/Kokkos_RemoveIf.hpp"
#include "std_algorithms/Kokkos_RemoveCopy.hpp"
#include "std_algorithms/Kokkos_RemoveCopyIf.hpp"
#include "std_algorithms/Kokkos_ShiftLeft.hpp"
#include "std_algorithms/Kokkos_ShiftRight.hpp"
// min_element, max_element, minmax_element
#include <std_algorithms/Kokkos_MinMaxElementOperations.hpp>
// sorting
#include "std_algorithms/Kokkos_IsSortedUntil.hpp"
#include "std_algorithms/Kokkos_IsSorted.hpp"
// is_partitioned, partition_copy, partition_point
#include <std_algorithms/Kokkos_PartitioningOperations.hpp>
// min/max element
#include "std_algorithms/Kokkos_MinElement.hpp"
#include "std_algorithms/Kokkos_MaxElement.hpp"
#include "std_algorithms/Kokkos_MinMaxElement.hpp"
// adjacent_difference
// reduce, transform_reduce
// exclusive_scan, transform_exclusive_scan
// inclusive_scan, transform_inclusive_scan
#include <std_algorithms/Kokkos_Numeric.hpp>
// partitioning
#include "std_algorithms/Kokkos_IsPartitioned.hpp"
#include "std_algorithms/Kokkos_PartitionCopy.hpp"
#include "std_algorithms/Kokkos_PartitionPoint.hpp"
// numeric
#include "std_algorithms/Kokkos_AdjacentDifference.hpp"
#include "std_algorithms/Kokkos_Reduce.hpp"
#include "std_algorithms/Kokkos_TransformReduce.hpp"
#include "std_algorithms/Kokkos_ExclusiveScan.hpp"
#include "std_algorithms/Kokkos_TransformExclusiveScan.hpp"
#include "std_algorithms/Kokkos_InclusiveScan.hpp"
#include "std_algorithms/Kokkos_TransformInclusiveScan.hpp"
#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STD_ALGORITHMS
#undef KOKKOS_IMPL_PUBLIC_INCLUDE
#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STD_ALGORITHMS
#endif
#endif

View File

@ -42,106 +42,15 @@
//@HEADER
*/
#ifndef KOKKOS_STD_NUMERICS_ADJACENT_DIFFERENCE_HPP
#define KOKKOS_STD_NUMERICS_ADJACENT_DIFFERENCE_HPP
#ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_HPP
#define KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_HPP
#include <Kokkos_Core.hpp>
#include "../Kokkos_BeginEnd.hpp"
#include "../Kokkos_Constraints.hpp"
#include "../Kokkos_Distance.hpp"
#include "impl/Kokkos_AdjacentDifference.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
namespace Impl {
// ------------------------
//
// functors
//
// ------------------------
// Default binary operation: returns `lhs - rhs` as RetType (which is
// ValueType2 unless specified otherwise).
template <class ValueType1, class ValueType2, class RetType = ValueType2>
struct StdAdjacentDifferenceDefaultBinaryOpFunctor {
  KOKKOS_FUNCTION
  constexpr RetType operator()(const ValueType1& lhs,
                               const ValueType2& rhs) const {
    return lhs - rhs;
  }
};
// Functor producing one output element per index: element 0 is copied from
// the input, every other element i is m_op(input[i], input[i - 1]).
template <class InputIteratorType, class OutputIteratorType,
          class BinaryOperator>
struct StdAdjacentDiffFunctor {
  using index_type = typename InputIteratorType::difference_type;

  const InputIteratorType m_first_from;
  const OutputIteratorType m_first_dest;
  BinaryOperator m_op;

  KOKKOS_FUNCTION
  StdAdjacentDiffFunctor(InputIteratorType first_from,
                         OutputIteratorType first_dest, BinaryOperator op)
      : m_first_from(std::move(first_from)),
        m_first_dest(std::move(first_dest)),
        m_op(std::move(op)) {}

  KOKKOS_FUNCTION
  void operator()(const index_type i) const {
    const auto& current = m_first_from[i];
    if (i != 0) {
      // Interior element: combine with the left neighbour.
      const auto& previous = m_first_from[i - 1];
      m_first_dest[i] = m_op(current, previous);
      return;
    }
    // First element has no left neighbour and is copied through.
    m_first_dest[i] = current;
  }
};
// ------------------------------------------
// adjacent_difference_impl
// ------------------------------------------
// Runs StdAdjacentDiffFunctor over [first_from, last_from) on `ex`, writing
// to the range starting at first_dest, then fences `ex`.
//
// label      - label passed to Kokkos::parallel_for
// ex         - execution space instance the kernel is launched on
// first_from - beginning of the input range
// last_from  - end of the input range
// first_dest - beginning of the output range
// bin_op     - binary operation applied as bin_op(input[i], input[i - 1])
//
// Returns first_dest unchanged for an empty input range, otherwise
// first_dest advanced by the number of input elements.
//
// The previous version also allocated a scratch view of num_elements entries
// that was never read or written; that allocation has been dropped.
// NOTE(review): the functor reads input[i - 1] while other indices write the
// output, so overlapping input/output ranges look unsafe -- confirm callers
// never alias them.
template <class ExecutionSpace, class InputIteratorType,
          class OutputIteratorType, class BinaryOp>
OutputIteratorType adjacent_difference_impl(const std::string& label,
                                            const ExecutionSpace& ex,
                                            InputIteratorType first_from,
                                            InputIteratorType last_from,
                                            OutputIteratorType first_dest,
                                            BinaryOp bin_op) {
  // checks
  Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest);
  Impl::static_assert_iterators_have_matching_difference_type(first_from,
                                                              first_dest);
  Impl::expect_valid_range(first_from, last_from);

  // nothing to do for an empty range
  if (first_from == last_from) {
    return first_dest;
  }

  // aliases
  using functor_t =
      StdAdjacentDiffFunctor<InputIteratorType, OutputIteratorType, BinaryOp>;

  // run
  const auto num_elements =
      Kokkos::Experimental::distance(first_from, last_from);
  ::Kokkos::parallel_for(label,
                         RangePolicy<ExecutionSpace>(ex, 0, num_elements),
                         functor_t(first_from, first_dest, bin_op));
  ex.fence("Kokkos::adjacent_difference: fence after operation");

  // return
  return first_dest + num_elements;
}
} // end namespace Impl
// ------------------------
//
// public API
//
// ------------------------
template <class ExecutionSpace, class InputIteratorType,
class OutputIteratorType>
std::enable_if_t<!::Kokkos::is_view<InputIteratorType>::value,

View File

@ -0,0 +1,124 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_HPP
#define KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_HPP
#include "impl/Kokkos_AdjacentFind.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
// overload set1
// Iterator overload without a user label: forwards [first, last) to
// Impl::adjacent_find_impl under the default iterator-API label.
template <class ExecutionSpace, class IteratorType>
IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first,
                           IteratorType last) {
  const std::string default_label =
      "Kokkos::adjacent_find_iterator_api_default";
  return Impl::adjacent_find_impl(default_label, ex, first, last);
}
// Iterator overload with a caller-supplied label: forwards all arguments to
// Impl::adjacent_find_impl and returns its result.
template <class ExecutionSpace, class IteratorType>
IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex,
                           IteratorType first, IteratorType last) {
  IteratorType result = Impl::adjacent_find_impl(label, ex, first, last);
  return result;
}
// View overload without a user label: checks the view at compile time, then
// forwards its begin/end iterators to Impl::adjacent_find_impl under the
// default view-API label.
template <class ExecutionSpace, class DataType, class... Properties>
auto adjacent_find(const ExecutionSpace& ex,
                   const ::Kokkos::View<DataType, Properties...>& v) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex,
                                  ::Kokkos::Experimental::begin(v),
                                  ::Kokkos::Experimental::end(v));
}
// View overload with a caller-supplied label: checks the view at compile
// time, then forwards its begin/end iterators to Impl::adjacent_find_impl.
template <class ExecutionSpace, class DataType, class... Properties>
auto adjacent_find(const std::string& label, const ExecutionSpace& ex,
                   const ::Kokkos::View<DataType, Properties...>& v) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  return Impl::adjacent_find_impl(label, ex,
                                  ::Kokkos::Experimental::begin(v),
                                  ::Kokkos::Experimental::end(v));
}
// overload set2
// Iterator overload with a binary predicate and no user label: forwards to
// Impl::adjacent_find_impl under the default iterator-API label.
template <class ExecutionSpace, class IteratorType, class BinaryPredicateType>
IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first,
                           IteratorType last, BinaryPredicateType pred) {
  const std::string default_label =
      "Kokkos::adjacent_find_iterator_api_default";
  return Impl::adjacent_find_impl(default_label, ex, first, last, pred);
}
// Iterator overload with a binary predicate and a caller-supplied label:
// forwards all arguments to Impl::adjacent_find_impl and returns its result.
template <class ExecutionSpace, class IteratorType, class BinaryPredicateType>
IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex,
                           IteratorType first, IteratorType last,
                           BinaryPredicateType pred) {
  IteratorType result = Impl::adjacent_find_impl(label, ex, first, last, pred);
  return result;
}
// View overload with a binary predicate and no user label: checks the view at
// compile time, then forwards its begin/end iterators and the predicate to
// Impl::adjacent_find_impl under the default view-API label.
template <class ExecutionSpace, class DataType, class... Properties,
          class BinaryPredicateType>
auto adjacent_find(const ExecutionSpace& ex,
                   const ::Kokkos::View<DataType, Properties...>& v,
                   BinaryPredicateType pred) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex,
                                  ::Kokkos::Experimental::begin(v),
                                  ::Kokkos::Experimental::end(v), pred);
}
// View overload with a binary predicate and a caller-supplied label: checks
// the view at compile time, then forwards its begin/end iterators and the
// predicate to Impl::adjacent_find_impl.
template <class ExecutionSpace, class DataType, class... Properties,
          class BinaryPredicateType>
auto adjacent_find(const std::string& label, const ExecutionSpace& ex,
                   const ::Kokkos::View<DataType, Properties...>& v,
                   BinaryPredicateType pred) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  return Impl::adjacent_find_impl(label, ex,
                                  ::Kokkos::Experimental::begin(v),
                                  ::Kokkos::Experimental::end(v), pred);
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -0,0 +1,94 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_ALL_OF_HPP
#define KOKKOS_STD_ALGORITHMS_ALL_OF_HPP
#include "impl/Kokkos_AllOfAnyOfNoneOf.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
// Iterator overload without a user label: forwards [first, last) and the
// predicate to Impl::all_of_impl under the default iterator-API label.
template <class ExecutionSpace, class InputIterator, class Predicate>
bool all_of(const ExecutionSpace& ex, InputIterator first, InputIterator last,
            Predicate predicate) {
  const std::string default_label = "Kokkos::all_of_iterator_api_default";
  return Impl::all_of_impl(default_label, ex, first, last, predicate);
}
// Iterator overload with a caller-supplied label: forwards all arguments to
// Impl::all_of_impl and returns its result.
template <class ExecutionSpace, class InputIterator, class Predicate>
bool all_of(const std::string& label, const ExecutionSpace& ex,
            InputIterator first, InputIterator last, Predicate predicate) {
  const bool outcome = Impl::all_of_impl(label, ex, first, last, predicate);
  return outcome;
}
// View overload without a user label: checks the view at compile time, then
// forwards its cbegin/cend iterators and the (moved) predicate to
// Impl::all_of_impl under the default view-API label.
template <class ExecutionSpace, class DataType, class... Properties,
          class Predicate>
bool all_of(const ExecutionSpace& ex,
            const ::Kokkos::View<DataType, Properties...>& v,
            Predicate predicate) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  return Impl::all_of_impl("Kokkos::all_of_view_api_default", ex,
                           ::Kokkos::Experimental::cbegin(v),
                           ::Kokkos::Experimental::cend(v),
                           std::move(predicate));
}
// View overload with a caller-supplied label: checks the view at compile
// time, then forwards its cbegin/cend iterators and the (moved) predicate to
// Impl::all_of_impl.
template <class ExecutionSpace, class DataType, class... Properties,
          class Predicate>
bool all_of(const std::string& label, const ExecutionSpace& ex,
            const ::Kokkos::View<DataType, Properties...>& v,
            Predicate predicate) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  return Impl::all_of_impl(label, ex, ::Kokkos::Experimental::cbegin(v),
                           ::Kokkos::Experimental::cend(v),
                           std::move(predicate));
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -0,0 +1,94 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_ANY_OF_HPP
#define KOKKOS_STD_ALGORITHMS_ANY_OF_HPP
#include "impl/Kokkos_AllOfAnyOfNoneOf.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
// Iterator overload without a user label: forwards [first, last) and the
// predicate to Impl::any_of_impl under the default iterator-API label.
// The label previously read "Kokkos::any_of_view_api_default", which is the
// label used by the View overload; it now names the iterator API so the two
// entry points can be told apart.
template <class ExecutionSpace, class InputIterator, class Predicate>
bool any_of(const ExecutionSpace& ex, InputIterator first, InputIterator last,
            Predicate predicate) {
  return Impl::any_of_impl("Kokkos::any_of_iterator_api_default", ex, first,
                           last, predicate);
}
// Iterator overload with a caller-supplied label: forwards all arguments to
// Impl::any_of_impl and returns its result.
template <class ExecutionSpace, class InputIterator, class Predicate>
bool any_of(const std::string& label, const ExecutionSpace& ex,
            InputIterator first, InputIterator last, Predicate predicate) {
  const bool outcome = Impl::any_of_impl(label, ex, first, last, predicate);
  return outcome;
}
// View overload without a user label: checks the view at compile time, then
// forwards its cbegin/cend iterators and the (moved) predicate to
// Impl::any_of_impl under the default view-API label.
template <class ExecutionSpace, class DataType, class... Properties,
          class Predicate>
bool any_of(const ExecutionSpace& ex,
            const ::Kokkos::View<DataType, Properties...>& v,
            Predicate predicate) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  return Impl::any_of_impl("Kokkos::any_of_view_api_default", ex,
                           ::Kokkos::Experimental::cbegin(v),
                           ::Kokkos::Experimental::cend(v),
                           std::move(predicate));
}
// View overload with a caller-supplied label: checks the view at compile
// time, then forwards its cbegin/cend iterators and the (moved) predicate to
// Impl::any_of_impl.
template <class ExecutionSpace, class DataType, class... Properties,
          class Predicate>
bool any_of(const std::string& label, const ExecutionSpace& ex,
            const ::Kokkos::View<DataType, Properties...>& v,
            Predicate predicate) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  return Impl::any_of_impl(label, ex, ::Kokkos::Experimental::cbegin(v),
                           ::Kokkos::Experimental::cend(v),
                           std::move(predicate));
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -46,8 +46,8 @@
#define KOKKOS_BEGIN_END_HPP
#include <Kokkos_View.hpp>
#include "Kokkos_RandomAccessIterator.hpp"
#include "Kokkos_Constraints.hpp"
#include "impl/Kokkos_RandomAccessIterator.hpp"
#include "impl/Kokkos_Constraints.hpp"
/// \file Kokkos_BeginEnd.hpp
/// \brief Kokkos begin, end, cbegin, cend

View File

@ -0,0 +1,97 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_COPY_HPP
#define KOKKOS_STD_ALGORITHMS_COPY_HPP
#include "impl/Kokkos_CopyCopyN.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
/// Iterator overload of copy that does not take a user-supplied label.
///
/// Hands ex, first, last and d_first to Impl::copy_impl together with the
/// fixed label "Kokkos::copy_iterator_api_default" and returns the iterator
/// produced by that call.
template <typename ExecutionSpace, typename InputIterator,
          typename OutputIterator>
auto copy(const ExecutionSpace& ex, InputIterator first, InputIterator last,
          OutputIterator d_first) -> OutputIterator {
  return Impl::copy_impl("Kokkos::copy_iterator_api_default", ex, first, last,
                         d_first);
}
/// Iterator overload of copy with a caller-provided label.
///
/// \param label   string passed through unchanged to Impl::copy_impl
/// \param ex      execution space instance forwarded to the implementation
/// \param first   start of the source range
/// \param last    end of the source range
/// \param d_first start of the destination range
/// \return the iterator returned by Impl::copy_impl
template <typename ExecutionSpace, typename InputIterator,
          typename OutputIterator>
auto copy(const std::string& label, const ExecutionSpace& ex,
          InputIterator first, InputIterator last, OutputIterator d_first)
    -> OutputIterator {
  OutputIterator result = Impl::copy_impl(label, ex, first, last, d_first);
  return result;
}
/// View overload of copy that does not take a user-supplied label.
///
/// Both views are checked with
/// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then the call
/// is forwarded to Impl::copy_impl using KE::cbegin(source), KE::cend(source)
/// and KE::begin(dest), labelled "Kokkos::copy_view_api_default".
///
/// dest is taken by const reference so that temporaries (e.g. the result of a
/// subview expression) and const View handles are accepted; the destination
/// elements are still reached through KE::begin(dest).
///
/// \return the iterator returned by Impl::copy_impl
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2>
auto copy(const ExecutionSpace& ex,
          const ::Kokkos::View<DataType1, Properties1...>& source,
          const ::Kokkos::View<DataType2, Properties2...>& dest) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

  namespace KE = ::Kokkos::Experimental;
  return Impl::copy_impl("Kokkos::copy_view_api_default", ex,
                         KE::cbegin(source), KE::cend(source), KE::begin(dest));
}
/// View overload of copy with a caller-provided label.
///
/// Both views are checked with
/// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then the call
/// is forwarded to Impl::copy_impl using KE::cbegin(source), KE::cend(source)
/// and KE::begin(dest).
///
/// dest is taken by const reference so that temporaries (e.g. the result of a
/// subview expression) and const View handles are accepted; the destination
/// elements are still reached through KE::begin(dest).
///
/// \param label string passed through unchanged to Impl::copy_impl
/// \return the iterator returned by Impl::copy_impl
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2>
auto copy(const std::string& label, const ExecutionSpace& ex,
          const ::Kokkos::View<DataType1, Properties1...>& source,
          const ::Kokkos::View<DataType2, Properties2...>& dest) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

  namespace KE = ::Kokkos::Experimental;
  return Impl::copy_impl(label, ex, KE::cbegin(source), KE::cend(source),
                         KE::begin(dest));
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -0,0 +1,95 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_HPP
#define KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_HPP
#include "impl/Kokkos_CopyBackward.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
/// Iterator overload of copy_backward that does not take a user-supplied
/// label.
///
/// Forwards ex, first, last and d_last to Impl::copy_backward_impl with the
/// fixed label "Kokkos::copy_backward_iterator_api_default" and returns the
/// iterator produced by that call.
template <typename ExecutionSpace, typename IteratorType1,
          typename IteratorType2>
auto copy_backward(const ExecutionSpace& ex, IteratorType1 first,
                   IteratorType1 last, IteratorType2 d_last) -> IteratorType2 {
  return Impl::copy_backward_impl("Kokkos::copy_backward_iterator_api_default",
                                  ex, first, last, d_last);
}
/// Iterator overload of copy_backward with a caller-provided label.
///
/// \param label  string passed through unchanged to Impl::copy_backward_impl
/// \param ex     execution space instance forwarded to the implementation
/// \param first  start of the source range
/// \param last   end of the source range
/// \param d_last end of the destination range
/// \return the iterator returned by Impl::copy_backward_impl
template <typename ExecutionSpace, typename IteratorType1,
          typename IteratorType2>
auto copy_backward(const std::string& label, const ExecutionSpace& ex,
                   IteratorType1 first, IteratorType1 last,
                   IteratorType2 d_last) -> IteratorType2 {
  IteratorType2 result =
      Impl::copy_backward_impl(label, ex, first, last, d_last);
  return result;
}
/// View overload of copy_backward that does not take a user-supplied label.
///
/// Both views are checked with
/// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then the call
/// is forwarded to Impl::copy_backward_impl using KE::cbegin(source),
/// KE::cend(source) and KE::end(dest), labelled
/// "Kokkos::copy_backward_view_api_default".
///
/// dest is taken by const reference so that temporaries (e.g. the result of a
/// subview expression) and const View handles are accepted; the destination
/// elements are still reached through KE::end(dest).
///
/// \return the iterator returned by Impl::copy_backward_impl
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2>
auto copy_backward(const ExecutionSpace& ex,
                   const ::Kokkos::View<DataType1, Properties1...>& source,
                   const ::Kokkos::View<DataType2, Properties2...>& dest) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

  // Qualify the begin/end helpers explicitly so the call does not depend on
  // argument-dependent lookup through the View's template arguments.
  namespace KE = ::Kokkos::Experimental;
  return Impl::copy_backward_impl("Kokkos::copy_backward_view_api_default", ex,
                                  KE::cbegin(source), KE::cend(source),
                                  KE::end(dest));
}
/// View overload of copy_backward with a caller-provided label.
///
/// Both views are checked with
/// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then the call
/// is forwarded to Impl::copy_backward_impl using KE::cbegin(source),
/// KE::cend(source) and KE::end(dest).
///
/// dest is taken by const reference so that temporaries (e.g. the result of a
/// subview expression) and const View handles are accepted; the destination
/// elements are still reached through KE::end(dest).
///
/// \param label string passed through unchanged to Impl::copy_backward_impl
/// \return the iterator returned by Impl::copy_backward_impl
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2>
auto copy_backward(const std::string& label, const ExecutionSpace& ex,
                   const ::Kokkos::View<DataType1, Properties1...>& source,
                   const ::Kokkos::View<DataType2, Properties2...>& dest) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

  // Qualify the begin/end helpers explicitly so the call does not depend on
  // argument-dependent lookup through the View's template arguments.
  namespace KE = ::Kokkos::Experimental;
  return Impl::copy_backward_impl(label, ex, KE::cbegin(source),
                                  KE::cend(source), KE::end(dest));
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -0,0 +1,99 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_COPY_IF_HPP
#define KOKKOS_STD_ALGORITHMS_COPY_IF_HPP
#include "impl/Kokkos_CopyIf.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
/// Iterator overload of copy_if that does not take a user-supplied label.
///
/// Forwards ex, first, last and d_first to Impl::copy_if_impl with the fixed
/// label "Kokkos::copy_if_iterator_api_default"; pred is moved into the call.
/// Returns the iterator produced by the implementation.
template <typename ExecutionSpace, typename InputIterator,
          typename OutputIterator, typename Predicate>
auto copy_if(const ExecutionSpace& ex, InputIterator first, InputIterator last,
             OutputIterator d_first, Predicate pred) -> OutputIterator {
  return Impl::copy_if_impl("Kokkos::copy_if_iterator_api_default", ex, first,
                            last, d_first, std::move(pred));
}
/// Iterator overload of copy_if with a caller-provided label.
///
/// \param label   string passed through unchanged to Impl::copy_if_impl
/// \param ex      execution space instance forwarded to the implementation
/// \param first   start of the source range
/// \param last    end of the source range
/// \param d_first start of the destination range
/// \param pred    predicate, moved into the implementation call
/// \return the iterator returned by Impl::copy_if_impl
template <typename ExecutionSpace, typename InputIterator,
          typename OutputIterator, typename Predicate>
auto copy_if(const std::string& label, const ExecutionSpace& ex,
             InputIterator first, InputIterator last, OutputIterator d_first,
             Predicate pred) -> OutputIterator {
  OutputIterator result =
      Impl::copy_if_impl(label, ex, first, last, d_first, std::move(pred));
  return result;
}
/// View overload of copy_if that does not take a user-supplied label.
///
/// Both views are checked with
/// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then the call
/// is forwarded to Impl::copy_if_impl using KE::cbegin(source),
/// KE::cend(source) and KE::begin(dest), labelled
/// "Kokkos::copy_if_view_api_default". pred is moved into the call.
///
/// dest is taken by const reference so that temporaries (e.g. the result of a
/// subview expression) and const View handles are accepted; the destination
/// elements are still reached through KE::begin(dest).
///
/// \return the iterator returned by Impl::copy_if_impl
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2, class Predicate>
auto copy_if(const ExecutionSpace& ex,
             const ::Kokkos::View<DataType1, Properties1...>& source,
             const ::Kokkos::View<DataType2, Properties2...>& dest,
             Predicate pred) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

  // Qualify the begin/end helpers explicitly so the call does not depend on
  // argument-dependent lookup through the View's template arguments.
  namespace KE = ::Kokkos::Experimental;
  return Impl::copy_if_impl("Kokkos::copy_if_view_api_default", ex,
                            KE::cbegin(source), KE::cend(source),
                            KE::begin(dest), std::move(pred));
}
/// View overload of copy_if with a caller-provided label.
///
/// Both views are checked with
/// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then the call
/// is forwarded to Impl::copy_if_impl using KE::cbegin(source),
/// KE::cend(source) and KE::begin(dest). pred is moved into the call.
///
/// dest is taken by const reference so that temporaries (e.g. the result of a
/// subview expression) and const View handles are accepted; the destination
/// elements are still reached through KE::begin(dest).
///
/// \param label string passed through unchanged to Impl::copy_if_impl
/// \return the iterator returned by Impl::copy_if_impl
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2, class Predicate>
auto copy_if(const std::string& label, const ExecutionSpace& ex,
             const ::Kokkos::View<DataType1, Properties1...>& source,
             const ::Kokkos::View<DataType2, Properties2...>& dest,
             Predicate pred) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

  // Qualify the begin/end helpers explicitly so the call does not depend on
  // argument-dependent lookup through the View's template arguments.
  namespace KE = ::Kokkos::Experimental;
  return Impl::copy_if_impl(label, ex, KE::cbegin(source), KE::cend(source),
                            KE::begin(dest), std::move(pred));
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -0,0 +1,98 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_COPY_N_HPP
#define KOKKOS_STD_ALGORITHMS_COPY_N_HPP
#include "impl/Kokkos_CopyCopyN.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
/// Iterator overload of copy_n that does not take a user-supplied label.
///
/// Forwards ex, first, count and result to Impl::copy_n_impl with the fixed
/// label "Kokkos::copy_n_iterator_api_default" and returns the iterator
/// produced by that call.
template <typename ExecutionSpace, typename InputIterator, typename Size,
          typename OutputIterator>
auto copy_n(const ExecutionSpace& ex, InputIterator first, Size count,
            OutputIterator result) -> OutputIterator {
  return Impl::copy_n_impl("Kokkos::copy_n_iterator_api_default", ex, first,
                           count, result);
}
/// Iterator overload of copy_n with a caller-provided label.
///
/// \param label  string passed through unchanged to Impl::copy_n_impl
/// \param ex     execution space instance forwarded to the implementation
/// \param first  start of the source range
/// \param count  element count handed to the implementation
/// \param result start of the destination range
/// \return the iterator returned by Impl::copy_n_impl
template <typename ExecutionSpace, typename InputIterator, typename Size,
          typename OutputIterator>
auto copy_n(const std::string& label, const ExecutionSpace& ex,
            InputIterator first, Size count, OutputIterator result)
    -> OutputIterator {
  OutputIterator d_end = Impl::copy_n_impl(label, ex, first, count, result);
  return d_end;
}
/// View overload of copy_n that does not take a user-supplied label.
///
/// Both views are checked with
/// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then the call
/// is forwarded to Impl::copy_n_impl using KE::cbegin(source), count and
/// KE::begin(dest), labelled "Kokkos::copy_n_view_api_default".
///
/// dest is taken by const reference so that temporaries (e.g. the result of a
/// subview expression) and const View handles are accepted; the destination
/// elements are still reached through KE::begin(dest).
///
/// \return the iterator returned by Impl::copy_n_impl
template <class ExecutionSpace, class DataType1, class... Properties1,
          class Size, class DataType2, class... Properties2>
auto copy_n(const ExecutionSpace& ex,
            const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
            const ::Kokkos::View<DataType2, Properties2...>& dest) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

  namespace KE = ::Kokkos::Experimental;
  return Impl::copy_n_impl("Kokkos::copy_n_view_api_default", ex,
                           KE::cbegin(source), count, KE::begin(dest));
}
/// View overload of copy_n with a caller-provided label.
///
/// Both views are checked with
/// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then the call
/// is forwarded to Impl::copy_n_impl using KE::cbegin(source), count and
/// KE::begin(dest).
///
/// dest is taken by const reference so that temporaries (e.g. the result of a
/// subview expression) and const View handles are accepted; the destination
/// elements are still reached through KE::begin(dest).
///
/// \param label string passed through unchanged to Impl::copy_n_impl
/// \return the iterator returned by Impl::copy_n_impl
template <class ExecutionSpace, class DataType1, class... Properties1,
          class Size, class DataType2, class... Properties2>
auto copy_n(const std::string& label, const ExecutionSpace& ex,
            const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
            const ::Kokkos::View<DataType2, Properties2...>& dest) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

  namespace KE = ::Kokkos::Experimental;
  return Impl::copy_n_impl(label, ex, KE::cbegin(source), count,
                           KE::begin(dest));
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -0,0 +1,94 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_COUNT_HPP
#define KOKKOS_STD_ALGORITHMS_COUNT_HPP
#include "impl/Kokkos_CountCountIf.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
/// Iterator overload of count that does not take a user-supplied label.
///
/// Forwards ex, first, last and value to Impl::count_impl with the fixed label
/// "Kokkos::count_iterator_api_default". The result is returned as
/// IteratorType::difference_type.
template <typename ExecutionSpace, typename IteratorType, typename T>
auto count(const ExecutionSpace& ex, IteratorType first, IteratorType last,
           const T& value) -> typename IteratorType::difference_type {
  return Impl::count_impl("Kokkos::count_iterator_api_default", ex, first, last,
                          value);
}
/// Iterator overload of count with a caller-provided label.
///
/// \param label string passed through unchanged to Impl::count_impl
/// \param ex    execution space instance forwarded to the implementation
/// \param first start of the range
/// \param last  end of the range
/// \param value value handed to the implementation for comparison
/// \return the result of Impl::count_impl as IteratorType::difference_type
template <typename ExecutionSpace, typename IteratorType, typename T>
auto count(const std::string& label, const ExecutionSpace& ex,
           IteratorType first, IteratorType last, const T& value) ->
    typename IteratorType::difference_type {
  return Impl::count_impl(label, ex, first, last, value);
}
/// View overload of count that does not take a user-supplied label.
///
/// The view is checked with
/// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then the call
/// is forwarded to Impl::count_impl over cbegin(v)/cend(v) with the fixed
/// label "Kokkos::count_view_api_default".
template <typename ExecutionSpace, typename DataType, typename... Properties,
          typename T>
auto count(const ExecutionSpace& ex,
           const ::Kokkos::View<DataType, Properties...>& v, const T& value) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  return Impl::count_impl("Kokkos::count_view_api_default", ex,
                          ::Kokkos::Experimental::cbegin(v),
                          ::Kokkos::Experimental::cend(v), value);
}
/// View overload of count with a caller-provided label.
///
/// The view is checked with
/// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then the call
/// is forwarded to Impl::count_impl over cbegin(v)/cend(v).
///
/// \param label string passed through unchanged to Impl::count_impl
template <typename ExecutionSpace, typename DataType, typename... Properties,
          typename T>
auto count(const std::string& label, const ExecutionSpace& ex,
           const ::Kokkos::View<DataType, Properties...>& v, const T& value) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  return Impl::count_impl(label, ex, ::Kokkos::Experimental::cbegin(v),
                          ::Kokkos::Experimental::cend(v), value);
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -0,0 +1,99 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_COUNT_IF_HPP
#define KOKKOS_STD_ALGORITHMS_COUNT_IF_HPP
#include "impl/Kokkos_CountCountIf.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
/// Iterator overload of count_if that does not take a user-supplied label.
///
/// Forwards ex, first and last to Impl::count_if_impl with the fixed label
/// "Kokkos::count_if_iterator_api_default"; predicate is moved into the call.
/// The result is returned as IteratorType::difference_type.
template <typename ExecutionSpace, typename IteratorType, typename Predicate>
auto count_if(const ExecutionSpace& ex, IteratorType first, IteratorType last,
              Predicate predicate) -> typename IteratorType::difference_type {
  return Impl::count_if_impl("Kokkos::count_if_iterator_api_default", ex, first,
                             last, std::move(predicate));
}
/// Iterator overload of count_if with a caller-provided label.
///
/// \param label     string passed through unchanged to Impl::count_if_impl
/// \param ex        execution space instance forwarded to the implementation
/// \param first     start of the range
/// \param last      end of the range
/// \param predicate predicate, moved into the implementation call
/// \return the result of Impl::count_if_impl as IteratorType::difference_type
template <typename ExecutionSpace, typename IteratorType, typename Predicate>
auto count_if(const std::string& label, const ExecutionSpace& ex,
              IteratorType first, IteratorType last, Predicate predicate) ->
    typename IteratorType::difference_type {
  return Impl::count_if_impl(label, ex, first, last, std::move(predicate));
}
/// View overload of count_if that does not take a user-supplied label.
///
/// The view is checked with
/// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then the call
/// is forwarded to Impl::count_if_impl over cbegin(v)/cend(v) with the fixed
/// label "Kokkos::count_if_view_api_default"; predicate is moved into the
/// call.
template <typename ExecutionSpace, typename DataType, typename... Properties,
          typename Predicate>
auto count_if(const ExecutionSpace& ex,
              const ::Kokkos::View<DataType, Properties...>& v,
              Predicate predicate) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  return Impl::count_if_impl("Kokkos::count_if_view_api_default", ex,
                             ::Kokkos::Experimental::cbegin(v),
                             ::Kokkos::Experimental::cend(v),
                             std::move(predicate));
}
/// View overload of count_if with a caller-provided label.
///
/// The view is checked with
/// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then the call
/// is forwarded to Impl::count_if_impl over cbegin(v)/cend(v); predicate is
/// moved into the call.
///
/// \param label string passed through unchanged to Impl::count_if_impl
template <typename ExecutionSpace, typename DataType, typename... Properties,
          typename Predicate>
auto count_if(const std::string& label, const ExecutionSpace& ex,
              const ::Kokkos::View<DataType, Properties...>& v,
              Predicate predicate) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  return Impl::count_if_impl(label, ex, ::Kokkos::Experimental::cbegin(v),
                             ::Kokkos::Experimental::cend(v),
                             std::move(predicate));
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -45,8 +45,8 @@
#ifndef KOKKOS_STD_ALGORITHMS_DISTANCE_HPP
#define KOKKOS_STD_ALGORITHMS_DISTANCE_HPP
#include "Kokkos_Constraints.hpp"
#include "Kokkos_RandomAccessIterator.hpp"
#include "impl/Kokkos_Constraints.hpp"
#include "impl/Kokkos_RandomAccessIterator.hpp"
namespace Kokkos {
namespace Experimental {

View File

@ -0,0 +1,198 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_EQUAL_HPP
#define KOKKOS_STD_ALGORITHMS_EQUAL_HPP
#include "impl/Kokkos_Equal.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
template <class ExecutionSpace, class IteratorType1, class IteratorType2>
std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
IteratorType1, IteratorType2>::value,
bool>
equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1,
IteratorType2 first2) {
return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1,
last1, first2);
}
template <class ExecutionSpace, class IteratorType1, class IteratorType2>
std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
IteratorType1, IteratorType2>::value,
bool>
equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
IteratorType1 last1, IteratorType2 first2) {
return Impl::equal_impl(label, ex, first1, last1, first2);
}
template <class ExecutionSpace, class IteratorType1, class IteratorType2,
class BinaryPredicateType>
std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
IteratorType1, IteratorType2>::value,
bool>
equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1,
IteratorType2 first2, BinaryPredicateType predicate) {
return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1,
last1, first2, std::move(predicate));
}
template <class ExecutionSpace, class IteratorType1, class IteratorType2,
class BinaryPredicateType>
std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
IteratorType1, IteratorType2>::value,
bool>
equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
IteratorType1 last1, IteratorType2 first2,
BinaryPredicateType predicate) {
return Impl::equal_impl(label, ex, first1, last1, first2,
std::move(predicate));
}
/// View overload of equal (no predicate, default label).
///
/// Both views are checked with
/// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then the call
/// is forwarded to Impl::equal_impl using KE::cbegin(view1), KE::cend(view1)
/// and KE::cbegin(view2), labelled "Kokkos::equal_view_api_default".
///
/// view2 is only read through KE::cbegin, so it is taken by const reference;
/// this lets callers pass const views and temporaries.
///
/// \return the boolean produced by Impl::equal_impl
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2>
bool equal(const ExecutionSpace& ex,
           const ::Kokkos::View<DataType1, Properties1...>& view1,
           const ::Kokkos::View<DataType2, Properties2...>& view2) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);

  namespace KE = ::Kokkos::Experimental;
  return Impl::equal_impl("Kokkos::equal_view_api_default", ex,
                          KE::cbegin(view1), KE::cend(view1),
                          KE::cbegin(view2));
}
/// View overload of equal (no predicate) with a caller-provided label.
///
/// Both views are checked with
/// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then the call
/// is forwarded to Impl::equal_impl using KE::cbegin(view1), KE::cend(view1)
/// and KE::cbegin(view2).
///
/// view2 is only read through KE::cbegin, so it is taken by const reference;
/// this lets callers pass const views and temporaries.
///
/// \param label string passed through unchanged to Impl::equal_impl
/// \return the boolean produced by Impl::equal_impl
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2>
bool equal(const std::string& label, const ExecutionSpace& ex,
           const ::Kokkos::View<DataType1, Properties1...>& view1,
           const ::Kokkos::View<DataType2, Properties2...>& view2) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);

  namespace KE = ::Kokkos::Experimental;
  return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1),
                          KE::cbegin(view2));
}
/// View overload of equal (custom predicate, default label).
///
/// Both views are checked with
/// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then the call
/// is forwarded to Impl::equal_impl using KE::cbegin(view1), KE::cend(view1)
/// and KE::cbegin(view2), labelled "Kokkos::equal_view_api_default".
/// predicate is moved into the call.
///
/// view2 is only read through KE::cbegin, so it is taken by const reference;
/// this lets callers pass const views and temporaries.
///
/// \return the boolean produced by Impl::equal_impl
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2, class BinaryPredicateType>
bool equal(const ExecutionSpace& ex,
           const ::Kokkos::View<DataType1, Properties1...>& view1,
           const ::Kokkos::View<DataType2, Properties2...>& view2,
           BinaryPredicateType predicate) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);

  namespace KE = ::Kokkos::Experimental;
  return Impl::equal_impl("Kokkos::equal_view_api_default", ex,
                          KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2),
                          std::move(predicate));
}
/// View overload of equal (custom predicate) with a caller-provided label.
///
/// Both views are checked with
/// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then the call
/// is forwarded to Impl::equal_impl using KE::cbegin(view1), KE::cend(view1)
/// and KE::cbegin(view2). predicate is moved into the call.
///
/// view2 is only read through KE::cbegin, so it is taken by const reference;
/// this lets callers pass const views and temporaries.
///
/// \param label string passed through unchanged to Impl::equal_impl
/// \return the boolean produced by Impl::equal_impl
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2, class BinaryPredicateType>
bool equal(const std::string& label, const ExecutionSpace& ex,
           const ::Kokkos::View<DataType1, Properties1...>& view1,
           const ::Kokkos::View<DataType2, Properties2...>& view2,
           BinaryPredicateType predicate) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);

  namespace KE = ::Kokkos::Experimental;
  return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1),
                          KE::cbegin(view2), std::move(predicate));
}
template <class ExecutionSpace, class IteratorType1, class IteratorType2>
std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
IteratorType1, IteratorType2>::value,
bool>
equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1,
IteratorType2 first2, IteratorType2 last2) {
return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1,
last1, first2, last2);
}
template <class ExecutionSpace, class IteratorType1, class IteratorType2>
std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
IteratorType1, IteratorType2>::value,
bool>
equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) {
return Impl::equal_impl(label, ex, first1, last1, first2, last2);
}
template <class ExecutionSpace, class IteratorType1, class IteratorType2,
class BinaryPredicateType>
std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
IteratorType1, IteratorType2>::value,
bool>
equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1,
IteratorType2 first2, IteratorType2 last2,
BinaryPredicateType predicate) {
return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1,
last1, first2, last2, std::move(predicate));
}
template <class ExecutionSpace, class IteratorType1, class IteratorType2,
class BinaryPredicateType>
std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
IteratorType1, IteratorType2>::value,
bool>
equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
IteratorType1 last1, IteratorType2 first2, IteratorType2 last2,
BinaryPredicateType predicate) {
return Impl::equal_impl(label, ex, first1, last1, first2, last2,
std::move(predicate));
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -0,0 +1,190 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_HPP
#define KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_HPP
#include "impl/Kokkos_ExclusiveScan.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
// overload set 1
template <class ExecutionSpace, class InputIteratorType,
class OutputIteratorType, class ValueType>
std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators<
InputIteratorType, OutputIteratorType>::value,
OutputIteratorType>
exclusive_scan(const ExecutionSpace& ex, InputIteratorType first,
InputIteratorType last, OutputIteratorType first_dest,
ValueType init_value) {
static_assert(std::is_move_constructible<ValueType>::value,
"ValueType must be move constructible.");
return Impl::exclusive_scan_default_op_impl(
"Kokkos::exclusive_scan_default_functors_iterator_api", ex, first, last,
first_dest, init_value);
}
template <class ExecutionSpace, class InputIteratorType,
class OutputIteratorType, class ValueType>
std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators<
InputIteratorType, OutputIteratorType>::value,
OutputIteratorType>
exclusive_scan(const std::string& label, const ExecutionSpace& ex,
InputIteratorType first, InputIteratorType last,
OutputIteratorType first_dest, ValueType init_value) {
static_assert(std::is_move_constructible<ValueType>::value,
"ValueType must be move constructible.");
return Impl::exclusive_scan_default_op_impl(label, ex, first, last,
first_dest, init_value);
}
// exclusive_scan: View API, default operator, built-in label.
//
// ex          execution space instance handed to the implementation
// view_from   source view; its cbegin()/cend() iterators are the input range
// view_dest   destination view; its begin() iterator is the output iterator
// init_value  initial value; ValueType must be move constructible
//
// Both views are passed through
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms before the call.
// The return type is deduced from Impl::exclusive_scan_default_op_impl, which
// is invoked with the fixed label string below.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2, class ValueType>
auto exclusive_scan(const ExecutionSpace& ex,
                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
                    ValueType init_value) {
  // Compile-time requirement on the initial-value type.
  static_assert(std::is_move_constructible<ValueType>::value,
                "ValueType must be move constructible.");

  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);

  // Short local alias for the namespace providing cbegin/cend/begin.
  namespace KExp = ::Kokkos::Experimental;

  return Impl::exclusive_scan_default_op_impl(
      "Kokkos::exclusive_scan_default_functors_view_api", ex,
      KExp::cbegin(view_from), KExp::cend(view_from), KExp::begin(view_dest),
      init_value);
}
// exclusive_scan: View API, default operator, caller-supplied label.
//
// label       string passed through unchanged as the implementation's first
//             argument (presumably a kernel/profiling label -- confirm in the
//             implementation header)
// ex          execution space instance handed to the implementation
// view_from   source view; its cbegin()/cend() iterators are the input range
// view_dest   destination view; its begin() iterator is the output iterator
// init_value  initial value; ValueType must be move constructible
//
// Both views are passed through
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms before the call.
// The return type is deduced from Impl::exclusive_scan_default_op_impl.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2, class ValueType>
auto exclusive_scan(const std::string& label, const ExecutionSpace& ex,
                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
                    ValueType init_value) {
  // Compile-time requirement on the initial-value type.
  static_assert(std::is_move_constructible<ValueType>::value,
                "ValueType must be move constructible.");

  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);

  // Short local alias for the namespace providing cbegin/cend/begin.
  namespace KExp = ::Kokkos::Experimental;

  return Impl::exclusive_scan_default_op_impl(
      label, ex, KExp::cbegin(view_from), KExp::cend(view_from),
      KExp::begin(view_dest), init_value);
}
// overload set 2
template <class ExecutionSpace, class InputIteratorType,
class OutputIteratorType, class ValueType, class BinaryOpType>
std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators<
InputIteratorType, OutputIteratorType>::value,
OutputIteratorType>
exclusive_scan(const ExecutionSpace& ex, InputIteratorType first,
InputIteratorType last, OutputIteratorType first_dest,
ValueType init_value, BinaryOpType bop) {
Impl::static_assert_is_not_openmptarget(ex);
static_assert(std::is_move_constructible<ValueType>::value,
"ValueType must be move constructible.");
return Impl::exclusive_scan_custom_op_impl(
"Kokkos::exclusive_scan_custom_functors_iterator_api", ex, first, last,
first_dest, init_value, bop);
}
template <class ExecutionSpace, class InputIteratorType,
class OutputIteratorType, class ValueType, class BinaryOpType>
std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators<
InputIteratorType, OutputIteratorType>::value,
OutputIteratorType>
exclusive_scan(const std::string& label, const ExecutionSpace& ex,
InputIteratorType first, InputIteratorType last,
OutputIteratorType first_dest, ValueType init_value,
BinaryOpType bop) {
Impl::static_assert_is_not_openmptarget(ex);
static_assert(std::is_move_constructible<ValueType>::value,
"ValueType must be move constructible.");
return Impl::exclusive_scan_custom_op_impl(label, ex, first, last, first_dest,
init_value, bop);
}
// exclusive_scan: View API, user-provided binary operator, built-in label.
//
// ex          execution space instance; first checked with
//             Impl::static_assert_is_not_openmptarget
// view_from   source view; its cbegin()/cend() iterators are the input range
// view_dest   destination view; its begin() iterator is the output iterator
// init_value  initial value; ValueType must be move constructible
// bop         binary operator object handed to the implementation
//
// Both views are passed through
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms before the call.
// The return type is deduced from Impl::exclusive_scan_custom_op_impl, which
// is invoked with the fixed label string below.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2, class ValueType,
          class BinaryOpType>
auto exclusive_scan(const ExecutionSpace& ex,
                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
                    ValueType init_value, BinaryOpType bop) {
  // Compile-time requirement on the initial-value type.
  static_assert(std::is_move_constructible<ValueType>::value,
                "ValueType must be move constructible.");

  Impl::static_assert_is_not_openmptarget(ex);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);

  // Short local alias for the namespace providing cbegin/cend/begin.
  namespace KExp = ::Kokkos::Experimental;

  return Impl::exclusive_scan_custom_op_impl(
      "Kokkos::exclusive_scan_custom_functors_view_api", ex,
      KExp::cbegin(view_from), KExp::cend(view_from), KExp::begin(view_dest),
      init_value, bop);
}
// exclusive_scan: View API, user-provided binary operator,
// caller-supplied label.
//
// label       string passed through unchanged as the implementation's first
//             argument (presumably a kernel/profiling label -- confirm in the
//             implementation header)
// ex          execution space instance; first checked with
//             Impl::static_assert_is_not_openmptarget
// view_from   source view; its cbegin()/cend() iterators are the input range
// view_dest   destination view; its begin() iterator is the output iterator
// init_value  initial value; ValueType must be move constructible
// bop         binary operator object handed to the implementation
//
// Both views are passed through
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms before the call.
// The return type is deduced from Impl::exclusive_scan_custom_op_impl.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2, class ValueType,
          class BinaryOpType>
auto exclusive_scan(const std::string& label, const ExecutionSpace& ex,
                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
                    ValueType init_value, BinaryOpType bop) {
  // Compile-time requirement on the initial-value type.
  static_assert(std::is_move_constructible<ValueType>::value,
                "ValueType must be move constructible.");

  Impl::static_assert_is_not_openmptarget(ex);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);

  // Short local alias for the namespace providing cbegin/cend/begin.
  namespace KExp = ::Kokkos::Experimental;

  return Impl::exclusive_scan_custom_op_impl(
      label, ex, KExp::cbegin(view_from), KExp::cend(view_from),
      KExp::begin(view_dest), init_value, bop);
}
} // namespace Experimental
} // namespace Kokkos
#endif

Some files were not shown because too many files have changed in this diff Show More