Merge branch 'lammps:develop' into thermo_mod_range

This commit is contained in:
hammondkd 2022-10-13 09:01:06 -05:00 committed by GitHub
commit 41c0b39365
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
767 changed files with 43285 additions and 29129 deletions

View File

@ -105,7 +105,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4) if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4)
set(CMAKE_TUNE_DEFAULT "-xCOMMON-AVX512") set(CMAKE_TUNE_DEFAULT "-xCOMMON-AVX512")
else() else()
set(CMAKE_TUNE_DEFAULT "-xHost") set(CMAKE_TUNE_DEFAULT "-xHost -fp-model fast=2 -no-prec-div -qoverride-limits -diag-disable=10441 -diag-disable=2196")
endif() endif()
endif() endif()
endif() endif()
@ -854,8 +854,11 @@ if(BUILD_SHARED_LIBS OR PKG_PYTHON)
find_package(Python COMPONENTS Interpreter) find_package(Python COMPONENTS Interpreter)
endif() endif()
if(Python_EXECUTABLE) if(Python_EXECUTABLE)
file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python) file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python/lib)
install(CODE "execute_process(COMMAND ${Python_EXECUTABLE} setup.py build -b ${CMAKE_BINARY_DIR}/python install --prefix=${CMAKE_INSTALL_PREFIX} --root=\$ENV{DESTDIR}/ WORKING_DIRECTORY ${LAMMPS_PYTHON_DIR})") file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python/src)
file(COPY ${LAMMPS_SOURCE_DIR}/version.h DESTINATION ${CMAKE_BINARY_DIR}/python/src)
file(COPY ${LAMMPS_PYTHON_DIR}/README ${LAMMPS_PYTHON_DIR}/pyproject.toml ${LAMMPS_PYTHON_DIR}/setup.py ${LAMMPS_PYTHON_DIR}/lammps DESTINATION ${CMAKE_BINARY_DIR}/python/lib)
install(CODE "if(\"\$ENV{DESTDIR}\" STREQUAL \"\")\n execute_process(COMMAND ${Python_EXECUTABLE} -m pip install -v ${CMAKE_BINARY_DIR}/python/lib --prefix=${CMAKE_INSTALL_PREFIX})\n else()\n execute_process(COMMAND ${Python_EXECUTABLE} -m pip install -v ${CMAKE_BINARY_DIR}/python/lib --prefix=${CMAKE_INSTALL_PREFIX} --root=\$ENV{DESTDIR})\n endif()")
endif() endif()
endif() endif()

View File

@ -47,8 +47,8 @@ if(DOWNLOAD_KOKKOS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}") list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject) include(ExternalProject)
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.6.01.tar.gz" CACHE STRING "URL for KOKKOS tarball") set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.7.00.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "0ec97fc0c356dd65bd2487defe81a7bf" CACHE STRING "MD5 checksum of KOKKOS tarball") set(KOKKOS_MD5 "84991eca9f066383abe119a5bc7a11c4" CACHE STRING "MD5 checksum of KOKKOS tarball")
mark_as_advanced(KOKKOS_URL) mark_as_advanced(KOKKOS_URL)
mark_as_advanced(KOKKOS_MD5) mark_as_advanced(KOKKOS_MD5)
ExternalProject_Add(kokkos_build ExternalProject_Add(kokkos_build
@ -72,7 +72,7 @@ if(DOWNLOAD_KOKKOS)
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build) add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build) add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
elseif(EXTERNAL_KOKKOS) elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 3.6.01 REQUIRED CONFIG) find_package(Kokkos 3.7.00 REQUIRED CONFIG)
target_link_libraries(lammps PRIVATE Kokkos::kokkos) target_link_libraries(lammps PRIVATE Kokkos::kokkos)
target_link_libraries(lmp PRIVATE Kokkos::kokkos) target_link_libraries(lmp PRIVATE Kokkos::kokkos)
else() else()

View File

@ -1,6 +1,6 @@
set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2022.09.27.tar.gz" CACHE STRING "URL for PACE evaluator library sources") set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2022.09.27.fix10Oct.tar.gz" CACHE STRING "URL for PACE evaluator library sources")
set(PACELIB_MD5 "ad6c8597076479bd55059f5947d51acc" CACHE STRING "MD5 checksum of PACE evaluator library tarball") set(PACELIB_MD5 "766cebcc0e5c4b8430c2f3cd202d9905" CACHE STRING "MD5 checksum of PACE evaluator library tarball")
mark_as_advanced(PACELIB_URL) mark_as_advanced(PACELIB_URL)
mark_as_advanced(PACELIB_MD5) mark_as_advanced(PACELIB_MD5)

View File

@ -1,4 +1,4 @@
# preset that will enable Intel compilers with support for MPI and OpenMP (on Linux boxes) # preset that will enable the classic Intel compilers with support for MPI and OpenMP (on Linux boxes)
set(CMAKE_CXX_COMPILER "icpc" CACHE STRING "" FORCE) set(CMAKE_CXX_COMPILER "icpc" CACHE STRING "" FORCE)
set(CMAKE_C_COMPILER "icc" CACHE STRING "" FORCE) set(CMAKE_C_COMPILER "icc" CACHE STRING "" FORCE)
@ -18,11 +18,11 @@ set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE)
unset(HAVE_OMP_H_INCLUDE CACHE) unset(HAVE_OMP_H_INCLUDE CACHE)
set(OpenMP_C "icc" CACHE STRING "" FORCE) set(OpenMP_C "icc" CACHE STRING "" FORCE)
set(OpenMP_C_FLAGS "-qopenmp" CACHE STRING "" FORCE) set(OpenMP_C_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE) set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE)
set(OpenMP_CXX "icpc" CACHE STRING "" FORCE) set(OpenMP_CXX "icpc" CACHE STRING "" FORCE)
set(OpenMP_CXX_FLAGS "-qopenmp" CACHE STRING "" FORCE) set(OpenMP_CXX_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE) set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE)
set(OpenMP_Fortran_FLAGS "-qopenmp" CACHE STRING "" FORCE) set(OpenMP_Fortran_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
set(OpenMP_omp_LIBRARY "libiomp5.so" CACHE PATH "" FORCE) set(OpenMP_omp_LIBRARY "libiomp5.so" CACHE PATH "" FORCE)

View File

@ -18,11 +18,11 @@ set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE)
unset(HAVE_OMP_H_INCLUDE CACHE) unset(HAVE_OMP_H_INCLUDE CACHE)
set(OpenMP_C "icx" CACHE STRING "" FORCE) set(OpenMP_C "icx" CACHE STRING "" FORCE)
set(OpenMP_C_FLAGS "-qopenmp" CACHE STRING "" FORCE) set(OpenMP_C_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE) set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE)
set(OpenMP_CXX "icpx" CACHE STRING "" FORCE) set(OpenMP_CXX "icpx" CACHE STRING "" FORCE)
set(OpenMP_CXX_FLAGS "-qopenmp" CACHE STRING "" FORCE) set(OpenMP_CXX_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE) set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE)
set(OpenMP_Fortran_FLAGS "-qopenmp" CACHE STRING "" FORCE) set(OpenMP_Fortran_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
set(OpenMP_omp_LIBRARY "libiomp5.so" CACHE PATH "" FORCE) set(OpenMP_omp_LIBRARY "libiomp5.so" CACHE PATH "" FORCE)

View File

@ -483,6 +483,9 @@ They must be specified in uppercase.
* - **Arch-ID** * - **Arch-ID**
- **HOST or GPU** - **HOST or GPU**
- **Description** - **Description**
* - NATIVE
- HOST
- Local machine
* - AMDAVX * - AMDAVX
- HOST - HOST
- AMD 64-bit x86 CPU (AVX 1) - AMD 64-bit x86 CPU (AVX 1)
@ -522,9 +525,21 @@ They must be specified in uppercase.
* - BDW * - BDW
- HOST - HOST
- Intel Broadwell Xeon E-class CPU (AVX 2 + transactional mem) - Intel Broadwell Xeon E-class CPU (AVX 2 + transactional mem)
* - SKL
- HOST
- Intel Skylake Client CPU
* - SKX * - SKX
- HOST - HOST
- Intel Sky Lake Xeon E-class HPC CPU (AVX512 + transactional mem) - Intel Skylake Xeon Server CPU (AVX512)
* - ICL
- HOST
- Intel Ice Lake Client CPU (AVX512)
* - ICX
- HOST
- Intel Ice Lake Xeon Server CPU (AVX512)
* - SPR
- HOST
- Intel Sapphire Rapids Xeon Server CPU (AVX512)
* - KNC * - KNC
- HOST - HOST
- Intel Knights Corner Xeon Phi - Intel Knights Corner Xeon Phi
@ -596,7 +611,10 @@ They must be specified in uppercase.
- AMD GPU MI100 GFX908 - AMD GPU MI100 GFX908
* - VEGA90A * - VEGA90A
- GPU - GPU
- AMD GPU - AMD GPU MI200 GFX90A
* - INTEL_GEN
- GPU
- SPIR64-based devices, e.g. Intel GPUs, using JIT
* - INTEL_DG1 * - INTEL_DG1
- GPU - GPU
- Intel Iris XeMAX GPU - Intel Iris XeMAX GPU
@ -611,9 +629,12 @@ They must be specified in uppercase.
- Intel GPU Gen12LP - Intel GPU Gen12LP
* - INTEL_XEHP * - INTEL_XEHP
- GPU - GPU
- Intel GPUs Xe-HP - Intel GPU Xe-HP
* - INTEL_PVC
- GPU
- Intel GPU Ponte Vecchio
This list was last updated for version 3.5.0 of the Kokkos library. This list was last updated for version 3.7.0 of the Kokkos library.
.. tabs:: .. tabs::

View File

@ -91,7 +91,7 @@ quantities.
+--------------+-----------------------------------------------------+--------------------------------------+ +--------------+-----------------------------------------------------+--------------------------------------+
| *charge* | charge | atomic system with charges | | *charge* | charge | atomic system with charges |
+--------------+-----------------------------------------------------+--------------------------------------+ +--------------+-----------------------------------------------------+--------------------------------------+
| *dielectric* | dipole, area, curvature | system with surface polarization | | *dielectric* | normx normy normz area/patch ed em epsilon curv | system with surface polarization |
+--------------+-----------------------------------------------------+--------------------------------------+ +--------------+-----------------------------------------------------+--------------------------------------+
| *dipole* | charge and dipole moment | system with dipolar particles | | *dipole* | charge and dipole moment | system with dipolar particles |
+--------------+-----------------------------------------------------+--------------------------------------+ +--------------+-----------------------------------------------------+--------------------------------------+
@ -180,16 +180,21 @@ vector with the 3 diameters of the ellipsoid and a quaternion 4-vector
with its orientation. with its orientation.
For the *dielectric* style, each particle can be either a physical For the *dielectric* style, each particle can be either a physical
particle (e.g. an ion), or an interface particle representing a particle (e.g. an ion), or an interface particle representing a boundary
boundary element. For physical particles, the per-particle properties element between two regions of different dielectric constant. For
are the same as atom_style full. For interface particles, in addition interface particles, in addition to the properties associated with
to these properties, each particle also has an area, a normal unit atom_style full, each particle also should be assigned a normal unit
vector, a mean local curvature, the mean and difference of the vector (defined by normx, normy, normz), an area (area/patch), the
dielectric constants of two sides of the interface, and the local difference and mean of the dielectric constants of two sides of the
dielectric constant at the boundary element. The distinction between interface along the direction of the normal vector (ed and em), the
the physical and interface particles is only meaningful when :doc:`fix local dielectric constant at the boundary element (epsilon), and a mean
polarize <fix_polarize>` commands are applied to the interface local curvature (curv). Physical particles must be assigned these
particles. values, as well, but only their local dielectric constants will be used;
see documentation for associated :doc:`pair styles <pair_dielectric>`
and :doc:`fixes <fix_polarize>`. The distinction between the physical
and interface particles is only meaningful when :doc:`fix polarize
<fix_polarize>` commands are applied to the interface particles. This
style is part of the DIELECTRIC package.
For the *dipole* style, a point dipole is defined for each point For the *dipole* style, a point dipole is defined for each point
particle. Note that if you wish the particles to be finite-size particle. Note that if you wish the particles to be finite-size

View File

@ -16,11 +16,11 @@ Syntax
.. parsed-literal:: .. parsed-literal::
fix ID group-ID style nevery tolerance ... fix ID group-ID style nevery tolerance
* ID, group-ID are documented in :doc:`fix <fix>` command * ID, group-ID are documented in :doc:`fix <fix>` command
* style = *polarize/bem/gmres* or *polarize/bem/icc* or *polarize/functional* * style = *polarize/bem/gmres* or *polarize/bem/icc* or *polarize/functional*
* Nevery = this fixed is invoked every this many timesteps * nevery = this fix is invoked every this many timesteps
* tolerance = the relative tolerance for the iterative solver to stop * tolerance = the relative tolerance for the iterative solver to stop
@ -46,44 +46,53 @@ Description
These fixes compute induced charges at the interface between two These fixes compute induced charges at the interface between two
impermeable media with different dielectric constants. The interfaces impermeable media with different dielectric constants. The interfaces
need to be discretized into vertices, each representing a boundary element. need to be discretized into vertices, each representing a boundary
The vertices are treated as if they were regular atoms or particles. element. The vertices are treated as if they were regular atoms or
:doc:`atom_style dielectric <atom_style>` should be used since it defines particles. :doc:`atom_style dielectric <atom_style>` should be used
the additional properties of each interface particle such as since it defines the additional properties of each interface particle
interface normal vectors, element areas, and local dielectric mismatch. such as interface normal vectors, element areas, and local dielectric
These fixes also require the use of :doc:`pair_style <pair_style>` and mismatch. These fixes also require the use of :doc:`pair_style
:doc:`kspace_style <kspace_style>` with the *dielectric* suffix. <pair_style>` and :doc:`kspace_style <kspace_style>` with the
At every time step, given a configuration of the physical charges in the system *dielectric* suffix. At every time step, given a configuration of the
(such as atoms and charged particles) these fixes compute and update physical charges in the system (such as atoms and charged particles)
the charge of the interface particles. The interfaces are allowed to move these fixes compute and update the charge of the interface
during the simulation with appropriate time integrators (for example, particles. The interfaces are allowed to move during the simulation if
with :doc:`fix_rigid <fix_rigid>`). the appropriate time integrators are also set (for example, with
:doc:`fix_rigid <fix_rigid>`).
Consider an interface between two media: one with dielectric constant Consider an interface between two media: one with dielectric constant of
of 78 (water), the other of 4 (silica). The interface is discretized 78 (water), the other of 4 (silica). The interface is discretized into
into 2000 boundary elements, each represented by an interface particle. Suppose that 2000 boundary elements, each represented by an interface
each interface particle has a normal unit vector pointing from the silica medium to water. particle. Suppose that each interface particle has a normal unit vector
The dielectric difference along the normal vector is then 78 - 4 = 74, pointing from the silica medium to water. The dielectric difference
the mean dielectric value is (78 + 4) / 2 = 41. Each boundary element along the normal vector is then 78 - 4 = 74, the mean dielectric value
also has its area and the local mean curvature (which is used by these fixes is (78 + 4) / 2 = 41. Each boundary element also has its area and the
for computing a correction term in the local electric field). local mean curvature, which is used by these fixes for computing a
To model charged interfaces, the interface particle will have a non-zero charge value, correction term in the local electric field. To model charged
interfaces, the interface particle will have a non-zero charge value,
coming from its area and surface charge density. coming from its area and surface charge density.
For non-interface particles such as atoms and charged particles, For non-interface particles such as atoms and charged particles, the
the interface normal vectors, element area, and dielectric mismatch are interface normal vectors, element area, and dielectric mismatch are
irrelevant. Their local dielectric value is used to rescale their actual charge irrelevant and unused. Their local dielectric value is used internally
when computing the Coulombic interactions. For instance, for a cation carrying to rescale their given charge when computing the Coulombic
a charge of +2 (in charge unit) in an implicit solvent with dielectric constant of 40 interactions. For instance, to simulate a cation carrying a charge of +2
would have actual charge of +2, and a local dielectric constant value of 40. (in simulation charge units) in an implicit solvent with a dielectric
It is assumed that the particles cannot pass through the interface during the simulation constant of 40, the cation's charge should be set to +2 and its local
so that its local dielectric constant value does not change. dielectric constant property (defined in the :doc:`atom_style dielectric
<atom_style>`) should be set to 40; there is no need to manually rescale
charge. This will produce the proper force for any :doc:`pair_style
<pair_style>` with the dielectric suffix. It is assumed that the
particles cannot pass through the interface during the simulation
because the value of the local dielectric constant property does not
change.
There are some example scripts for using these fixes There are some example scripts for using these fixes with LAMMPS in the
with LAMMPS in the ``examples/PACKAGES/dielectric`` directory. The README file ``examples/PACKAGES/dielectric`` directory. The README file therein
therein contains specific details on the system setup. Note that the example data files contains specific details on the system setup. Note that the example
show the additional fields (columns) needed for :doc:`atom_style dielectric <atom_style>` data files show the additional fields (columns) needed for
beyond the conventional fields *id*, *mol*, *type*, *q*, *x*, *y*, and *z*. :doc:`atom_style dielectric <atom_style>` beyond the conventional fields
*id*, *mol*, *type*, *q*, *x*, *y*, and *z*.
---------- ----------
@ -104,22 +113,24 @@ the interface, are computed using the equation:
* :math:`\mathbf{E}(\mathbf{s})` is the electrical field at the vertex * :math:`\mathbf{E}(\mathbf{s})` is the electrical field at the vertex
* :math:`\mathbf{n}(\mathbf{s})` is the unit normal vector at the vertex pointing from medium with :math:`\epsilon_2` to that with :math:`\epsilon_1` * :math:`\mathbf{n}(\mathbf{s})` is the unit normal vector at the vertex pointing from medium with :math:`\epsilon_2` to that with :math:`\epsilon_1`
Fix *polarize/bem/gmres* employs the Generalized Minimum Residual (GMRES) Fix *polarize/bem/gmres* employs the Generalized Minimum Residual
as described in :ref:`(Barros) <Barros>` to solve :math:`\sigma_b`. (GMRES) as described in :ref:`(Barros) <Barros>` to solve
:math:`\sigma_b`.
Fix *polarize/bem/icc* employs the successive over-relaxation algorithm Fix *polarize/bem/icc* employs the successive over-relaxation algorithm
as described in :ref:`(Tyagi) <Tyagi>` to solve :math:`\sigma_b`. as described in :ref:`(Tyagi) <Tyagi>` to solve :math:`\sigma_b`.
The iterative solvers would terminate either when the maximum relative change The iterative solvers would terminate either when the maximum relative
in the induced charges in consecutive iterations is below the set tolerance, change in the induced charges in consecutive iterations is below the set
or when the number of iterations reaches *iter_max* (see below). tolerance, or when the number of iterations reaches *itr_max* (see
below).
Fix *polarize/functional* employs the energy functional variation approach Fix *polarize/functional* employs the energy functional variation
as described in :ref:`(Jadhao) <Jadhao>` to solve :math:`\sigma_b`. approach as described in :ref:`(Jadhao) <Jadhao>` to solve
:math:`\sigma_b`.
More details on the implementation of these fixes and their recommended
More details on the implementation of these fixes and their recommended use use are described in :ref:`(NguyenTD) <NguyenTD>`.
are described in :ref:`(NguyenTD) <NguyenTD>`.
Restart, fix_modify, output, run start/stop, minimize info Restart, fix_modify, output, run start/stop, minimize info
@ -127,35 +138,78 @@ Restart, fix_modify, output, run start/stop, minimize info
No information about this fix is written to :doc:`binary restart files <restart>`. No information about this fix is written to :doc:`binary restart files <restart>`.
The :doc:`fix_modify <fix_modify>` command provides certain options to The :doc:`fix_modify <fix_modify>` command provides the ability to modify certain
control the induced charge solver and the initial values of the interface elements: settings:
.. parsed-literal:: .. parsed-literal::
*itr_max* arg *itr_max* arg
arg = maximum number of iterations for convergence arg = maximum number of iterations for convergence
*dielectrics* ediff emean epsilon area charge *dielectrics* ediff emean epsilon area charge
ediff = dielectric difference ediff = dielectric difference or NULL
emean = dielectric mean emean = dielectric mean or NULL
epsilon = local dielectric value epsilon = local dielectric value or NULL
aree = element area area = element area or NULL
charge = real interface charge charge = real interface charge or NULL
*kspace* arg = yes or no
*rand* max seed
max = range of random induced charges to be generated
seed = random number seed to use when generating random charge
*mr* arg
arg = maximum number of q-vectors to use when solving (GMRES only)
*omega* arg
arg = relaxation parameter to use when iterating (ICC only)
*polarize/bem/gmres* or *polarize/bem/icc* compute a global 2-element vector The *itr_max* keyword sets the max number of iterations to be used for
which can be accessed by various :doc:`output commands <Howto_output>`. solving each step.
The first element is the number of iterations when the solver terminates
(of which the upper bound is set by *iter_max*). The second element is the RMS error. The *dielectrics* keyword allows properties of the atoms in group
*group-ID* to be modified. Values passed to any of the arguments
(*ediff*, *emean*, *epsilon*, *area*, *charge*) will override existing
values for all atoms in the group *group-ID*. Passing NULL to any of
these arguments will preserve the existing value. Note that setting the
properties of the interface this way will change the properties of all
atoms associated with the fix (all atoms in *group-ID*), so multiple fix
and fix_modify commands would be needed to change the properties of two
different interfaces to different values (one fix and fix_modify for
each interface group).
The *kspace* keyword turns on long range interactions.
If the arguments of the *rand* keyword are set, then the atoms subject
to this fix will be assigned a random initial charge in a uniform
distribution from -*max*/2 to *max*/2, using random number seed *seed*.
The *mr* keyword only applies to *style* = *polarize/bem/gmres*. It is
the maximum number of q-vectors to use when solving for the surface
charge.
The *omega* keyword only applies when using *style* =
*polarize/bem/icc*. It is a relaxation parameter defined in
:ref:`(Tyagi) <Tyagi>` that should generally be set between 0 and 2.
Note that the local dielectric constant (epsilon) can also be set
independently using the :doc:`set <set>` command.
----------
*polarize/bem/gmres* or *polarize/bem/icc* compute a global 2-element
vector which can be accessed by various :doc:`output commands
<Howto_output>`. The first element is the number of iterations when the
solver terminates (of which the upper bound is set by *itr_max*). The
second element is the RMS error.
Restrictions Restrictions
"""""""""""" """"""""""""
These fixes are part of the DIELECTRIC package. It is only enabled These fixes are part of the DIELECTRIC package. They are only enabled
if LAMMPS was built with that package, which requires that also the if LAMMPS was built with that package, which requires that also the
KSPACE package is installed. See the :doc:`Build package KSPACE package is installed. See the :doc:`Build package
<Build_package>` page for more info. <Build_package>` page for more info.
Note that the *polarize/bem/gmres* and *polarize/bem/icc* fixes only support Note that the *polarize/bem/gmres* and *polarize/bem/icc* fixes only
:doc:`units <units>` *lj*, *real*, *metal*, *si* and *nano* at the moment. support :doc:`units <units>` *lj*, *real*, *metal*, *si* and *nano* at
the moment.
Related commands Related commands
@ -171,6 +225,15 @@ Default
*iter_max* = 20 *itr_max* = 20
*kspace* = yes
*omega* = 0.7 (ICC only)
*mr* = \# atoms in group *group-ID* minus 1 (GMRES only)
No random charge initialization happens by default.
---------- ----------
.. _Barros: .. _Barros:

View File

@ -76,16 +76,19 @@ Description
""""""""""" """""""""""
All these pair styles are derived from the corresponding pair styles All these pair styles are derived from the corresponding pair styles
without the *dielectric*\ suffix. In addition to computing atom forces without the *dielectric* suffix. In addition to computing atom forces
and energies, these pair styles compute the electrical field vector and energies, these pair styles compute the electric field vector at
at each atom, which are to be used in the :doc:`fix polarize <fix_polarize>` commands. each atom, which are intended to be used by the :doc:`fix polarize
<fix_polarize>` commands to compute induced charges at interfaces
between two regions of different dielectric constant.
These pair styles should be used with :doc:`atom_style dielectric <atom_style>`, These pair styles should be used with :doc:`atom_style dielectric
which uses atom charges rescaled by their local dielectric constant. <atom_style>`.
The styles lj/cut/coul/long/dielectric, lj/cut/coul/msm/dielectric, and The styles lj/cut/coul/long/dielectric, lj/cut/coul/msm/dielectric, and
lj/long/coul/long/dielectric should be used with their kspace style counterparts, lj/long/coul/long/dielectric should be used with their kspace style
namely, pppm/dielectric, pppm/disp/dielectric, and msm/dielectric, respectively. counterparts, namely, pppm/dielectric, pppm/disp/dielectric, and
msm/dielectric, respectively.
---------- ----------
@ -97,24 +100,27 @@ Mixing, shift, table, tail correction, restart, rRESPA info
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
For atom type pairs I,J and I != J, the epsilon and sigma coefficients For atom type pairs I,J and I != J, the epsilon and sigma coefficients
and cutoff distances for this pair style can be mixed. The default and cutoff distances for this pair style can be mixed. The default mix
mix value is *geometric*\ . See the "pair_modify" command for details. algorithm is *geometric*\ . See the :doc:`pair_modify <pair_modify>`
command for details.
The :doc:`pair_modify <pair_modify>` table option is not relevant The :doc:`pair_modify <pair_modify>` table option is not relevant
for this pair style. for this pair style.
This pair style writes its information to :doc:`binary restart files <restart>`, so pair_style and pair_coeff commands do not need These pair styles write their information to :doc:`binary restart files
to be specified in an input script that reads a restart file. <restart>`, so pair_style and pair_coeff commands do not need to be
specified in an input script that reads a restart file.
This pair style can only be used via the *pair* keyword of the These pair styles can only be used via the *pair* keyword of the
:doc:`run_style respa <run_style>` command. It does not support the :doc:`run_style respa <run_style>` command. They do not support the
*inner*, *middle*, *outer* keywords. *inner*, *middle*, *outer* keywords.
Restrictions Restrictions
"""""""""""" """"""""""""
These styles are part of the DIELECTRIC package. They are only enabled if These styles are part of the DIELECTRIC package. They are only enabled
LAMMPS was built with that package. See the :doc:`Build package <Build_package>` page for more info. if LAMMPS was built with that package. See the :doc:`Build package
<Build_package>` page for more info.
Related commands Related commands
"""""""""""""""" """"""""""""""""

View File

@ -600,6 +600,7 @@ Cummins
Cundall Cundall
cundall cundall
Curk Curk
curv
Cusentino Cusentino
customIDs customIDs
cutbond cutbond

View File

@ -2,7 +2,7 @@ This folder contains some example data and input scripts for the DIELECTRIC pack
Nguyen TD, Li H, Bagchi D, Solis FJ, Olvera de la Cruz, Incorporating surface polarization effects into large-scale coarse-grained molecular dynamics simulation, Computer Physics Communications 2019, 241, 80--91. Nguyen TD, Li H, Bagchi D, Solis FJ, Olvera de la Cruz, Incorporating surface polarization effects into large-scale coarse-grained molecular dynamics simulation, Computer Physics Communications 2019, 241, 80--91.
- data.confined : two point opposite charges confined between two interfaces (epsilon1=2/epsilon2=10/epsilon2=2) - data.confined : two point opposite charges confined between two interfaces (epsilon2=2/epsilon1=10/epsilon2=2)
- data.sphere : two point opposite charges outside a spherical interface (epsilon_in=1/epsilon2=10) - data.sphere : two point opposite charges outside a spherical interface (epsilon_in=1/epsilon2=10)
- in.confined : read in data.confined - in.confined : read in data.confined
@ -10,7 +10,7 @@ Nguyen TD, Li H, Bagchi D, Solis FJ, Olvera de la Cruz, Incorporating surface po
For "atom_style dielectric" the Atoms section in the data file contains 15 following columns: For "atom_style dielectric" the Atoms section in the data file contains 15 following columns:
id mol type q x y z normx normy normz area_per_patch ed em epsilon curvature id mol type q x y z normx normy normz area/patch ed em epsilon curvature
where where
@ -34,9 +34,13 @@ where
For interface particles, epsilon is set to be em For interface particles, epsilon is set to be em
(the mean dielectric value above). (the mean dielectric value above).
* area_per_patch: the surface area of the patch (element). * area/patch: the surface area of the patch (element).
For real charges, this value is irrelevant, can be 1.0. For real charges, this value is irrelevant, can be 1.0.
* curvature: surface mean curvature at the patch. * curvature: surface mean curvature at the patch.
For example, for spherical interfaces, curvature = 1/spherical radius. For example, for spherical interfaces, curvature = 1/spherical radius.
For planar interfaces, curvature = 0. For planar interfaces, curvature = 0.
Note that the properties normx, normy, normz, area/patch, ed, em, and curvature are not
used for the non-interface beads. epsilon is used to scale the charge of any non-interface
ion, see the documentation for pair styles with the dielectric suffix and fix polarize.

View File

@ -7,7 +7,7 @@
# Dielectric constants can be set to be different from the input data file # Dielectric constants can be set to be different from the input data file
variable epsilon1 index 20 variable epsilon1 index 20
variable epsilon2 index 8 variable epsilon2 index 10
variable data index data.confined variable data index data.confined

View File

@ -1,5 +1,5 @@
# /* ---------------------------------------------------------------------- # /* ----------------------------------------------------------------------
# Generic Linux Makefile for OpenCL # Linux Makefile for Intel oneAPI - Mixed precision
# ------------------------------------------------------------------------- */ # ------------------------------------------------------------------------- */
# which file will be copied to Makefile.lammps # which file will be copied to Makefile.lammps
@ -11,11 +11,14 @@ EXTRAMAKE = Makefile.lammps.opencl
LMP_INC = -DLAMMPS_SMALLBIG LMP_INC = -DLAMMPS_SMALLBIG
OCL_INC = OCL_INC = -I$(ONEAPI_ROOT)/compiler/latest/linux/include/sycl/
OCL_CPP = mpiicpc -std=c++11 -xHost -O2 -qopenmp -qopenmp-simd -DMPICH_IGNORE_CXX_SEEK $(LMP_INC) $(OCL_INC) CPP_OPT = -xHost -O2 -qopenmp -qopenmp-simd -fp-model fast=2 -no-prec-div \
OCL_LINK = -lOpenCL -qoverride-limits
OCL_CPP = mpiicpc -std=c++11 -diag-disable=10441 -DMPICH_IGNORE_CXX_SEEK \
$(LMP_INC) $(OCL_INC) $(CPP_OPT)
OCL_LINK = -L$(ONEAPI_ROOT)/compiler/latest/linux/lib -lOpenCL
OCL_PREC = -D_SINGLE_DOUBLE OCL_PREC = -D_SINGLE_DOUBLE
OCL_TUNE = -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT -fp-model fast=2 -no-prec-div OCL_TUNE = -DMPI_GERYON -DCUDA_PROXY -DGERYON_NUMA_FISSION -DUCL_NO_EXIT
BIN_DIR = ./ BIN_DIR = ./
OBJ_DIR = ./ OBJ_DIR = ./

View File

@ -264,6 +264,20 @@ GERYON_KERNEL_DUMP Dump all compiled OpenCL programs with compiler
flags and build logs flags and build logs
GPU_CAST Casting performed on GPU, untested recently GPU_CAST Casting performed on GPU, untested recently
THREE_CONCURRENT Concurrent 3-body calcs in separate queues, untested THREE_CONCURRENT Concurrent 3-body calcs in separate queues, untested
LAL_SERIALIZE_INIT Force serialization of initialization and compilation
for multiple MPI tasks sharing the same accelerator.
Some accelerator API implementations have had issues
with temporary file conflicts in the past.
GERYON_FORCE_SHARED_MAIN_MEM_ON Should only be used for builds where the
accelerator is guaranteed to share physical
main memory with the host (e.g. integrated
GPU or CPU device). Default behavior is to
auto-detect. Impacts OpenCL only.
GERYON_FORCE_SHARED_MAIN_MEM_OFF Should only be used for builds where the
accelerator is guaranteed to have its own
physical main memory, separate from the host's
(e.g. discrete GPU card). Default behavior is to
auto-detect. Impacts OpenCL only.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------

View File

@ -126,10 +126,13 @@ class UCL_Device {
/// Return the number of devices that support OpenCL /// Return the number of devices that support OpenCL
inline int num_devices() { return _num_devices; } inline int num_devices() { return _num_devices; }
/// Specify whether profiling (device timers) will be used for the device (yes=true) /// Specify whether profiling (device timers) will be used (yes=true)
/** No-op for CUDA and HIP **/ /** No-op for CUDA and HIP **/
inline void configure_profiling(const bool profiling_on) inline void configure_profiling(const bool profiling_on) {
{ _cq_profiling = profiling_on; } #ifndef GERYON_NO_OCL_MARKERS
_cq_profiling = profiling_on;
#endif
}
/// Set the OpenCL device to the specified device number /// Set the OpenCL device to the specified device number
/** A context and default command queue will be created for the device * /** A context and default command queue will be created for the device *
@ -176,8 +179,8 @@ class UCL_Device {
#ifdef CL_VERSION_2_0 #ifdef CL_VERSION_2_0
if (_cq_profiling) { if (_cq_profiling) {
cl_queue_properties props[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, cl_queue_properties props[] = {CL_QUEUE_PROPERTIES,
0}; CL_QUEUE_PROFILING_ENABLE, 0};
_cq.back()=clCreateCommandQueueWithProperties(_context, _cl_device, props, _cq.back()=clCreateCommandQueueWithProperties(_context, _cl_device, props,
&errorv); &errorv);
} else { } else {
@ -187,8 +190,8 @@ class UCL_Device {
} }
#else #else
if (_cq_profiling) if (_cq_profiling)
_cq.back()=clCreateCommandQueue(_context, _cl_device, CL_QUEUE_PROFILING_ENABLE, _cq.back()=clCreateCommandQueue(_context, _cl_device,
&errorv); CL_QUEUE_PROFILING_ENABLE, &errorv);
else else
_cq.back()=clCreateCommandQueue(_context, _cl_device, 0, &errorv); _cq.back()=clCreateCommandQueue(_context, _cl_device, 0, &errorv);
#endif #endif
@ -403,7 +406,11 @@ class UCL_Device {
// Grabs the properties for all devices // Grabs the properties for all devices
UCL_Device::UCL_Device() { UCL_Device::UCL_Device() {
_device=-1; _device=-1;
#ifndef GERYON_NO_OCL_MARKERS
_cq_profiling=true; _cq_profiling=true;
#else
_cq_profiling=false;
#endif
// --- Get Number of Platforms // --- Get Number of Platforms
cl_uint nplatforms; cl_uint nplatforms;
@ -482,6 +489,7 @@ int UCL_Device::set_platform(int pid) {
_num_devices = 0; _num_devices = 0;
for (int i=0; i<num_unpart; i++) { for (int i=0; i<num_unpart; i++) {
cl_uint num_subdevices = 1; cl_uint num_subdevices = 1;
cl_device_id *subdevice_list = device_list + i;
#ifdef CL_VERSION_1_2 #ifdef CL_VERSION_1_2
cl_device_affinity_domain adomain; cl_device_affinity_domain adomain;
@ -494,25 +502,29 @@ int UCL_Device::set_platform(int pid) {
props[0]=CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; props[0]=CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
props[1]=CL_DEVICE_AFFINITY_DOMAIN_NUMA; props[1]=CL_DEVICE_AFFINITY_DOMAIN_NUMA;
props[2]=0; props[2]=0;
cl_int err = CL_SUCCESS;
if (adomain & CL_DEVICE_AFFINITY_DOMAIN_NUMA) if (adomain & CL_DEVICE_AFFINITY_DOMAIN_NUMA)
CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, 0, NULL, err = clCreateSubDevices(device_list[i], props, 0, NULL,
&num_subdevices)); &num_subdevices);
if (num_subdevices > 1) { if (err == CL_SUCCESS && num_subdevices > 1) {
cl_device_id *subdevice_list = new cl_device_id[num_subdevices]; subdevice_list = new cl_device_id[num_subdevices];
CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, num_subdevices, err = clCreateSubDevices(device_list[i], props, num_subdevices,
subdevice_list, &num_subdevices)); subdevice_list, &num_subdevices);
for (cl_uint j=0; j<num_subdevices; j++) { if (err != CL_SUCCESS) {
_cl_devices.push_back(device_list[i]); delete[] subdevice_list;
add_properties(device_list[i]); num_subdevices = 1;
_num_devices++; subdevice_list = device_list + i;
} }
delete[] subdevice_list;
} else {
_cl_devices.push_back(device_list[i]);
add_properties(device_list[i]);
_num_devices++;
} }
#endif #endif
for (cl_uint j=0; j<num_subdevices; j++) {
_num_devices++;
_cl_devices.push_back(subdevice_list[j]);
add_properties(subdevice_list[j]);
}
if (num_subdevices > 1) delete[] subdevice_list;
} // for i } // for i
#endif #endif
@ -686,10 +698,10 @@ void UCL_Device::add_properties(cl_device_id device_list) {
double arch = static_cast<double>(minor)/10+major; double arch = static_cast<double>(minor)/10+major;
if (arch >= 3.0) if (arch >= 3.0)
op.has_shuffle_support=true; op.has_shuffle_support=true;
op.shared_main_memory=_shared_mem_device(device_list);
} }
delete[] buffer2; delete[] buffer2;
#endif #endif
op.shared_main_memory=_shared_mem_device(device_list);
_properties.push_back(op); _properties.push_back(op);
} }

View File

@ -27,11 +27,15 @@
#include "ocl_macros.h" #include "ocl_macros.h"
#include "ocl_device.h" #include "ocl_device.h"
#ifndef GERYON_NO_OCL_MARKERS
#ifdef CL_VERSION_1_2 #ifdef CL_VERSION_1_2
#define UCL_OCL_MARKER(cq,event) clEnqueueMarkerWithWaitList(cq,0,nullptr,event) #define UCL_OCL_MARKER(cq,event) clEnqueueMarkerWithWaitList(cq,0,nullptr,event)
#else #else
#define UCL_OCL_MARKER clEnqueueMarker #define UCL_OCL_MARKER clEnqueueMarker
#endif #endif
#else
#define UCL_OCL_MARKER(cq,event)
#endif
namespace ucl_opencl { namespace ucl_opencl {
@ -51,8 +55,10 @@ class UCL_Timer {
inline void clear() { inline void clear() {
if (_initialized) { if (_initialized) {
if (has_measured_time) { if (has_measured_time) {
#ifndef GERYON_NO_OCL_MARKERS
clReleaseEvent(start_event); clReleaseEvent(start_event);
clReleaseEvent(stop_event); clReleaseEvent(stop_event);
#endif
has_measured_time = false; has_measured_time = false;
} }
CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq)); CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq));
@ -76,8 +82,10 @@ class UCL_Timer {
/// Start timing on default command queue /// Start timing on default command queue
inline void start() { inline void start() {
if (has_measured_time) { if (has_measured_time) {
#ifndef GERYON_NO_OCL_MARKERS
clReleaseEvent(start_event); clReleaseEvent(start_event);
clReleaseEvent(stop_event); clReleaseEvent(stop_event);
#endif
has_measured_time = false; has_measured_time = false;
} }
UCL_OCL_MARKER(_cq,&start_event); UCL_OCL_MARKER(_cq,&start_event);
@ -91,17 +99,26 @@ class UCL_Timer {
/// Block until the start event has been reached on device /// Block until the start event has been reached on device
inline void sync_start() { inline void sync_start() {
#ifndef GERYON_NO_OCL_MARKERS
CL_SAFE_CALL(clWaitForEvents(1,&start_event));
if (has_measured_time) { if (has_measured_time) {
clReleaseEvent(start_event); clReleaseEvent(start_event);
clReleaseEvent(stop_event); clReleaseEvent(stop_event);
has_measured_time = false; has_measured_time = false;
} }
CL_SAFE_CALL(clWaitForEvents(1,&start_event)); #else
CL_SAFE_CALL(clFinish(_cq));
has_measured_time = false;
#endif
} }
/// Block until the stop event has been reached on device /// Block until the stop event has been reached on device
inline void sync_stop() { inline void sync_stop() {
#ifndef GERYON_NO_OCL_MARKERS
CL_SAFE_CALL(clWaitForEvents(1,&stop_event)); CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
#else
CL_SAFE_CALL(clFinish(_cq));
#endif
has_measured_time = true; has_measured_time = true;
} }
@ -126,6 +143,7 @@ class UCL_Timer {
/// Return the time (ms) of last start to stop - Forces synchronization /// Return the time (ms) of last start to stop - Forces synchronization
inline double time() { inline double time() {
if(!has_measured_time) return 0.0; if(!has_measured_time) return 0.0;
#ifndef GERYON_NO_OCL_MARKERS
cl_ulong tstart,tend; cl_ulong tstart,tend;
CL_SAFE_CALL(clWaitForEvents(1,&stop_event)); CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
CL_SAFE_CALL(clGetEventProfilingInfo(stop_event, CL_SAFE_CALL(clGetEventProfilingInfo(stop_event,
@ -138,6 +156,11 @@ class UCL_Timer {
clReleaseEvent(stop_event); clReleaseEvent(stop_event);
has_measured_time = false; has_measured_time = false;
return (tend-tstart)*1e-6; return (tend-tstart)*1e-6;
#else
CL_SAFE_CALL(clFinish(_cq));
has_measured_time = false;
return 0.0;
#endif
} }
/// Return the time (s) of last start to stop - Forces synchronization /// Return the time (s) of last start to stop - Forces synchronization

View File

@ -76,7 +76,7 @@ int beck_gpu_init(const int ntypes, double **cutsq, double **aa,
special_lj, inum, nall, max_nbors, maxspecial, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen); cell_size, gpu_split, screen);
BLMF.device->gpu_barrier(); BLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -84,7 +84,7 @@ int bornclcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
gpu_split, screen, host_cut_ljsq, host_cut_coulsq, gpu_split, screen, host_cut_ljsq, host_cut_coulsq,
host_special_coul, qqrd2e, g_ewald); host_special_coul, qqrd2e, g_ewald);
BCLCSMF.device->gpu_barrier(); BCLCSMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -84,7 +84,7 @@ int borncl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
gpu_split, screen, host_cut_ljsq, host_cut_coulsq, gpu_split, screen, host_cut_ljsq, host_cut_coulsq,
host_special_coul, qqrd2e, g_ewald); host_special_coul, qqrd2e, g_ewald);
BORNCLMF.device->gpu_barrier(); BORNCLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -86,7 +86,7 @@ int borncwcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
host_cut_coulsq, host_special_coul, qqrd2e, host_cut_coulsq, host_special_coul, qqrd2e,
alf, e_shift, f_shift); alf, e_shift, f_shift);
BornCWCST.device->gpu_barrier(); BornCWCST.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -86,7 +86,7 @@ int borncw_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
host_cut_coulsq, host_special_coul, qqrd2e, host_cut_coulsq, host_special_coul, qqrd2e,
alf, e_shift, f_shift); alf, e_shift, f_shift);
BORNCWMF.device->gpu_barrier(); BORNCWMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -80,7 +80,7 @@ int born_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
offset, special_lj, inum, nall, max_nbors, offset, special_lj, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen); maxspecial, cell_size, gpu_split, screen);
BORNMF.device->gpu_barrier(); BORNMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }
@ -114,7 +114,7 @@ void born_gpu_reinit(const int ntypes, double **host_rhoinv,
BORNMF.reinit(ntypes, host_rhoinv, host_born1, host_born2, BORNMF.reinit(ntypes, host_rhoinv, host_born1, host_born2,
host_born3, host_a, host_c, host_d, offset); host_born3, host_a, host_c, host_d, offset);
BORNMF.device->gpu_barrier(); BORNMF.device->serialize_init();
} }
} }

View File

@ -83,7 +83,7 @@ int buckc_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
host_cut_ljsq, host_cut_coulsq, host_cut_ljsq, host_cut_coulsq,
host_special_coul, qqrd2e); host_special_coul, qqrd2e);
BUCKCMF.device->gpu_barrier(); BUCKCMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -82,7 +82,7 @@ int buckcl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
maxspecial, cell_size, gpu_split, screen, host_cut_ljsq, maxspecial, cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
BUCKCLMF.device->gpu_barrier(); BUCKCLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -77,7 +77,7 @@ int buck_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
host_a, host_c, offset, special_lj, inum, nall, max_nbors, host_a, host_c, offset, special_lj, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen); maxspecial, cell_size, gpu_split, screen);
BUCKMF.device->gpu_barrier(); BUCKMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }
@ -110,7 +110,7 @@ void buck_gpu_reinit(const int ntypes, double **cutsq, double **host_rhoinv,
BUCKMF.reinit(ntypes, cutsq, host_rhoinv, host_buck1, host_buck2, BUCKMF.reinit(ntypes, cutsq, host_rhoinv, host_buck1, host_buck2,
host_a, host_c, offset); host_a, host_c, offset);
BUCKMF.device->gpu_barrier(); BUCKMF.device->serialize_init();
} }
} }

View File

@ -88,7 +88,7 @@ int crm_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1,
qqrd2e, cut_lj_innersq, cut_coul_innersq, denom_lj, qqrd2e, cut_lj_innersq, cut_coul_innersq, denom_lj,
denom_coul, epsilon, sigma, mix_arithmetic); denom_coul, epsilon, sigma, mix_arithmetic);
CRMMF.device->gpu_barrier(); CRMMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -86,7 +86,7 @@ int crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1,
qqrd2e, g_ewald, cut_lj_innersq, denom_lj, epsilon, qqrd2e, g_ewald, cut_lj_innersq, denom_lj, epsilon,
sigma, mix_arithmetic); sigma, mix_arithmetic);
CRMLMF.device->gpu_barrier(); CRMLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -83,7 +83,7 @@ int colloid_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
inum, nall, max_nbors, maxspecial, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen); cell_size, gpu_split, screen);
COLLMF.device->gpu_barrier(); COLLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -74,7 +74,7 @@ int cdebye_gpu_init(const int ntypes, double **host_scale, double **cutsq,
init_ok=CDEMF.init(ntypes, host_scale, cutsq, host_special_coul, inum, nall, max_nbors, init_ok=CDEMF.init(ntypes, host_scale, cutsq, host_special_coul, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen, qqrd2e, kappa); maxspecial, cell_size, gpu_split, screen, qqrd2e, kappa);
CDEMF.device->gpu_barrier(); CDEMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }
@ -103,7 +103,7 @@ void cdebye_gpu_reinit(const int ntypes, double **host_scale) {
if (gpu_rank==i && world_me!=0) if (gpu_rank==i && world_me!=0)
CDEMF.reinit(ntypes, host_scale); CDEMF.reinit(ntypes, host_scale);
CDEMF.device->gpu_barrier(); CDEMF.device->serialize_init();
} }
} }

View File

@ -77,7 +77,7 @@ int cdsf_gpu_init(const int ntypes, const int inum, const int nall,
gpu_split, screen, host_cut_coulsq, host_special_coul, gpu_split, screen, host_cut_coulsq, host_special_coul,
qqrd2e, e_shift, f_shift, alpha); qqrd2e, e_shift, f_shift, alpha);
CDMF.device->gpu_barrier(); CDMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -74,7 +74,7 @@ int coul_gpu_init(const int ntypes, double **host_scale,
init_ok=COULMF.init(ntypes, host_scale, cutsq, special_coul, inum, nall, max_nbors, init_ok=COULMF.init(ntypes, host_scale, cutsq, special_coul, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen, qqrd2e); maxspecial, cell_size, gpu_split, screen, qqrd2e);
COULMF.device->gpu_barrier(); COULMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }
@ -103,7 +103,7 @@ void coul_gpu_reinit(const int ntypes, double **host_scale) {
if (gpu_rank==i && world_me!=0) if (gpu_rank==i && world_me!=0)
COULMF.reinit(ntypes, host_scale); COULMF.reinit(ntypes, host_scale);
COULMF.device->gpu_barrier(); COULMF.device->serialize_init();
} }
} }

View File

@ -76,7 +76,7 @@ int clcs_gpu_init(const int ntypes, double **host_scale,
cell_size, gpu_split, screen, host_cut_coulsq, cell_size, gpu_split, screen, host_cut_coulsq,
host_special_coul, qqrd2e, g_ewald); host_special_coul, qqrd2e, g_ewald);
CLCSMF.device->gpu_barrier(); CLCSMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }
@ -105,7 +105,7 @@ void clcs_gpu_reinit(const int ntypes, double **host_scale) {
if (gpu_rank==i && world_me!=0) if (gpu_rank==i && world_me!=0)
CLCSMF.reinit(ntypes, host_scale); CLCSMF.reinit(ntypes, host_scale);
CLCSMF.device->gpu_barrier(); CLCSMF.device->serialize_init();
} }
} }

View File

@ -76,7 +76,7 @@ int cl_gpu_init(const int ntypes, double **host_scale,
cell_size, gpu_split, screen, host_cut_coulsq, cell_size, gpu_split, screen, host_cut_coulsq,
host_special_coul, qqrd2e, g_ewald); host_special_coul, qqrd2e, g_ewald);
CLMF.device->gpu_barrier(); CLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }
@ -105,7 +105,7 @@ void cl_gpu_reinit(const int ntypes, double **host_scale) {
if (gpu_rank==i && world_me!=0) if (gpu_rank==i && world_me!=0)
CLMF.reinit(ntypes, host_scale); CLMF.reinit(ntypes, host_scale);
CLMF.device->gpu_barrier(); CLMF.device->serialize_init();
} }
} }

View File

@ -328,7 +328,7 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
for (int i=0; i<_procs_per_gpu; i++) { for (int i=0; i<_procs_per_gpu; i++) {
if (_gpu_rank==i) if (_gpu_rank==i)
flag=compile_kernels(); flag=compile_kernels();
gpu_barrier(); serialize_init();
} }
// check if double precision support is available // check if double precision support is available
@ -609,6 +609,10 @@ void DeviceT::init_message(FILE *screen, const char *name,
int last=last_gpu+1; int last=last_gpu+1;
if (last>gpu->num_devices()) if (last>gpu->num_devices())
last=gpu->num_devices(); last=gpu->num_devices();
if (gpu->num_platforms()>1) {
std::string pname=gpu->platform_name();
fprintf(screen,"Platform: %s\n",pname.c_str());
}
for (int i=first_gpu; i<last; i++) { for (int i=first_gpu; i<last; i++) {
std::string sname; std::string sname;
if (i==first_gpu) if (i==first_gpu)

View File

@ -217,6 +217,12 @@ class Device {
inline int gpu_rank() const { return _gpu_rank; } inline int gpu_rank() const { return _gpu_rank; }
/// MPI Barrier for gpu /// MPI Barrier for gpu
inline void gpu_barrier() { MPI_Barrier(_comm_gpu); } inline void gpu_barrier() { MPI_Barrier(_comm_gpu); }
/// Serialize GPU initialization and JIT for unsafe platforms
/** Calls gpu_barrier() only when the library is compiled with
  * LAL_SERIALIZE_INIT defined; otherwise this function does nothing,
  * so callers that invoke it once per loop iteration are no longer
  * synchronized between iterations. **/
inline void serialize_init() {
#ifdef LAL_SERIALIZE_INIT
gpu_barrier();
#endif
}
/// Return the 'mode' for acceleration: GPU_FORCE, GPU_NEIGH or GPU_HYB_NEIGH /// Return the 'mode' for acceleration: GPU_FORCE, GPU_NEIGH or GPU_HYB_NEIGH
inline int gpu_mode() const { return _gpu_mode; } inline int gpu_mode() const { return _gpu_mode; }
/// Index of first device used by a node /// Index of first device used by a node

View File

@ -80,7 +80,7 @@ int dpl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq, cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e); host_cut_coulsq, host_special_coul, qqrd2e);
DPLMF.device->gpu_barrier(); DPLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -80,7 +80,7 @@ int dplsf_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq, cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e); host_cut_coulsq, host_special_coul, qqrd2e);
DPLSFMF.device->gpu_barrier(); DPLSFMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -81,7 +81,7 @@ int dplj_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq, cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
DPLJMF.device->gpu_barrier(); DPLJMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -76,7 +76,7 @@ int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0,
host_cut, special_lj, false, inum, nall, max_nbors, host_cut, special_lj, false, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen); maxspecial, cell_size, gpu_split, screen);
DPDMF.device->gpu_barrier(); DPDMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -76,7 +76,7 @@ int dpd_tstat_gpu_init(const int ntypes, double **cutsq, double **host_a0,
host_cut, special_lj, true, inum, nall, 300, host_cut, special_lj, true, inum, nall, 300,
maxspecial, cell_size, gpu_split, screen); maxspecial, cell_size, gpu_split, screen);
DPDTMF.device->gpu_barrier(); DPDTMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -90,7 +90,7 @@ int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq,
nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial, nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen); cell_size, gpu_split, screen);
EAMALMF.device->gpu_barrier(); EAMALMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -90,7 +90,7 @@ int eam_gpu_init(const int ntypes, double host_cutforcesq,
nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial, nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen); cell_size, gpu_split, screen);
EAMMF.device->gpu_barrier(); EAMMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -90,7 +90,7 @@ int eam_fs_gpu_init(const int ntypes, double host_cutforcesq,
nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial, nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen); cell_size, gpu_split, screen);
EAMFSMF.device->gpu_barrier(); EAMFSMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -76,7 +76,7 @@ int gauss_gpu_init(const int ntypes, double **cutsq, double **host_a,
offset, special_lj, inum, nall, max_nbors, maxspecial, offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen); cell_size, gpu_split, screen);
GLMF.device->gpu_barrier(); GLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }
@ -106,7 +106,7 @@ void gauss_gpu_reinit(const int ntypes, double **cutsq, double **host_a,
if (gpu_rank==i && world_me!=0) if (gpu_rank==i && world_me!=0)
GLMF.reinit(ntypes, cutsq, host_a, host_b, offset); GLMF.reinit(ntypes, cutsq, host_a, host_b, offset);
GLMF.device->gpu_barrier(); GLMF.device->serialize_init();
} }
} }

View File

@ -83,7 +83,7 @@ int gb_gpu_init(const int ntypes, const double gamma,
host_lj3, host_lj4, offset, special_lj, inum, nall, host_lj3, host_lj4, offset, special_lj, inum, nall,
max_nbors, maxspecial, cell_size, gpu_split, screen); max_nbors, maxspecial, cell_size, gpu_split, screen);
GBMF.device->gpu_barrier(); GBMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -76,7 +76,7 @@ int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
offset, special_lj, inum, nall, max_nbors, maxspecial, offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen); cell_size, gpu_split, screen);
LJ96MF.device->gpu_barrier(); LJ96MF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -81,7 +81,7 @@ int c2cl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq, cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
C2CLMF.device->gpu_barrier(); C2CLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -81,7 +81,7 @@ int ljcd_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq, cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, kappa); host_cut_coulsq, host_special_coul, qqrd2e, kappa);
LJCDMF.device->gpu_barrier(); LJCDMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -80,7 +80,7 @@ int ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq, cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e); host_cut_coulsq, host_special_coul, qqrd2e);
LJCMF.device->gpu_barrier(); LJCMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -81,7 +81,7 @@ int ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq, cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
LJCLMF.device->gpu_barrier(); LJCLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }
@ -112,7 +112,7 @@ void ljcl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
if (gpu_rank==i && world_me!=0) if (gpu_rank==i && world_me!=0)
LJCLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, LJCLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
offset, host_cut_ljsq); offset, host_cut_ljsq);
LJCLMF.device->gpu_barrier(); LJCLMF.device->serialize_init();
} }
} }

View File

@ -83,7 +83,7 @@ int ljcm_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq, cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, order, qqrd2e); host_cut_coulsq, host_special_coul, order, qqrd2e);
LJCMLMF.device->gpu_barrier(); LJCMLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -80,7 +80,7 @@ int ljcb_gpu_init(const int ntypes, double **cutsq, double **cut_inner_sq,
special_lj, inum, nall, max_nbors, maxspecial, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen); cell_size, gpu_split, screen);
LJCubicLMF.device->gpu_barrier(); LJCubicLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -84,7 +84,7 @@ int ljd_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
host_cut_coulsq, host_special_coul, qqrd2e, e_shift, host_cut_coulsq, host_special_coul, qqrd2e, e_shift,
f_shift, alpha); f_shift, alpha);
LJDMF.device->gpu_barrier(); LJDMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -81,7 +81,7 @@ int ljecl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq, cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
LJECLMF.device->gpu_barrier(); LJECLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }
@ -112,7 +112,7 @@ void ljecl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
if (gpu_rank==i && world_me!=0) if (gpu_rank==i && world_me!=0)
LJECLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, LJECLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
offset, shift, host_cut_ljsq); offset, shift, host_cut_ljsq);
LJECLMF.device->gpu_barrier(); LJECLMF.device->serialize_init();
} }
} }

View File

@ -108,7 +108,7 @@ void lje_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
if (gpu_rank==i && world_me!=0) if (gpu_rank==i && world_me!=0)
LJEMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, LJEMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
offset, shift); offset, shift);
LJEMF.device->gpu_barrier(); LJEMF.device->serialize_init();
} }
} }

View File

@ -76,7 +76,7 @@ int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
offset, special_lj, inum, nall, max_nbors, maxspecial, offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen); cell_size, gpu_split, screen);
LJLMF.device->gpu_barrier(); LJLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }
@ -105,7 +105,7 @@ void ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
for (int i=0; i<procs_per_gpu; i++) { for (int i=0; i<procs_per_gpu; i++) {
if (gpu_rank==i && world_me!=0) if (gpu_rank==i && world_me!=0)
LJLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset); LJLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset);
LJLMF.device->gpu_barrier(); LJLMF.device->serialize_init();
} }
} }

View File

@ -81,7 +81,7 @@ int ljgrm_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
gpu_split, screen, host_ljsw1, host_ljsw2, host_ljsw3, gpu_split, screen, host_ljsw1, host_ljsw2, host_ljsw3,
host_ljsw4, host_ljsw5, cut_inner, cut_inner_sq); host_ljsw4, host_ljsw5, cut_inner, cut_inner_sq);
LJGRMMF.device->gpu_barrier(); LJGRMMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -80,7 +80,7 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3, cell_size, gpu_split, screen, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3,
host_ljsw4, cut_inner, cut_inner_sq); host_ljsw4, cut_inner, cut_inner_sq);
LJSMTMF.device->gpu_barrier(); LJSMTMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }
@ -110,7 +110,7 @@ void ljsmt_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
for (int i=0; i<procs_per_gpu; i++) { for (int i=0; i<procs_per_gpu; i++) {
if (gpu_rank==i && world_me!=0) if (gpu_rank==i && world_me!=0)
LJSMTMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq); LJSMTMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq);
LJSMTMF.device->gpu_barrier(); LJSMTMF.device->serialize_init();
} }
} }

View File

@ -77,7 +77,7 @@ int spica_gpu_init(const int ntypes, double **cutsq, int **cg_types,
host_lj4, offset, special_lj, inum, nall, max_nbors, host_lj4, offset, special_lj, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen); maxspecial, cell_size, gpu_split, screen);
CMMMF.device->gpu_barrier(); CMMMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -81,7 +81,7 @@ int spical_gpu_init(const int ntypes, double **cutsq, int **cg_type,
maxspecial, cell_size, gpu_split, screen, maxspecial, cell_size, gpu_split, screen,
host_cut_ljsq, host_cut_coulsq, host_special_coul, host_cut_ljsq, host_cut_coulsq, host_special_coul,
qqrd2e, g_ewald); qqrd2e, g_ewald);
CMMLMF.device->gpu_barrier(); CMMLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -89,7 +89,7 @@ int ljtip4p_long_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
host_special_coul, qqrd2e, host_special_coul, qqrd2e,
g_ewald, map_size, max_same); g_ewald, map_size, max_same);
LJTIP4PLMF.device->gpu_barrier(); LJTIP4PLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -80,7 +80,7 @@ int mie_gpu_init(const int ntypes, double **cutsq, double **host_mie1,
offset, special_lj, inum, nall, max_nbors, maxspecial, offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen); cell_size, gpu_split, screen);
MLMF.device->gpu_barrier(); MLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -77,7 +77,7 @@ int mor_gpu_init(const int ntypes, double **cutsq,
offset, special_lj, inum, nall, max_nbors, maxspecial, offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen); cell_size, gpu_split, screen);
MORMF.device->gpu_barrier(); MORMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -81,7 +81,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm, vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
split,success); split,success);
pppm.device->gpu_barrier(); pppm.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -80,7 +80,7 @@ int re_gpu_init(const int ntypes, double **shape, double **well, double **cutsq,
host_lj4, offset, special_lj, inum, nall, host_lj4, offset, special_lj, inum, nall,
max_nbors, maxspecial, cell_size, gpu_split, screen); max_nbors, maxspecial, cell_size, gpu_split, screen);
REMF.device->gpu_barrier(); REMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -76,7 +76,7 @@ int soft_gpu_init(const int ntypes, double **cutsq, double **host_prefactor,
special_lj, inum, nall, max_nbors, maxspecial, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen); cell_size, gpu_split, screen);
SLMF.device->gpu_barrier(); SLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }
@ -106,7 +106,7 @@ void soft_gpu_reinit(const int ntypes, double **cutsq, double **host_prefactor,
if (gpu_rank==i && world_me!=0) if (gpu_rank==i && world_me!=0)
SLMF.reinit(ntypes, cutsq, host_prefactor, host_cut); SLMF.reinit(ntypes, cutsq, host_prefactor, host_cut);
SLMF.device->gpu_barrier(); SLMF.device->serialize_init();
} }
} }

View File

@ -84,7 +84,7 @@ int sw_gpu_init(const int ntypes, const int inum, const int nall,
sigma_gamma, c1, c2, c3, c4, c5, c6, lambda_epsilon, sigma_gamma, c1, c2, c3, c4, c5, c6, lambda_epsilon,
costheta, map, e2param); costheta, map, e2param);
SWMF.device->gpu_barrier(); SWMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -76,7 +76,7 @@ int table_gpu_init(const int ntypes, double **cutsq, double ***table_coeffs,
special_lj, inum, nall, max_nbors, maxspecial, cell_size, special_lj, inum, nall, max_nbors, maxspecial, cell_size,
gpu_split, screen, tabstyle, ntables, tablength); gpu_split, screen, tabstyle, ntables, tablength);
TBMF.device->gpu_barrier(); TBMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -91,7 +91,7 @@ int tersoff_gpu_init(const int ntypes, const int inum, const int nall, const int
ts_c1, ts_c2, ts_c3, ts_c4, ts_c, ts_d, ts_h, ts_c1, ts_c2, ts_c3, ts_c4, ts_c, ts_d, ts_h,
ts_gamma, ts_beta, ts_powern, ts_cutsq); ts_gamma, ts_beta, ts_powern, ts_cutsq);
TSMF.device->gpu_barrier(); TSMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -91,7 +91,7 @@ int tersoff_mod_gpu_init(const int ntypes, const int inum, const int nall,
ts_c3, ts_c4, ts_c5, ts_h, ts_beta, ts_powern, ts_c3, ts_c4, ts_c5, ts_h, ts_beta, ts_powern,
ts_powern_del, ts_ca1, ts_cutsq); ts_powern_del, ts_ca1, ts_cutsq);
TSMMF.device->gpu_barrier(); TSMMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -102,7 +102,7 @@ int tersoff_zbl_gpu_init(const int ntypes, const int inum, const int nall,
ts_ZBLcut, ts_ZBLexpscale, global_e, global_a_0, ts_ZBLcut, ts_ZBLexpscale, global_e, global_a_0,
global_epsilon_0, ts_cutsq); global_epsilon_0, ts_cutsq);
TSZMF.device->gpu_barrier(); TSZMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -78,7 +78,7 @@ int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1,
offset, special_lj, inum, nall, max_nbors, maxspecial, offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen); cell_size, gpu_split, screen);
UFMLMF.device->gpu_barrier(); UFMLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }
@ -106,7 +106,7 @@ void ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1,
for (int i=0; i<procs_per_gpu; i++) { for (int i=0; i<procs_per_gpu; i++) {
if (gpu_rank==i && world_me!=0) if (gpu_rank==i && world_me!=0)
UFMLMF.reinit(ntypes, cutsq, host_uf1, host_uf2, host_uf3, offset); UFMLMF.reinit(ntypes, cutsq, host_uf1, host_uf2, host_uf3, offset);
UFMLMF.device->gpu_barrier(); UFMLMF.device->serialize_init();
} }
} }

View File

@ -89,7 +89,7 @@ int vashishta_gpu_init(const int ntypes, const int inum, const int nall, const i
lam4inv, zizj, mbigd, dvrc, big6w, heta, bigh, bigw, lam4inv, zizj, mbigd, dvrc, big6w, heta, bigh, bigw,
c0, costheta, bigb, big2b, bigc); c0, costheta, bigb, big2b, bigc);
VashishtaMF.device->gpu_barrier(); VashishtaMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -76,7 +76,7 @@ int ykcolloid_gpu_init(const int ntypes, double **cutsq, double **host_a,
inum, nall, max_nbors, maxspecial, cell_size, gpu_split, inum, nall, max_nbors, maxspecial, cell_size, gpu_split,
screen, kappa); screen, kappa);
YKCOLLMF.device->gpu_barrier(); YKCOLLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -76,7 +76,7 @@ int yukawa_gpu_init(const int ntypes, double **cutsq, double kappa,
inum, nall, max_nbors, maxspecial, cell_size, inum, nall, max_nbors, maxspecial, cell_size,
gpu_split, screen); gpu_split, screen);
YKMF.device->gpu_barrier(); YKMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -79,7 +79,7 @@ int zbl_gpu_init(const int ntypes, double **cutsq, double **host_sw1,
cut_globalsq, cut_innersq, cut_inner, cut_globalsq, cut_innersq, cut_inner,
inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen);
ZBLMF.device->gpu_barrier(); ZBLMF.device->serialize_init();
if (message) if (message)
fprintf(screen,"Done.\n"); fprintf(screen,"Done.\n");
} }

View File

@ -27,7 +27,7 @@ When configuring your project just set:
-DKokkos_ROOT=${kokkos_install_prefix} \ -DKokkos_ROOT=${kokkos_install_prefix} \
-DCMAKE_CXX_COMPILER=${compiler_used_to_build_kokkos} -DCMAKE_CXX_COMPILER=${compiler_used_to_build_kokkos}
```` ````
Note: You may need the following if using some versions of CMake (e.g. 3.12): Note: You may need the following if your project requires a minimum CMake version older than 3.12:
````cmake ````cmake
cmake_policy(SET CMP0074 NEW) cmake_policy(SET CMP0074 NEW)
```` ````
@ -171,6 +171,9 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`.
* Kokkos_ENABLE_HPX_ASYNC_DISPATCH * Kokkos_ENABLE_HPX_ASYNC_DISPATCH
* Whether HPX supports asynchronous dispatch * Whether HPX supports asynchronous dispatch
* BOOL Default: OFF * BOOL Default: OFF
* Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC
* Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2). This is an experimental performance feature and currently has issues when used with UCX. See https://github.com/kokkos/kokkos/issues/4228 for more details.
* BOOL Default: OFF
* Kokkos_ENABLE_LARGE_MEM_TESTS * Kokkos_ENABLE_LARGE_MEM_TESTS
* Whether to perform extra large memory tests * Whether to perform extra large memory tests
* BOOL Default: OFF * BOOL Default: OFF
@ -235,6 +238,9 @@ The following options control `find_package` paths for CMake-based TPLs:
## Architecture Keywords ## Architecture Keywords
Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_X`. Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_X`.
* Kokkos_ARCH_NATIVE
* Whether to optimize for the local CPU architecture
* BOOL Default: OFF
* Kokkos_ARCH_AMDAVX * Kokkos_ARCH_AMDAVX
* Whether to optimize for the AMDAVX architecture * Whether to optimize for the AMDAVX architecture
* BOOL Default: OFF * BOOL Default: OFF
@ -310,12 +316,24 @@ Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_
* Kokkos_ARCH_POWER9 * Kokkos_ARCH_POWER9
* Whether to optimize for the POWER9 architecture * Whether to optimize for the POWER9 architecture
* BOOL Default: OFF * BOOL Default: OFF
* Kokkos_ARCH_ICL
* Whether to optimize for the ICL architecture
* BOOL Default: OFF
* Kokkos_ARCH_ICX
* Whether to optimize for the ICX architecture
* BOOL Default: OFF
* Kokkos_ARCH_SKL
* Whether to optimize for the SKL architecture
* BOOL Default: OFF
* Kokkos_ARCH_SKX * Kokkos_ARCH_SKX
* Whether to optimize for the SKX architecture * Whether to optimize for the SKX architecture
* BOOL Default: OFF * BOOL Default: OFF
* Kokkos_ARCH_SNB * Kokkos_ARCH_SNB
* Whether to optimize for the SNB architecture * Whether to optimize for the SNB architecture
* BOOL Default: OFF * BOOL Default: OFF
* Kokkos_ARCH_SPR
* Whether to optimize for the SPR architecture
* BOOL Default: OFF
* Kokkos_ARCH_TURING75 * Kokkos_ARCH_TURING75
* Whether to optimize for the TURING75 architecture * Whether to optimize for the TURING75 architecture
* BOOL Default: OFF * BOOL Default: OFF

View File

@ -1,5 +1,157 @@
# Change Log # Change Log
## [3.7.00](https://github.com/kokkos/kokkos/tree/3.7.00) (2022-08-22)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.6.01...3.7.00)
### Features:
- Use non-volatile `join()` member functions and `operator+=` in `parallel_reduce/scan` [\#4931](https://github.com/kokkos/kokkos/pull/4931) [\#4954](https://github.com/kokkos/kokkos/pull/4954) [\#4951](https://github.com/kokkos/kokkos/pull/4951)
- Add `SIMD` sub package (requires C++17) [\#5016](https://github.com/kokkos/kokkos/pull/5016)
- Add `is_finalized()` [\#5247](https://github.com/kokkos/kokkos/pull/5247)
- Promote mathematical functions from `namespace Kokkos::Experimental` to `namespace Kokkos` [\#4791](https://github.com/kokkos/kokkos/pull/4791)
- Promote `min`, `max`, `clamp`, `minmax` functions from `namespace Kokkos::Experimental` to `namespace Kokkos` [\#5170](https://github.com/kokkos/kokkos/pull/5170)
- Add `round`, `logb`, `nextafter`, `copysign`, and `signbit` math functions [\#4768](https://github.com/kokkos/kokkos/pull/4768)
- Add `HIPManagedSpace`, similar to `CudaUVMSpace` [\#5112](https://github.com/kokkos/kokkos/pull/5112)
- Accept view construction allocation properties in `create_mirror[_view,_view_and_copy]` and `resize/realloc` [\#5125](https://github.com/kokkos/kokkos/pull/5125) [\#5095](https://github.com/kokkos/kokkos/pull/5095) [\#5035](https://github.com/kokkos/kokkos/pull/5035) [\#4805](https://github.com/kokkos/kokkos/pull/4805) [\#4844](https://github.com/kokkos/kokkos/pull/4844)
- Allow `MemorySpace::allocate()` to be called with execution space [\#4826](https://github.com/kokkos/kokkos/pull/4826)
- Experimental: Compile time view subscriber [\#4197](https://github.com/kokkos/kokkos/pull/4197)
### Backends and Archs Enhancements:
- Add support for Sapphire Rapids Intel architecture [\#5015](https://github.com/kokkos/kokkos/pull/5015)
- Add support for ICX, SKL and ICL Intel architectures [\#5013](https://github.com/kokkos/kokkos/pull/5013) [\#4929](https://github.com/kokkos/kokkos/pull/4929)
- Add arch flags for Intel GPU Ponte Vecchio [\#4932](https://github.com/kokkos/kokkos/pull/4932)
- SYCL: require GPU if GPU architecture was set at configuration time (i.e. do not allow fallback to CPU device) [\#5264](https://github.com/kokkos/kokkos/pull/5264) [\#5222](https://github.com/kokkos/kokkos/pull/5222)
- SYCL: Add `SYCL::sycl_queue()` for interoperability [\#5241](https://github.com/kokkos/kokkos/pull/5241)
- SYCL: Loosen restriction for using built-in `sycl::group_broadcast` [\#4552](https://github.com/kokkos/kokkos/pull/4552)
- SYCL: preserve address space [\#4396](https://github.com/kokkos/kokkos/pull/4396)
- OpenMPTarget: Adding a workaround for team scan [\#5219](https://github.com/kokkos/kokkos/pull/5219)
- OpenMPTarget: Adding logic to skip the kernel launch if `league_size=0` [\#5067](https://github.com/kokkos/kokkos/pull/5067)
- OpenMPTarget: Make sure `Kokkos::abort()` causes abnormal program termination when called on the host-side [\#4808](https://github.com/kokkos/kokkos/pull/4808)
- HIP: Make HIPHostPinnedSpace coarse-grained [\#5152](https://github.com/kokkos/kokkos/pull/5152)
- Refactor OpenMP `parallel_for` implementation to use more native OpenMP constructs [\#4664](https://github.com/kokkos/kokkos/pull/4664)
- Add option to optimize for local CPU architecture `Kokkos_ARCH_NATIVE` [\#4930](https://github.com/kokkos/kokkos/pull/4930)
### Implemented enhancements
- Add command line argument/environment variable to print the configuration [\#5233](https://github.com/kokkos/kokkos/pull/5233)
- Improve error message in view memory access violations [\#4950](https://github.com/kokkos/kokkos/pull/4950)
- Remove unnecessary fences in View initialization [\#4823](https://github.com/kokkos/kokkos/pull/4823)
- Make `View::shmem_size()` device-callable [\#4936](https://github.com/kokkos/kokkos/pull/4936)
- Update numerics support for `__float128` [\#5081](https://github.com/kokkos/kokkos/pull/5081)
- Add `log10` overload for `Kokkos::complex` [\#5009](https://github.com/kokkos/kokkos/pull/5009)
- Add `[[nodiscard]]` to `ScopeGuard` [\#5224](https://github.com/kokkos/kokkos/pull/5224)
- Add structured binding support for `Kokkos::Array` [\#4962](https://github.com/kokkos/kokkos/pull/4962)
- Enable accessing `Kokkos::Array` elements in constant expressions [\#4916](https://github.com/kokkos/kokkos/pull/4916)
- Mark `as_view_of_rank_n` as KOKKOS_FUNCTION [\#5248](https://github.com/kokkos/kokkos/pull/5248)
- Cleanup/rework fence overloads [\#5148](https://github.com/kokkos/kokkos/pull/5148)
- Assert that `Layout` construction from extents is valid in functions taking integer extents [\#5209](https://github.com/kokkos/kokkos/pull/5209)
- Add `fill_random` overload that takes an execution space as first argument [\#5181](https://github.com/kokkos/kokkos/pull/5181)
- Avoid some unnecessary fences in `parallel_reduce/scan` [\#5154](https://github.com/kokkos/kokkos/pull/5154)
- Include `KOKKOS_ENABLE_LIBDL` in options when printing configuration [\#5086](https://github.com/kokkos/kokkos/pull/5086)
- DynRankView: make `layout()` return the same as a corresponding static View [\#5026](https://github.com/kokkos/kokkos/pull/5026)
- Use `_mm_malloc` for icpx [\#5012](https://github.com/kokkos/kokkos/pull/5012)
- Avoid forcing matching execution spaces in `BinSort` constructor and `sort()` [\#4919](https://github.com/kokkos/kokkos/pull/4919)
- Check number of bins in `BinSort` [\#4890](https://github.com/kokkos/kokkos/pull/4890)
- Improve performance in parallel STL-like algorithms [\#4887](https://github.com/kokkos/kokkos/pull/4887) [\#4886](https://github.com/kokkos/kokkos/pull/4886)
- Disable `memset` on A64FX and launch `parallel_for` instead (performance) [\#4884](https://github.com/kokkos/kokkos/pull/4884)
- Allow non-power-of-two team sizes for team reductions and scans [\#4809](https://github.com/kokkos/kokkos/pull/4809)
#### Harmonization of Kokkos execution environment initialization:
- Warn when unable to detect local MPI rank and user explicitly asked for it [\#5263](https://github.com/kokkos/kokkos/pull/5263)
- Refactor parsing of command line arguments and environment variables [\#5221](https://github.com/kokkos/kokkos/pull/5221)
- Refactor device selection at initialization [\#5211](https://github.com/kokkos/kokkos/pull/5211)
- Rename tools settings for consistency [\#5201](https://github.com/kokkos/kokkos/pull/5201)
- Print help only once [\#5128](https://github.com/kokkos/kokkos/pull/5128)
- Update precedence rule in initialization [\#5130](https://github.com/kokkos/kokkos/pull/5130)
- Warn instead of just ignoring user settings when kokkos-tools is disabled [\#5088](https://github.com/kokkos/kokkos/pull/5088)
- Drop numa args in threads backend initialization [\#5127](https://github.com/kokkos/kokkos/pull/5127)
- Warn users when a flag prefixed with -[-]kokkos is not recognized and do not remove it [\#5256](https://github.com/kokkos/kokkos/pull/5256)
- Give back to Core what belongs to Core (aka moving tune_internals option from Tools back to Core) [\#5202](https://github.com/kokkos/kokkos/pull/5202)
#### Build system updates:
- `nvcc_wrapper`: filter out -pedantic-errors from nvcc options [\#5235](https://github.com/kokkos/kokkos/pull/5235)
- `nvcc_wrapper`: add known nvcc option --source-in-ptx [\#5052](https://github.com/kokkos/kokkos/pull/5052)
- Link libdl as interface library [\#5179](https://github.com/kokkos/kokkos/pull/5179)
- Only show GPU architectures with enabled corresponding backend [\#5119](https://github.com/kokkos/kokkos/pull/5119)
- Enable optional external desul build [\#5021](https://github.com/kokkos/kokkos/pull/5021) [\#5132](https://github.com/kokkos/kokkos/pull/5132)
- Export `Kokkos_CXX_STANDARD` variable with CMake [\#5068](https://github.com/kokkos/kokkos/pull/5068)
- Suppress warnings with nvc++ [\#5031](https://github.com/kokkos/kokkos/pull/5031)
- Disallow multiple host architectures in CMake [\#4996](https://github.com/kokkos/kokkos/pull/4996)
- Do not include compiler warning flags in the compile option of the cmake target [\#4989](https://github.com/kokkos/kokkos/pull/4989)
- AOT flags for OpenMPTarget targeting Intel GPUs [\#4915](https://github.com/kokkos/kokkos/pull/4915)
- Repurpose `Kokkos_ARCH_INTEL_GEN` for SYCL to mean JIT to be conforming with OMPT [\#4894](https://github.com/kokkos/kokkos/pull/4894)
- Replace amdgpu-target with offload-arch [\#4874](https://github.com/kokkos/kokkos/pull/4874)
- Do not enable `kokkos_launch_compiler` when `CMAKE_CXX_COMPILER_LAUNCHER` is set [\#4870](https://github.com/kokkos/kokkos/pull/4870)
- Move CMake version check up [\#4797](https://github.com/kokkos/kokkos/pull/4797)
### Incompatibilities:
- Remove `KOKKOS_THREAD_LOCAL` [\#5064](https://github.com/kokkos/kokkos/pull/5064)
- Remove `KOKKOS_ENABLE_POSIX_MEMALIGN` [\#5011](https://github.com/kokkos/kokkos/pull/5011)
- Remove unused `KOKKOS_ENABLE_TM` [\#4995](https://github.com/kokkos/kokkos/pull/4995)
- Remove unused cmakedefine `KOKKOS_ENABLE_COMPILER_WARNINGS` [\#4883](https://github.com/kokkos/kokkos/pull/4883)
- Remove unused `KOKKOS_ENABLE_DUALVIEW_MODIFY_CHECK` [\#4882](https://github.com/kokkos/kokkos/pull/4882)
- Drop Instruction Set Architecture (ISA) macros [\#4981](https://github.com/kokkos/kokkos/pull/4981)
- Warn in `ScopeGuard` about illegal usage [\#5250](https://github.com/kokkos/kokkos/pull/5250)
### Deprecations:
- Guard against non-public header inclusion [\#5178](https://github.com/kokkos/kokkos/pull/5178)
- Raise deprecation warnings if non empty WorkTag class is used [\#5230](https://github.com/kokkos/kokkos/pull/5230)
- Deprecate `parallel_*` overloads taking the label as trailing argument [\#5141](https://github.com/kokkos/kokkos/pull/5141)
- Deprecate nested types in functional [\#5185](https://github.com/kokkos/kokkos/pull/5185)
- Deprecate `InitArguments` struct and replace it with `InitializationSettings` [\#5135](https://github.com/kokkos/kokkos/pull/5135)
- Deprecate `finalize_all()` [\#5134](https://github.com/kokkos/kokkos/pull/5134)
- Deprecate command line arguments (other than `--help`) that are not prefixed with `kokkos-*` [\#5120](https://github.com/kokkos/kokkos/pull/5120)
- Deprecate `--[kokkos-]numa` cmdline arg and `KOKKOS_NUMA` env var [\#5117](https://github.com/kokkos/kokkos/pull/5117)
- Deprecate `--[kokkos-]threads` command line argument in favor of `--[kokkos-]num-threads` [\#5111](https://github.com/kokkos/kokkos/pull/5111)
- Deprecate `Kokkos::common_view_alloc_prop` [\#5059](https://github.com/kokkos/kokkos/pull/5059)
- Deprecate `Kokkos::is_reducer_type` [\#4957](https://github.com/kokkos/kokkos/pull/4957)
- Deprecate `OffsetView` constructors taking `index_list_type` [\#4810](https://github.com/kokkos/kokkos/pull/4810)
- Deprecate overloads of `Kokkos::sort` taking a parameter `bool always_use_kokkos_sort` [\#5382](https://github.com/kokkos/kokkos/issues/5382)
- Warn about `parallel_reduce` cases that call `join()` with volatile-qualified arguments [\#5215](https://github.com/kokkos/kokkos/pull/5215)
### Bug Fixes:
- CUDA Reductions: Fix data races reported by Nvidia `compute-sanitizer` [\#4855](https://github.com/kokkos/kokkos/pull/4855)
- Work around Intel compiler bug [\#5301](https://github.com/kokkos/kokkos/pull/5301)
- Avoid allocating memory for UniqueToken [\#5300](https://github.com/kokkos/kokkos/pull/5300)
- DynamicView: Properly resize mirror instances after construction [\#5276](https://github.com/kokkos/kokkos/pull/5276)
- Remove Kokkos::Rank limit of 6 ranks [\#5271](https://github.com/kokkos/kokkos/pull/5271)
- Do not forget to set last element to nullptr when removing a flag in `Kokkos::initialize` [\#5272](https://github.com/kokkos/kokkos/pull/5272)
- Fix CUDA+MSVC build issue [\#5261](https://github.com/kokkos/kokkos/pull/5261)
- Fix `DynamicView::resize_serial` [\#5220](https://github.com/kokkos/kokkos/pull/5220)
- Fix cmake default compiler flags for unknown compiler [\#5217](https://github.com/kokkos/kokkos/pull/5217)
- Fix `move_backward` [\#5191](https://github.com/kokkos/kokkos/pull/5191)
- Fixing issue 5196 - missing symbol with intel compiler [\#5207](https://github.com/kokkos/kokkos/pull/5207)
- Preserve `KOKKOS_INVALID_INDEX` in ViewDimension and ArrayLayout construction [\#5188](https://github.com/kokkos/kokkos/pull/5188)
- Finalize `deep_copy_space` early avoiding printing to `std::cerr` for Cuda [\#5151](https://github.com/kokkos/kokkos/pull/5151)
- Use correct policy in Threads MDRange `parallel_reduce` [\#5123](https://github.com/kokkos/kokkos/pull/5123)
- Fix building with NVCC as the CXX compiler while the CUDA backend is not enabled [\#5115](https://github.com/kokkos/kokkos/pull/5115)
- OpenMPTarget Index range fix for MDRange. [\#5089](https://github.com/kokkos/kokkos/pull/5089)
- Fix bug with CUDA's team reduction for empty ranges [\#5079](https://github.com/kokkos/kokkos/pull/5079)
- Fix using `ZeroMemset` for Serial [\#5077](https://github.com/kokkos/kokkos/pull/5077)
- Fix `Kokkos::Vector::push_back` for default execution space [\#5047](https://github.com/kokkos/kokkos/pull/5047)
- ScatterView: Fix ScatterMin/ScatterMax to use proper atomics [\#5045](https://github.com/kokkos/kokkos/pull/5045)
- Fix calling `ZeroMemset` in `deep_copy` [\#5040](https://github.com/kokkos/kokkos/pull/5040)
- Make View self-assignment not produce double-free [\#5024](https://github.com/kokkos/kokkos/pull/5024)
- Guard against unrecognized pragma with intel compilers [\#5019](https://github.com/kokkos/kokkos/pull/5019)
- Fix race condition in `HIPParallelLaunch` [\#5008](https://github.com/kokkos/kokkos/pull/5008)
- KokkosP: Fix `device_id` in profiling [\#4997](https://github.com/kokkos/kokkos/pull/4997)
- Fix for `Kokkos::vector::insert` into empty vector with begin and end iterators [\#4988](https://github.com/kokkos/kokkos/pull/4988)
- Fix Core header files installation [\#4984](https://github.com/kokkos/kokkos/pull/4984)
- Fix bounds errors with `Kokkos::sort` [\#4980](https://github.com/kokkos/kokkos/pull/4980)
- Fixup let `RangePolicy::set_chunk_size` return a reference to self [\#4918](https://github.com/kokkos/kokkos/pull/4918)
- Fix allocating large Views [\#4907](https://github.com/kokkos/kokkos/pull/4907)
- Fix combined reductions with `Kokkos::View` [\#4896](https://github.com/kokkos/kokkos/pull/4896)
- Fixed `_CUDA_ARCH__` to `__CUDA_ARCH__` for CUDA LDG [\#4893](https://github.com/kokkos/kokkos/pull/4893)
- Fixup `View::access()` truncate parameter pack [\#4876](https://github.com/kokkos/kokkos/pull/4876)
- Fix `abort` with HIP backend for ROCm 5.0.2 and beyond [\#4873](https://github.com/kokkos/kokkos/pull/4873)
- Fix HIP version when printing the configuration [\#4872](https://github.com/kokkos/kokkos/pull/4872)
- Fix scratch lock array when using scratch level 1 [\#4871](https://github.com/kokkos/kokkos/pull/4871)
- Fix Makefile.kokkos to work with fujitsu compiler [\#4867](https://github.com/kokkos/kokkos/pull/4867)
- cmake: Correct link THREADS link option [\#4854](https://github.com/kokkos/kokkos/pull/4854)
- UniqueToken `impl_acquire` function should be device only [\#4819](https://github.com/kokkos/kokkos/pull/4819)
- Fix example calls to non existing static `print_configuration` [\#4806](https://github.com/kokkos/kokkos/pull/4806)
- Fix requests for large team scratch sizes [\#4728](https://github.com/kokkos/kokkos/pull/4728)
## [3.6.01](https://github.com/kokkos/kokkos/tree/3.6.01) (2022-05-23) ## [3.6.01](https://github.com/kokkos/kokkos/tree/3.6.01) (2022-05-23)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.6.00...3.6.01) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.6.00...3.6.01)

View File

@ -1,3 +1,4 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
# Disable in-source builds to prevent source tree corruption. # Disable in-source builds to prevent source tree corruption.
if( "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}" ) if( "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}" )
@ -28,11 +29,6 @@ SET(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR})
SET(KOKKOS_PATH ${Kokkos_SOURCE_DIR}) SET(KOKKOS_PATH ${Kokkos_SOURCE_DIR})
SET(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}) SET(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR})
# Needed to simplify syntax of if statements
CMAKE_POLICY(SET CMP0054 NEW)
# Needed to make IN_LIST a valid operator
CMAKE_POLICY(SET CMP0057 NEW)
# Is this a build as part of Trilinos? # Is this a build as part of Trilinos?
IF(COMMAND TRIBITS_PACKAGE_DECL) IF(COMMAND TRIBITS_PACKAGE_DECL)
SET(KOKKOS_HAS_TRILINOS ON) SET(KOKKOS_HAS_TRILINOS ON)
@ -72,7 +68,6 @@ ENDFUNCTION()
LIST(APPEND CMAKE_MODULE_PATH cmake/Modules) LIST(APPEND CMAKE_MODULE_PATH cmake/Modules)
IF(NOT KOKKOS_HAS_TRILINOS) IF(NOT KOKKOS_HAS_TRILINOS)
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
set(CMAKE_DISABLE_SOURCE_CHANGES ON) set(CMAKE_DISABLE_SOURCE_CHANGES ON)
set(CMAKE_DISABLE_IN_SOURCE_BUILD ON) set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
@ -80,7 +75,7 @@ IF(NOT KOKKOS_HAS_TRILINOS)
# downstream dependencies need to match this! # downstream dependencies need to match this!
SET(KOKKOS_COMPILE_LANGUAGE CXX) SET(KOKKOS_COMPILE_LANGUAGE CXX)
# use lower case here since we didn't parse options yet # use lower case here since we didn't parse options yet
IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE) IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE AND Kokkos_ENABLE_CUDA)
# Without this as a language for the package we would get a C++ compiler enabled. # Without this as a language for the package we would get a C++ compiler enabled.
# but we still need a C++ compiler even if we build all our cpp files as CUDA only # but we still need a C++ compiler even if we build all our cpp files as CUDA only
@ -90,9 +85,7 @@ IF(NOT KOKKOS_HAS_TRILINOS)
# days. # days.
SET(KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE CXX) SET(KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE CXX)
IF (Kokkos_ENABLE_CUDA) SET(KOKKOS_COMPILE_LANGUAGE CUDA)
SET(KOKKOS_COMPILE_LANGUAGE CUDA)
ENDIF()
ENDIF() ENDIF()
IF (Spack_WORKAROUND) IF (Spack_WORKAROUND)
@ -135,14 +128,11 @@ ENDIF()
set(Kokkos_VERSION_MAJOR 3) set(Kokkos_VERSION_MAJOR 3)
set(Kokkos_VERSION_MINOR 6) set(Kokkos_VERSION_MINOR 7)
set(Kokkos_VERSION_PATCH 01) set(Kokkos_VERSION_PATCH 00)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
MESSAGE(STATUS "Setting policy CMP0074 to use <Package>_ROOT variables")
CMAKE_POLICY(SET CMP0074 NEW)
# Load either the real TriBITS or a TriBITS wrapper # Load either the real TriBITS or a TriBITS wrapper
# for certain utility functions that are universal (like GLOBAL_SET) # for certain utility functions that are universal (like GLOBAL_SET)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake) INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake)
@ -204,11 +194,16 @@ KOKKOS_SETUP_BUILD_ENVIRONMENT()
OPTION(BUILD_SHARED_LIBS "Build shared libraries" OFF) OPTION(BUILD_SHARED_LIBS "Build shared libraries" OFF)
SET(KOKKOS_EXT_LIBRARIES Kokkos::kokkos Kokkos::kokkoscore Kokkos::kokkoscontainers Kokkos::kokkosalgorithms) SET(KOKKOS_EXT_LIBRARIES Kokkos::kokkos Kokkos::kokkoscore Kokkos::kokkoscontainers Kokkos::kokkosalgorithms)
SET(KOKKOS_INT_LIBRARIES kokkos kokkoscore kokkoscontainers kokkosalgorithms) SET(KOKKOS_SUB_LIBRARIES kokkoscore kokkoscontainers kokkosalgorithms)
IF (KOKKOS_CXX_STANDARD GREATER_EQUAL 17)
LIST(APPEND KOKKOS_EXT_LIBRARIES Kokkos::kokkossimd)
LIST(APPEND KOKKOS_SUB_LIBRARIES kokkossimd)
ENDIF()
SET(KOKKOS_INT_LIBRARIES kokkos ${KOKKOS_SUB_LIBRARIES})
SET_PROPERTY(GLOBAL PROPERTY KOKKOS_INT_LIBRARIES ${KOKKOS_INT_LIBRARIES}) SET_PROPERTY(GLOBAL PROPERTY KOKKOS_INT_LIBRARIES ${KOKKOS_INT_LIBRARIES})
IF (KOKKOS_HAS_TRILINOS) IF (KOKKOS_HAS_TRILINOS)
SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) SET(TRILINOS_INCDIR ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR})
SET(KOKKOS_HEADER_DIR ${TRILINOS_INCDIR}) SET(KOKKOS_HEADER_DIR ${TRILINOS_INCDIR})
SET(KOKKOS_IS_SUBDIRECTORY TRUE) SET(KOKKOS_IS_SUBDIRECTORY TRUE)
ELSEIF(HAS_PARENT) ELSEIF(HAS_PARENT)
@ -296,7 +291,7 @@ IF (NOT KOKKOS_HAS_TRILINOS AND NOT Kokkos_INSTALL_TESTING)
#Make sure in-tree projects can reference this as Kokkos:: #Make sure in-tree projects can reference this as Kokkos::
#to match the installed target names #to match the installed target names
ADD_LIBRARY(Kokkos::kokkos ALIAS kokkos) ADD_LIBRARY(Kokkos::kokkos ALIAS kokkos)
TARGET_LINK_LIBRARIES(kokkos INTERFACE kokkoscore kokkoscontainers kokkosalgorithms) TARGET_LINK_LIBRARIES(kokkos INTERFACE ${KOKKOS_SUB_LIBRARIES})
KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(kokkos) KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(kokkos)
ENDIF() ENDIF()
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake) INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake)

View File

@ -11,21 +11,21 @@ CXXFLAGS += $(SHFLAGS)
endif endif
KOKKOS_VERSION_MAJOR = 3 KOKKOS_VERSION_MAJOR = 3
KOKKOS_VERSION_MINOR = 6 KOKKOS_VERSION_MINOR = 7
KOKKOS_VERSION_PATCH = 01 KOKKOS_VERSION_PATCH = 00
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
KOKKOS_DEVICES ?= "OpenMP" KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Threads" #KOKKOS_DEVICES ?= "Threads"
# Options: # Options:
# Intel: KNC,KNL,SNB,HSW,BDW,SKX # Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86 # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
# IBM: BGQ,Power7,Power8,Power9 # IBM: BGQ,Power7,Power8,Power9
# AMD-GPUS: Vega900,Vega906,Vega908,Vega90A # AMD-GPUS: Vega900,Vega906,Vega908,Vega90A
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP # Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC
KOKKOS_ARCH ?= "" KOKKOS_ARCH ?= ""
# Options: yes,no # Options: yes,no
KOKKOS_DEBUG ?= "no" KOKKOS_DEBUG ?= "no"
@ -193,6 +193,8 @@ KOKKOS_INTERNAL_COMPILER_INTEL_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VE
KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple clang) KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple clang)
KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC) KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC)
KOKKOS_INTERNAL_COMPILER_GCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),GCC) KOKKOS_INTERNAL_COMPILER_GCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),GCC)
# TODO fujitsu can emulate gcc or clang. Only clang mode works at the moment.
KOKKOS_INTERNAL_COMPILER_FUJITSU := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),FUJITSU)
# Check Host Compiler if using NVCC through nvcc_wrapper # Check Host Compiler if using NVCC through nvcc_wrapper
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
@ -221,8 +223,23 @@ endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
KOKKOS_INTENAL_COMPILER_CLANG = 0 KOKKOS_INTENAL_COMPILER_CLANG = 0
endif endif
# Fujitsu passes also as clang and gcc respectively
ifeq ($(KOKKOS_INTERNAL_COMPILER_FUJITSU), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1)
# TODO handle gcc flags and workaround for bug?
# fujitsu (gcc mode) is bugged, see https://github.com/kokkos/kokkos/issues/4730
$(warning Warning: ${CXX} in Trad Mode '-Nnoclang' (default) is not recommended. Use 'CXX = ${CXX} -Nclang' instead.)
# HACK since fujitsu only accepts some gcc flags, disable gcc here?
# KOKKOS_INTERNAL_COMPILER_GCC = 0
endif
# TODO handle clang flags
# warnings: works fine as is
# openmp: handled
#KOKKOS_INTERNAL_COMPILER_CLANG = 0
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
# TODO empty variable if fujitsu (clang mode) passes as clang
KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell $(CXX) --version | grep version | cut -d ' ' -f3 | tr -d '.') KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell $(CXX) --version | grep version | cut -d ' ' -f3 | tr -d '.')
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
@ -272,7 +289,12 @@ else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 1)
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
else else
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp ifeq ($(KOKKOS_INTERNAL_COMPILER_FUJITSU), 1)
# fujitsu (clang mode) fails with `=libomp`
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
else
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
endif
endif endif
else else
ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
@ -300,11 +322,15 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
else else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
#KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp #KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_WORKAROUND_OPENMPTARGET_CLANG -fopenmp -fopenmp=libomp KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_WORKAROUND_OPENMPTARGET_CLANG -fopenmp -fopenmp=libomp -Wno-openmp-mapping
KOKKOS_INTERNAL_OPENMPTARGET_LIB := -lomptarget KOKKOS_INTERNAL_OPENMPTARGET_LIB := -lomptarget
else else
#Assume GCC ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL_CLANG), 1)
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fiopenmp -Wno-openmp-mapping
else
#Assume GCC
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none
endif
endif endif
endif endif
@ -344,8 +370,12 @@ KOKKOS_INTERNAL_USE_ARCH_WSM := $(call kokkos_has_string,$(KOKKOS_ARCH),WSM)
KOKKOS_INTERNAL_USE_ARCH_SNB := $(call kokkos_has_string,$(KOKKOS_ARCH),SNB) KOKKOS_INTERNAL_USE_ARCH_SNB := $(call kokkos_has_string,$(KOKKOS_ARCH),SNB)
KOKKOS_INTERNAL_USE_ARCH_HSW := $(call kokkos_has_string,$(KOKKOS_ARCH),HSW) KOKKOS_INTERNAL_USE_ARCH_HSW := $(call kokkos_has_string,$(KOKKOS_ARCH),HSW)
KOKKOS_INTERNAL_USE_ARCH_BDW := $(call kokkos_has_string,$(KOKKOS_ARCH),BDW) KOKKOS_INTERNAL_USE_ARCH_BDW := $(call kokkos_has_string,$(KOKKOS_ARCH),BDW)
KOKKOS_INTERNAL_USE_ARCH_SKL := $(call kokkos_has_string,$(KOKKOS_ARCH),SKL)
KOKKOS_INTERNAL_USE_ARCH_SKX := $(call kokkos_has_string,$(KOKKOS_ARCH),SKX) KOKKOS_INTERNAL_USE_ARCH_SKX := $(call kokkos_has_string,$(KOKKOS_ARCH),SKX)
KOKKOS_INTERNAL_USE_ARCH_KNL := $(call kokkos_has_string,$(KOKKOS_ARCH),KNL) KOKKOS_INTERNAL_USE_ARCH_KNL := $(call kokkos_has_string,$(KOKKOS_ARCH),KNL)
KOKKOS_INTERNAL_USE_ARCH_ICL := $(call kokkos_has_string,$(KOKKOS_ARCH),ICL)
KOKKOS_INTERNAL_USE_ARCH_ICX := $(call kokkos_has_string,$(KOKKOS_ARCH),ICX)
KOKKOS_INTERNAL_USE_ARCH_SPR := $(call kokkos_has_string,$(KOKKOS_ARCH),SPR)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen) KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9) KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9)
@ -353,6 +383,7 @@ KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen12LP) KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen12LP)
KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelDG1) KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelDG1)
KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelXeHP) KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelXeHP)
KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC := $(call kokkos_has_string,$(KOKKOS_ARCH),PVC)
# NVIDIA based. # NVIDIA based.
NVCC_WRAPPER := $(KOKKOS_PATH)/bin/nvcc_wrapper NVCC_WRAPPER := $(KOKKOS_PATH)/bin/nvcc_wrapper
@ -436,19 +467,9 @@ KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_W
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)) KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3)) KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL)) KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX))
# Decide what ISA level we are able to support.
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
KOKKOS_INTERNAL_USE_ISA_KNC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9))
KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7))
# Decide whether we can support transactional memory
KOKKOS_INTERNAL_USE_TM := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_SKX))
# Incompatible flags? # Incompatible flags?
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc) KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_SKL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX)+$(KOKKOS_INTERNAL_USE_ARCH_ICL)+$(KOKKOS_INTERNAL_USE_ARCH_ICX)+$(KOKKOS_INTERNAL_USE_ARCH_SPR)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc)
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1") | bc) KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1") | bc)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
@ -551,36 +572,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_SERIAL") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_SERIAL")
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_TM), 1)
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_TM")
tmp := $(call kokkos_append_header,"$H""endif")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_X86_64")
tmp := $(call kokkos_append_header,"$H""endif")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1)
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_KNC")
tmp := $(call kokkos_append_header,"$H""endif")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCLE")
tmp := $(call kokkos_append_header,"$H""endif")
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1)
tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCBE")
tmp := $(call kokkos_append_header,"$H""endif")
endif
#only add the c++ standard flags if this is not CMake #only add the c++ standard flags if this is not CMake
tmp := $(call kokkos_append_header,"/* General Settings */") tmp := $(call kokkos_append_header,"/* General Settings */")
ifneq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1) ifneq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1)
@ -1041,7 +1032,28 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
endif endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SKL), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xSKYLAKE
KOKKOS_LDFLAGS += -xSKYLAKE
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
else
# Nothing here yet.
KOKKOS_CXXFLAGS += -march=skylake
KOKKOS_LDFLAGS += -march=skylake
endif
endif
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SKX), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
@ -1055,13 +1067,31 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
else else
# Nothing here yet. # Nothing here yet.
KOKKOS_CXXFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm KOKKOS_CXXFLAGS += -march=skylake-avx512 -mtune=skylake-avx512
KOKKOS_LDFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm KOKKOS_LDFLAGS += -march=skylake-avx512 -mtune=skylake-avx512
endif endif
endif endif
endif endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ICL), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
KOKKOS_CXXFLAGS += -march=icelake-client -mtune=icelake-client
KOKKOS_LDFLAGS += -march=icelake-client -mtune=icelake-client
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ICX), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
KOKKOS_CXXFLAGS += -march=icelake-server -mtune=icelake-server
KOKKOS_LDFLAGS += -march=icelake-server -mtune=icelake-server
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SPR), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
KOKKOS_CXXFLAGS += -march=sapphirerapids -mtune=sapphirerapids
KOKKOS_LDFLAGS += -march=sapphirerapids -mtune=sapphirerapids
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KNC") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KNC")
KOKKOS_CXXFLAGS += -mmic KOKKOS_CXXFLAGS += -mmic
@ -1091,7 +1121,7 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp-targets=nvptx64 -Xopenmp-target -march
endif endif
KOKKOS_INTERNAL_USE_CUDA_ARCH = 1 KOKKOS_INTERNAL_USE_CUDA_ARCH = 1
endif endif
@ -1192,29 +1222,29 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA900), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA900), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA900") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA900")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx900 KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx900
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx906 KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx906
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA908), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA908), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA908") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA908")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx908 KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx908
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA90A), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA90A), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA90A") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA90A")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx90a KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a
endif endif
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp)
ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0)
KOKKOS_SRC += $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_HIP.cpp KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp
endif endif
KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG) KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG)
@ -1230,51 +1260,67 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
endif endif
endif endif
# Figure out the architecture flag for SYCL. # Figure out Intel architecture flags.
ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
# Lets start with adding architecture defines KOKKOS_INTERNAL_LC_BACKEND := sycl
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 1) endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN") KOKKOS_INTERNAL_LC_BACKEND := openmp
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen9-" endif
endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN9") KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen9" endif
endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN9")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN11") KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen9"
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen11" endif
endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN11")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN12LP") KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen11"
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen12lp" endif
endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN12LP")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_DG1") KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen12lp"
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device dg1" endif
endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_DG1")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_XEHP") KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device dg1"
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device xehp" endif
endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_XEHP")
KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device xehp"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_PVC")
KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device 12.4.0"
endif
ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/SYCL/*.cpp) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/SYCL/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/SYCL/*.hpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/SYCL/*.hpp)
KOKKOS_CXXFLAGS+=-fsycl -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda KOKKOS_CXXFLAGS+=-fsycl -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda -fsycl-dead-args-optimization
KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_SYCL_ARCH_FLAG) KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG)
KOKKOS_LDFLAGS+=-fsycl KOKKOS_LDFLAGS+=-fsycl
KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_SYCL_ARCH_FLAG) KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG)
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG) -D__STRICT_ANSI__
KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG)
endif endif
ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_DESUL_ATOMICS") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_DESUL_ATOMICS")
KOKKOS_CPPFLAGS+=-I$(KOKKOS_PATH)/tpls/desul/include
else else
ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1) ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1)
$(error Contradictory Desul atomics options: KOKKOS_OPTIONS=$(KOKKOS_OPTIONS) ) $(error Contradictory Desul atomics options: KOKKOS_OPTIONS=$(KOKKOS_OPTIONS) )
@ -1359,7 +1405,7 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0)
KOKKOS_SRC += $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_CUDA.cpp KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp
endif endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
ifneq ($(CUDA_PATH),) ifneq ($(CUDA_PATH),)
@ -1417,6 +1463,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
KOKKOS_TPL_LIBRARY_NAMES += pthread KOKKOS_TPL_LIBRARY_NAMES += pthread
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Serial/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Serial/*.hpp)
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.cpp) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.hpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.hpp)
@ -1449,15 +1500,6 @@ ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC)) KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC))
endif endif
# Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial
# device to avoid a link warning.
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
endif
ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp,$(KOKKOS_SRC))
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp,$(KOKKOS_SRC))
endif
# With Cygwin functions such as fdopen and fileno are not defined # With Cygwin functions such as fdopen and fileno are not defined
# when strict ansi is enabled. strict ansi gets enabled with -std=c++14 # when strict ansi is enabled. strict ansi gets enabled with -std=c++14
# though. So we hard undefine it here. Not sure if that has any bad side effects # though. So we hard undefine it here. Not sure if that has any bad side effects

View File

@ -16,10 +16,6 @@ Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Ho
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp
Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
@ -41,6 +37,13 @@ Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/
Kokkos_NumericTraits.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp Kokkos_NumericTraits.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp
Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial_Task.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
@ -50,8 +53,8 @@ Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
Lock_Array_CUDA.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_CUDA.cpp Lock_Array_CUDA.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_CUDA.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
@ -70,20 +73,18 @@ Kokkos_HIP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp
Kokkos_HIP_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp Kokkos_HIP_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp
Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_HIP.cpp Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_HIP.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp Kokkos_OpenMP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp
Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
endif endif

View File

@ -10,270 +10,48 @@ hierarchies and multiple types of execution resources. It currently can use
CUDA, HIP, SYCL, HPX, OpenMP and C++ threads as backend programming models with several other CUDA, HIP, SYCL, HPX, OpenMP and C++ threads as backend programming models with several other
backends in development. backends in development.
Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem, **Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem.**
which also provides math kernels (https://github.com/kokkos/kokkos-kernels), as well as
profiling and debugging tools (https://github.com/kokkos/kokkos-tools). For the complete documentation, click below:
# [kokkos.github.io/kokkos-core-wiki](https://kokkos.github.io/kokkos-core-wiki)
# Learning about Kokkos # Learning about Kokkos
The best way to start learning about Kokkos is going through the Kokkos Lectures. To start learning about Kokkos:
They are online available at https://kokkos.link/the-lectures and contain a mix
of lecture videos and hands-on exercises covering all the important Kokkos Ecosystem
capabilities.
A programming guide and API reference can be found on the Wiki - [Kokkos Lectures](https://kokkos.github.io/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important Kokkos Ecosystem capabilities.
(https://github.com/kokkos/kokkos/wiki).
- [Programming guide](https://kokkos.github.io/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch.
- [API reference](https://kokkos.github.io/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.github.io/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.github.io/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.github.io/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.github.io/kokkos-core-wiki/API/alphabetical.html).
- [Use cases and Examples](https://kokkos.github.io/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability.
For questions find us on Slack: https://kokkosteam.slack.com or open a github issue. For questions find us on Slack: https://kokkosteam.slack.com or open a github issue.
For non-public questions send an email to For non-public questions send an email to: *crtrott(at)sandia.gov*
crtrott(at)sandia.gov
# Contributing to Kokkos # Contributing to Kokkos
We are open and try to encourage contributions from external developers. Please see [this page](https://kokkos.github.io/kokkos-core-wiki/contributing.html) for details on how to contribute.
To do so please first open an issue describing the contribution and then issue
a pull request against the develop branch. For larger features it may be good
to get guidance from the core development team first through the github issue.
Note that Kokkos Core is licensed under standard 3-clause BSD terms of use. # Requirements, Building and Installing
Which means contributing to Kokkos allows anyone else to use your contributions
not just for public purposes but also for closed source commercial projects.
For specifics see the LICENSE file contained in the repository or distribution.
# Requirements All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.github.io/kokkos-core-wiki/requirements.html).
### Minimum Compiler Versions
Generally Kokkos should work with all compiler versions newer than the minimum.
However as in all sufficiently complex enough code, we have to work around compiler
bugs with almost all compilers. So compiler versions we don't test may have issues
we are unaware of.
* GCC: 5.3.0
* Clang: 4.0.0
* Intel: 17.0.1
* NVCC: 9.2.88
* NVC++: 21.5
* ROCm: 4.3
* MSVC: 19.29
* IBM XL: 16.1.1
* Fujitsu: 4.5.0
* ARM/Clang 20.1
### Primary Tested Compilers
* GCC: 5.3.0, 6.1.0, 7.3.0, 8.3, 9.2, 10.0
* NVCC: 9.2.88, 10.1, 11.0
* Clang: 8.0.0, 9.0.0, 10.0.0, 12.0.0
* Intel 17.4, 18.1, 19.5
* MSVC: 19.29
* ARM/Clang: 20.1
* IBM XL: 16.1.1
* ROCm: 4.3.0
### Build system:
* CMake >= 3.16: required
* CMake >= 3.18: Fortran linkage. This does not affect most mixed Fortran/Kokkos builds. See [build issues](BUILD.md#KnownIssues).
* CMake >= 3.21.1 for NVC++
Primary tested compiler are passing in release mode
with warnings as errors. They also are tested with a comprehensive set of
backend combinations (i.e. OpenMP, Threads, Serial, OpenMP+Serial, ...).
We are using the following set of flags:
* GCC:
````
-Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wignored-qualifiers -Wempty-body
-Wclobbered -Wuninitialized
````
* Intel:
````
-Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wuninitialized
````
* Clang:
````
-Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wuninitialized
````
* NVCC:
````
-Wall -Wunused-parameter -Wshadow -pedantic
-Werror -Wsign-compare -Wtype-limits
-Wuninitialized
````
Other compilers are tested occasionally, in particular when pushing from develop to
master branch. These are tested less rigorously without `-Werror` and only for a select set of backends.
# Building and Installing Kokkos
Kokkos provide a CMake build system and a raw Makefile build system.
The CMake build system is strongly encouraged and will be the most rigorously supported in future releases.
Full details are given in the [build instructions](BUILD.md). Basic setups are shown here:
## CMake
The best way to install Kokkos is using the CMake build system. Assuming Kokkos lives in `$srcdir`:
````bash
cmake $srcdir \
-DCMAKE_CXX_COMPILER=$path_to_compiler \
-DCMAKE_INSTALL_PREFIX=$path_to_install \
-DKokkos_ENABLE_OPENMP=On \
-DKokkos_ARCH_HSW=On \
-DKokkos_HWLOC_DIR=$path_to_hwloc
````
then simply type `make install`. The Kokkos CMake package will then be installed in `$path_to_install` to be used by downstream packages.
To validate the Kokkos build, configure with
````
-DKokkos_ENABLE_TESTS=On
````
and run `make test` after completing the build.
For your CMake project using Kokkos, code such as the following:
````cmake
find_package(Kokkos)
...
target_link_libraries(myTarget Kokkos::kokkos)
````
should be added to your CMakeLists.txt. Your configure should additionally include
````
-DKokkos_DIR=$path_to_install/cmake/lib/Kokkos
````
or
````
-DKokkos_ROOT=$path_to_install
````
for the install location given above.
## Spack
An alternative to manually building with the CMake is to use the Spack package manager.
To get started, download the Spack [repo](https://github.com/spack/spack).
````
A basic installation would be done as:
````bash
> spack install kokkos
````
Spack allows options and and compilers to be tuned in the install command.
````bash
> spack install kokkos@3.0 %gcc@7.3.0 +openmp
````
This example illustrates the three most common parameters to Spack:
* Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options.
* Version: immediately following `kokkos` the `@version` can specify a particular Kokkos to build
* Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%`option.
For a complete list of Kokkos options, run:
````bash
> spack info kokkos
````
Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable".
Generally, Spack usage should never really require you to reference the computer-generated unique install folder.
More details are given in the [build instructions](BUILD.md). If you must know, you can locate Spack Kokkos installations with:
````bash
> spack find -p kokkos ...
````
where `...` is the unique spec identifying the particular Kokkos configuration and version.
Some more details can found in the Kokkos spack [documentation](Spack.md) or the Spack [website](https://spack.readthedocs.io/en/latest).
## Raw Makefile
Raw Makefiles are only supported via inline builds. See below.
## Inline Builds vs. Installed Package
For individual projects, it may be preferable to build Kokkos inline rather than link to an installed package.
The main reason is that you may otherwise need many different
configurations of Kokkos installed depending on the required compile time
features an application needs. For example there is only one default
execution space, which means you need different installations to have OpenMP
or C++ threads as the default space. Also for the CUDA backend there are certain
choices, such as allowing relocatable device code, which must be made at
installation time. Building Kokkos inline uses largely the same process
as compiling an application against an installed Kokkos library.
For CMake, this means copying over the Kokkos source code into your project and adding `add_subdirectory(kokkos)` to your CMakeLists.txt.
For raw Makefiles, see the example benchmarks/bytes_and_flops/Makefile which can be used with an installed library and or an inline build.
# Kokkos and CUDA UVM
Kokkos does support UVM as a specific memory space called CudaUVMSpace.
Allocations made with that space are accessible from host and device.
You can tell Kokkos to use that as the default space for Cuda allocations.
In either case UVM comes with a number of restrictions:
* You can't access allocations on the host while a kernel is potentially
running. This will lead to segfaults. To avoid that you either need to
call Kokkos::Cuda::fence() (or just Kokkos::fence()), after kernels, or
you can set the environment variable CUDA_LAUNCH_BLOCKING=1.
* In multi socket multi GPU machines without NVLINK, UVM defaults
to using zero copy allocations for technical reasons related to using multiple
GPUs from the same process. If an executable doesn't do that (e.g. each
MPI rank of an application uses a single GPU [can be the same GPU for
multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1.
This will enforce proper UVM allocations, but can lead to errors if
more than a single GPU is used by a single process.
Building and installation instructions are described [here](https://kokkos.github.io/kokkos-core-wiki/building.html).
# Citing Kokkos # Citing Kokkos
If you publish work which mentions Kokkos, please cite the following paper: Please see the [following page](https://kokkos.github.io/kokkos-core-wiki/citation.html).
````BibTex # License
@ARTICLE{9485033,
author={Trott, Christian R. and Lebrun-Grandié, Damien and Arndt, Daniel and Ciesko, Jan and Dang, Vinh and Ellingwood, Nathan and Gayatri, Rahulkumar and Harvey, Evan and Hollman, Daisy S. and Ibanez, Dan and Liber, Nevin and Madsen, Jonathan and Miles, Jeff and Poliakoff, David and Powell, Amy and Rajamanickam, Sivasankaran and Simberg, Mikael and Sunderland, Dan and Turcksin, Bruno and Wilke, Jeremiah},
journal={IEEE Transactions on Parallel and Distributed Systems},
title={Kokkos 3: Programming Model Extensions for the Exascale Era},
year={2022},
volume={33},
number={4},
pages={805-817},
doi={10.1109/TPDS.2021.3097283}}
````
If you use more than one Kokkos EcoSystem package, please also cite:
````BibTex
@ARTICLE{9502936,
author={Trott, Christian and Berger-Vergiat, Luc and Poliakoff, David and Rajamanickam, Sivasankaran and Lebrun-Grandie, Damien and Madsen, Jonathan and Al Awar, Nader and Gligoric, Milos and Shipman, Galen and Womeldorff, Geoff},
journal={Computing in Science Engineering},
title={The Kokkos EcoSystem: Comprehensive Performance Portability for High Performance Computing},
year={2021},
volume={23},
number={5},
pages={10-18},
doi={10.1109/MCSE.2021.3098509}}
````
And if you feel generous: feel free to cite the original Kokkos paper which describes most of the basic Kokkos concepts:
````BibTeX
@article{CarterEdwards20143202,
title = "Kokkos: Enabling manycore performance portability through polymorphic memory access patterns ",
journal = "Journal of Parallel and Distributed Computing ",
volume = "74",
number = "12",
pages = "3202 - 3216",
year = "2014",
note = "Domain-Specific Languages and High-Level Frameworks for High-Performance Computing ",
issn = "0743-7315",
doi = "https://doi.org/10.1016/j.jpdc.2014.07.003",
url = "http://www.sciencedirect.com/science/article/pii/S0743731514001257",
author = "H. Carter Edwards and Christian R. Trott and Daniel Sunderland"
}
````
##### [LICENSE](https://github.com/kokkos/kokkos/blob/master/LICENSE)
[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause)
Under the terms of Contract DE-NA0003525 with NTESS, Under the terms of Contract DE-NA0003525 with NTESS,
the U.S. Government retains certain rights in this software. the U.S. Government retains certain rights in this software.
The full license statement used in all headers is available [here](https://kokkos.github.io/kokkos-core-wiki/license.html) or
[here](https://github.com/kokkos/kokkos/blob/master/LICENSE).

View File

@ -11,6 +11,7 @@ FILE(GLOB ALGO_HEADERS *.hpp)
FILE(GLOB ALGO_SOURCES *.cpp) FILE(GLOB ALGO_SOURCES *.cpp)
LIST(APPEND ALGO_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h) LIST(APPEND ALGO_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h)
APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/*.hpp) APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/*.hpp)
APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/impl/*.hpp)
INSTALL ( INSTALL (
DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/" DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"

View File

@ -44,6 +44,10 @@
#ifndef KOKKOS_RANDOM_HPP #ifndef KOKKOS_RANDOM_HPP
#define KOKKOS_RANDOM_HPP #define KOKKOS_RANDOM_HPP
#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_RANDOM
#endif
#include <Kokkos_Core.hpp> #include <Kokkos_Core.hpp>
#include <Kokkos_Complex.hpp> #include <Kokkos_Complex.hpp>
@ -648,63 +652,44 @@ struct Random_UniqueIndex {
} }
}; };
#ifdef KOKKOS_ENABLE_CUDA #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
template <class MemorySpace>
struct Random_UniqueIndex<Kokkos::Device<Kokkos::Cuda, MemorySpace>> { #if defined(KOKKOS_ENABLE_CUDA)
using locks_view_type = #define KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP Kokkos::Cuda
View<int**, Kokkos::Device<Kokkos::Cuda, MemorySpace>>; #elif defined(KOKKOS_ENABLE_HIP)
KOKKOS_FUNCTION #define KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP Kokkos::Experimental::HIP
static int get_state_idx(const locks_view_type& locks_) {
#ifdef __CUDA_ARCH__
const int i_offset =
(threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
blockDim.x * blockDim.y * blockDim.z +
i_offset) %
locks_.extent(0);
while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) {
i += blockDim.x * blockDim.y * blockDim.z;
if (i >= static_cast<int>(locks_.extent(0))) {
i = i_offset;
}
}
return i;
#else
(void)locks_;
return 0;
#endif
}
};
#endif #endif
#ifdef KOKKOS_ENABLE_HIP
template <class MemorySpace> template <class MemorySpace>
struct Random_UniqueIndex< struct Random_UniqueIndex<
Kokkos::Device<Kokkos::Experimental::HIP, MemorySpace>> { Kokkos::Device<KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP, MemorySpace>> {
using locks_view_type = using locks_view_type =
View<int**, Kokkos::Device<Kokkos::Experimental::HIP, MemorySpace>>; View<int**, Kokkos::Device<KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP,
MemorySpace>>;
KOKKOS_FUNCTION KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type& locks_) { static int get_state_idx(const locks_view_type& locks_) {
#ifdef __HIP_DEVICE_COMPILE__ KOKKOS_IF_ON_DEVICE((
const int i_offset = const int i_offset =
(threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z; (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z;
int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) * int i =
(((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) *
blockDim.x * blockDim.y * blockDim.z + blockDim.x * blockDim.y * blockDim.z +
i_offset) % i_offset) %
locks_.extent(0); locks_.extent(0);
while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) { while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) {
i += blockDim.x * blockDim.y * blockDim.z; i += blockDim.x * blockDim.y * blockDim.z;
if (i >= static_cast<int>(locks_.extent(0))) { if (i >= static_cast<int>(locks_.extent(0))) {
i = i_offset; i = i_offset;
} }
} }
return i;
#else return i;))
(void)locks_; KOKKOS_IF_ON_HOST(((void)locks_; return 0;))
return 0;
#endif
} }
}; };
#undef KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP
#endif #endif
#ifdef KOKKOS_ENABLE_SYCL #ifdef KOKKOS_ENABLE_SYCL
@ -1279,7 +1264,6 @@ struct fill_random_functor_begin_end;
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 0, struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 0,
IndexType> { IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type begin, end; typename ViewType::const_value_type begin, end;
@ -1303,7 +1287,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 0,
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1, struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1,
IndexType> { IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type begin, end; typename ViewType::const_value_type begin, end;
@ -1331,7 +1314,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 1,
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2, struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2,
IndexType> { IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type begin, end; typename ViewType::const_value_type begin, end;
@ -1361,7 +1343,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 2,
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3, struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3,
IndexType> { IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type begin, end; typename ViewType::const_value_type begin, end;
@ -1392,7 +1373,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 3,
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4, struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4,
IndexType> { IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type begin, end; typename ViewType::const_value_type begin, end;
@ -1424,7 +1404,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 4,
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5, struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5,
IndexType> { IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type begin, end; typename ViewType::const_value_type begin, end;
@ -1458,7 +1437,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 5,
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6, struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6,
IndexType> { IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type begin, end; typename ViewType::const_value_type begin, end;
@ -1494,7 +1472,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 6,
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7, struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7,
IndexType> { IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type begin, end; typename ViewType::const_value_type begin, end;
@ -1532,7 +1509,6 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 7,
template <class ViewType, class RandomPool, int loops, class IndexType> template <class ViewType, class RandomPool, int loops, class IndexType>
struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 8, struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 8,
IndexType> { IndexType> {
using execution_space = typename ViewType::execution_space;
ViewType a; ViewType a;
RandomPool rand_pool; RandomPool rand_pool;
typename ViewType::const_value_type begin, end; typename ViewType::const_value_type begin, end;
@ -1569,34 +1545,57 @@ struct fill_random_functor_begin_end<ViewType, RandomPool, loops, 8,
} }
}; };
template <class ViewType, class RandomPool, class IndexType = int64_t> template <class ExecutionSpace, class ViewType, class RandomPool,
void fill_random(ViewType a, RandomPool g, class IndexType = int64_t>
void fill_random(const ExecutionSpace& exec, ViewType a, RandomPool g,
typename ViewType::const_value_type begin, typename ViewType::const_value_type begin,
typename ViewType::const_value_type end) { typename ViewType::const_value_type end) {
int64_t LDA = a.extent(0); int64_t LDA = a.extent(0);
if (LDA > 0) if (LDA > 0)
parallel_for("Kokkos::fill_random", (LDA + 127) / 128, parallel_for(
Impl::fill_random_functor_begin_end<ViewType, RandomPool, 128, "Kokkos::fill_random",
ViewType::Rank, IndexType>( Kokkos::RangePolicy<ExecutionSpace>(exec, 0, (LDA + 127) / 128),
a, g, begin, end)); Impl::fill_random_functor_begin_end<ViewType, RandomPool, 128,
ViewType::Rank, IndexType>(
a, g, begin, end));
} }
} // namespace Impl } // namespace Impl
template <class ExecutionSpace, class ViewType, class RandomPool,
class IndexType = int64_t>
void fill_random(const ExecutionSpace& exec, ViewType a, RandomPool g,
typename ViewType::const_value_type begin,
typename ViewType::const_value_type end) {
Impl::apply_to_view_of_static_rank(
[&](auto dst) { Kokkos::Impl::fill_random(exec, dst, g, begin, end); },
a);
}
template <class ExecutionSpace, class ViewType, class RandomPool,
class IndexType = int64_t>
void fill_random(const ExecutionSpace& exec, ViewType a, RandomPool g,
typename ViewType::const_value_type range) {
fill_random(exec, a, g, 0, range);
}
template <class ViewType, class RandomPool, class IndexType = int64_t> template <class ViewType, class RandomPool, class IndexType = int64_t>
void fill_random(ViewType a, RandomPool g, void fill_random(ViewType a, RandomPool g,
typename ViewType::const_value_type begin, typename ViewType::const_value_type begin,
typename ViewType::const_value_type end) { typename ViewType::const_value_type end) {
Impl::apply_to_view_of_static_rank( fill_random(typename ViewType::execution_space{}, a, g, begin, end);
[&](auto dst) { Kokkos::Impl::fill_random(dst, g, begin, end); }, a);
} }
template <class ViewType, class RandomPool, class IndexType = int64_t> template <class ViewType, class RandomPool, class IndexType = int64_t>
void fill_random(ViewType a, RandomPool g, void fill_random(ViewType a, RandomPool g,
typename ViewType::const_value_type range) { typename ViewType::const_value_type range) {
fill_random(a, g, 0, range); fill_random(typename ViewType::execution_space{}, a, g, 0, range);
} }
} // namespace Kokkos } // namespace Kokkos
#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_RANDOM
#undef KOKKOS_IMPL_PUBLIC_INCLUDE
#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_RANDOM
#endif
#endif #endif

View File

@ -44,6 +44,10 @@
#ifndef KOKKOS_SORT_HPP_ #ifndef KOKKOS_SORT_HPP_
#define KOKKOS_SORT_HPP_ #define KOKKOS_SORT_HPP_
#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT
#endif
#include <Kokkos_Core.hpp> #include <Kokkos_Core.hpp>
@ -120,13 +124,13 @@ class BinSort {
// If a Kokkos::View then can generate constant random access // If a Kokkos::View then can generate constant random access
// otherwise can only use the constant type. // otherwise can only use the constant type.
using src_view_type = typename std::conditional< using src_view_type = std::conditional_t<
Kokkos::is_view<SrcViewType>::value, Kokkos::is_view<SrcViewType>::value,
Kokkos::View<typename SrcViewType::const_data_type, Kokkos::View<typename SrcViewType::const_data_type,
typename SrcViewType::array_layout, typename SrcViewType::array_layout,
typename SrcViewType::device_type, typename SrcViewType::device_type,
Kokkos::MemoryTraits<Kokkos::RandomAccess> >, Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
typename SrcViewType::const_type>::type; typename SrcViewType::const_type>;
using perm_view_type = typename PermuteViewType::const_type; using perm_view_type = typename PermuteViewType::const_type;
@ -151,8 +155,11 @@ class BinSort {
} }
}; };
using execution_space = typename Space::execution_space; // Naming this alias "execution_space" would be problematic since it would be
using bin_op_type = BinSortOp; // considered as execution space for the various functors which might use
// another execution space through sort() or create_permute_vector().
using exec_space = typename Space::execution_space;
using bin_op_type = BinSortOp;
struct bin_count_tag {}; struct bin_count_tag {};
struct bin_offset_tag {}; struct bin_offset_tag {};
@ -171,13 +178,13 @@ class BinSort {
// If a Kokkos::View then can generate constant random access // If a Kokkos::View then can generate constant random access
// otherwise can only use the constant type. // otherwise can only use the constant type.
using const_rnd_key_view_type = typename std::conditional< using const_rnd_key_view_type = std::conditional_t<
Kokkos::is_view<KeyViewType>::value, Kokkos::is_view<KeyViewType>::value,
Kokkos::View<typename KeyViewType::const_data_type, Kokkos::View<typename KeyViewType::const_data_type,
typename KeyViewType::array_layout, typename KeyViewType::array_layout,
typename KeyViewType::device_type, typename KeyViewType::device_type,
Kokkos::MemoryTraits<Kokkos::RandomAccess> >, Kokkos::MemoryTraits<Kokkos::RandomAccess> >,
const_key_view_type>::type; const_key_view_type>;
using non_const_key_scalar = typename KeyViewType::non_const_value_type; using non_const_key_scalar = typename KeyViewType::non_const_value_type;
using const_key_scalar = typename KeyViewType::const_value_type; using const_key_scalar = typename KeyViewType::const_value_type;
@ -220,6 +227,14 @@ class BinSort {
range_begin(range_begin_), range_begin(range_begin_),
range_end(range_end_), range_end(range_end_),
sort_within_bins(sort_within_bins_) { sort_within_bins(sort_within_bins_) {
static_assert(
Kokkos::SpaceAccessibility<ExecutionSpace,
typename Space::memory_space>::accessible,
"The provided execution space must be able to access the memory space "
"BinSort was initialized with!");
if (bin_op.max_bins() <= 0)
Kokkos::abort(
"The number of bins in the BinSortOp object must be greater than 0!");
bin_count_atomic = Kokkos::View<int*, Space>( bin_count_atomic = Kokkos::View<int*, Space>(
"Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins()); "Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins());
bin_count_const = bin_count_atomic; bin_count_const = bin_count_atomic;
@ -235,7 +250,7 @@ class BinSort {
BinSort(const_key_view_type keys_, int range_begin_, int range_end_, BinSort(const_key_view_type keys_, int range_begin_, int range_end_,
BinSortOp bin_op_, bool sort_within_bins_ = false) BinSortOp bin_op_, bool sort_within_bins_ = false)
: BinSort(execution_space{}, keys_, range_begin_, range_end_, bin_op_, : BinSort(exec_space{}, keys_, range_begin_, range_end_, bin_op_,
sort_within_bins_) {} sort_within_bins_) {}
template <typename ExecutionSpace> template <typename ExecutionSpace>
@ -245,13 +260,19 @@ class BinSort {
BinSort(const_key_view_type keys_, BinSortOp bin_op_, BinSort(const_key_view_type keys_, BinSortOp bin_op_,
bool sort_within_bins_ = false) bool sort_within_bins_ = false)
: BinSort(execution_space{}, keys_, bin_op_, sort_within_bins_) {} : BinSort(exec_space{}, keys_, bin_op_, sort_within_bins_) {}
//---------------------------------------- //----------------------------------------
// Create the permutation vector, the bin_offset array and the bin_count // Create the permutation vector, the bin_offset array and the bin_count
// array. Can be called again if keys changed // array. Can be called again if keys changed
template <class ExecutionSpace = execution_space> template <class ExecutionSpace = exec_space>
void create_permute_vector(const ExecutionSpace& exec = execution_space{}) { void create_permute_vector(const ExecutionSpace& exec = exec_space{}) {
static_assert(
Kokkos::SpaceAccessibility<ExecutionSpace,
typename Space::memory_space>::accessible,
"The provided execution space must be able to access the memory space "
"BinSort was initialized with!");
const size_t len = range_end - range_begin; const size_t len = range_end - range_begin;
Kokkos::parallel_for( Kokkos::parallel_for(
"Kokkos::Sort::BinCount", "Kokkos::Sort::BinCount",
@ -281,6 +302,17 @@ class BinSort {
template <class ExecutionSpace, class ValuesViewType> template <class ExecutionSpace, class ValuesViewType>
void sort(const ExecutionSpace& exec, ValuesViewType const& values, void sort(const ExecutionSpace& exec, ValuesViewType const& values,
int values_range_begin, int values_range_end) const { int values_range_begin, int values_range_end) const {
static_assert(
Kokkos::SpaceAccessibility<ExecutionSpace,
typename Space::memory_space>::accessible,
"The provided execution space must be able to access the memory space "
"BinSort was initialized with!");
static_assert(
Kokkos::SpaceAccessibility<
ExecutionSpace, typename ValuesViewType::memory_space>::accessible,
"The provided execution space must be able to access the memory space "
"of the View argument!");
using scratch_view_type = using scratch_view_type =
Kokkos::View<typename ValuesViewType::data_type, Kokkos::View<typename ValuesViewType::data_type,
typename ValuesViewType::array_layout, typename ValuesViewType::array_layout,
@ -340,7 +372,7 @@ class BinSort {
template <class ValuesViewType> template <class ValuesViewType>
void sort(ValuesViewType const& values, int values_range_begin, void sort(ValuesViewType const& values, int values_range_begin,
int values_range_end) const { int values_range_end) const {
execution_space exec; exec_space exec;
sort(exec, values, values_range_begin, values_range_end); sort(exec, values, values_range_begin, values_range_end);
exec.fence("Kokkos::Sort: fence after sorting"); exec.fence("Kokkos::Sort: fence after sorting");
} }
@ -428,7 +460,7 @@ struct BinOp1D {
BinOp1D() = default; BinOp1D() = default;
// Construct BinOp with number of bins, minimum value and maxuimum value // Construct BinOp with number of bins, minimum value and maximum value
BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, BinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
typename KeyViewType::const_value_type max) typename KeyViewType::const_value_type max)
: max_bins_(max_bins__ + 1), : max_bins_(max_bins__ + 1),
@ -554,11 +586,7 @@ struct min_max_functor {
template <class ExecutionSpace, class ViewType> template <class ExecutionSpace, class ViewType>
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort( std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort(
const ExecutionSpace& exec, ViewType const& view, const ExecutionSpace& exec, ViewType const& view) {
bool const always_use_kokkos_sort = false) {
if (!always_use_kokkos_sort) {
if (Impl::try_std_sort(view, exec)) return;
}
using CompType = BinOp1D<ViewType>; using CompType = BinOp1D<ViewType>;
Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result; Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
@ -596,12 +624,38 @@ std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort(
bin_sort.sort(exec, view); bin_sort.sort(exec, view);
} }
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
template <class ExecutionSpace, class ViewType>
KOKKOS_DEPRECATED_WITH_COMMENT(
"Use the overload not taking bool always_use_kokkos_sort")
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort(
const ExecutionSpace& exec, ViewType const& view,
bool const always_use_kokkos_sort) {
if (!always_use_kokkos_sort && Impl::try_std_sort(view, exec)) {
return;
} else {
sort(exec, view);
}
}
#endif
template <class ViewType> template <class ViewType>
void sort(ViewType const& view, bool const always_use_kokkos_sort = false) { void sort(ViewType const& view) {
typename ViewType::execution_space exec;
sort(exec, view);
exec.fence("Kokkos::Sort: fence after sorting");
}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
template <class ViewType>
KOKKOS_DEPRECATED_WITH_COMMENT(
"Use the overload not taking bool always_use_kokkos_sort")
void sort(ViewType const& view, bool const always_use_kokkos_sort) {
typename ViewType::execution_space exec; typename ViewType::execution_space exec;
sort(exec, view, always_use_kokkos_sort); sort(exec, view, always_use_kokkos_sort);
exec.fence("Kokkos::Sort: fence after sorting"); exec.fence("Kokkos::Sort: fence after sorting");
} }
#endif
template <class ExecutionSpace, class ViewType> template <class ExecutionSpace, class ViewType>
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort( std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort(
@ -635,4 +689,8 @@ void sort(ViewType view, size_t const begin, size_t const end) {
} // namespace Kokkos } // namespace Kokkos
#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT
#undef KOKKOS_IMPL_PUBLIC_INCLUDE
#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT
#endif
#endif #endif

View File

@ -44,59 +44,103 @@
#ifndef KOKKOS_STD_ALGORITHMS_HPP #ifndef KOKKOS_STD_ALGORITHMS_HPP
#define KOKKOS_STD_ALGORITHMS_HPP #define KOKKOS_STD_ALGORITHMS_HPP
#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STD_ALGORITHMS
#endif
/// \file Kokkos_StdAlgorithms.hpp /// \file Kokkos_StdAlgorithms.hpp
/// \brief Kokkos counterparts for Standard C++ Library algorithms /// \brief Kokkos counterparts for Standard C++ Library algorithms
#include <std_algorithms/Kokkos_Constraints.hpp> #include "std_algorithms/impl/Kokkos_Constraints.hpp"
#include <std_algorithms/Kokkos_RandomAccessIterator.hpp> #include "std_algorithms/impl/Kokkos_RandomAccessIterator.hpp"
#include <std_algorithms/Kokkos_BeginEnd.hpp> #include "std_algorithms/Kokkos_BeginEnd.hpp"
// distance // distance
#include <std_algorithms/Kokkos_Distance.hpp> #include "std_algorithms/Kokkos_Distance.hpp"
// move, swap, iter_swap // note that we categorize below the headers
#include "std_algorithms/Kokkos_ModifyingOperations.hpp" // following the std classification.
// find, find_if, find_if_not // modifying ops
// for_each, for_each_n #include "std_algorithms/Kokkos_Swap.hpp"
// mismatch #include "std_algorithms/Kokkos_IterSwap.hpp"
// equal
// count_if, count
// all_of, any_of, none_of
// adjacent_find
// lexicographical_compare
// search, search_n
// find_first_of, find_end
#include <std_algorithms/Kokkos_NonModifyingSequenceOperations.hpp>
// replace, replace_copy_if, replace_copy, replace_if // non-modifying sequence
// copy, copy_n, copy_backward, copy_if #include "std_algorithms/Kokkos_AdjacentFind.hpp"
// fill, fill_n #include "std_algorithms/Kokkos_Count.hpp"
// transform #include "std_algorithms/Kokkos_CountIf.hpp"
// generate, generate_n #include "std_algorithms/Kokkos_AllOf.hpp"
// reverse, reverse_copy #include "std_algorithms/Kokkos_AnyOf.hpp"
// move, move_backward #include "std_algorithms/Kokkos_NoneOf.hpp"
// swap_ranges #include "std_algorithms/Kokkos_Equal.hpp"
// unique, unique_copy #include "std_algorithms/Kokkos_Find.hpp"
// rotate, rotate_copy #include "std_algorithms/Kokkos_FindIf.hpp"
// remove, remove_if, remove_copy, remove_copy_if #include "std_algorithms/Kokkos_FindIfNot.hpp"
// shift_left, shift_right #include "std_algorithms/Kokkos_FindEnd.hpp"
#include <std_algorithms/Kokkos_ModifyingSequenceOperations.hpp> #include "std_algorithms/Kokkos_FindFirstOf.hpp"
#include "std_algorithms/Kokkos_ForEach.hpp"
#include "std_algorithms/Kokkos_ForEachN.hpp"
#include "std_algorithms/Kokkos_LexicographicalCompare.hpp"
#include "std_algorithms/Kokkos_Mismatch.hpp"
#include "std_algorithms/Kokkos_Search.hpp"
#include "std_algorithms/Kokkos_SearchN.hpp"
// is_sorted_until, is_sorted // modifying sequence
#include <std_algorithms/Kokkos_SortingOperations.hpp> #include "std_algorithms/Kokkos_Fill.hpp"
#include "std_algorithms/Kokkos_FillN.hpp"
#include "std_algorithms/Kokkos_Replace.hpp"
#include "std_algorithms/Kokkos_ReplaceIf.hpp"
#include "std_algorithms/Kokkos_ReplaceCopyIf.hpp"
#include "std_algorithms/Kokkos_ReplaceCopy.hpp"
#include "std_algorithms/Kokkos_Copy.hpp"
#include "std_algorithms/Kokkos_CopyN.hpp"
#include "std_algorithms/Kokkos_CopyBackward.hpp"
#include "std_algorithms/Kokkos_CopyIf.hpp"
#include "std_algorithms/Kokkos_Transform.hpp"
#include "std_algorithms/Kokkos_Generate.hpp"
#include "std_algorithms/Kokkos_GenerateN.hpp"
#include "std_algorithms/Kokkos_Reverse.hpp"
#include "std_algorithms/Kokkos_ReverseCopy.hpp"
#include "std_algorithms/Kokkos_Move.hpp"
#include "std_algorithms/Kokkos_MoveBackward.hpp"
#include "std_algorithms/Kokkos_SwapRanges.hpp"
#include "std_algorithms/Kokkos_Unique.hpp"
#include "std_algorithms/Kokkos_UniqueCopy.hpp"
#include "std_algorithms/Kokkos_Rotate.hpp"
#include "std_algorithms/Kokkos_RotateCopy.hpp"
#include "std_algorithms/Kokkos_Remove.hpp"
#include "std_algorithms/Kokkos_RemoveIf.hpp"
#include "std_algorithms/Kokkos_RemoveCopy.hpp"
#include "std_algorithms/Kokkos_RemoveCopyIf.hpp"
#include "std_algorithms/Kokkos_ShiftLeft.hpp"
#include "std_algorithms/Kokkos_ShiftRight.hpp"
// min_element, max_element, minmax_element // sorting
#include <std_algorithms/Kokkos_MinMaxElementOperations.hpp> #include "std_algorithms/Kokkos_IsSortedUntil.hpp"
#include "std_algorithms/Kokkos_IsSorted.hpp"
// is_partitioned, partition_copy, partition_point // min/max element
#include <std_algorithms/Kokkos_PartitioningOperations.hpp> #include "std_algorithms/Kokkos_MinElement.hpp"
#include "std_algorithms/Kokkos_MaxElement.hpp"
#include "std_algorithms/Kokkos_MinMaxElement.hpp"
// adjacent_difference // partitioning
// reduce, transform_reduce #include "std_algorithms/Kokkos_IsPartitioned.hpp"
// exclusive_scan, transform_exclusive_scan #include "std_algorithms/Kokkos_PartitionCopy.hpp"
// inclusive_scan, transform_inclusive_scan #include "std_algorithms/Kokkos_PartitionPoint.hpp"
#include <std_algorithms/Kokkos_Numeric.hpp>
// numeric
#include "std_algorithms/Kokkos_AdjacentDifference.hpp"
#include "std_algorithms/Kokkos_Reduce.hpp"
#include "std_algorithms/Kokkos_TransformReduce.hpp"
#include "std_algorithms/Kokkos_ExclusiveScan.hpp"
#include "std_algorithms/Kokkos_TransformExclusiveScan.hpp"
#include "std_algorithms/Kokkos_InclusiveScan.hpp"
#include "std_algorithms/Kokkos_TransformInclusiveScan.hpp"
#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STD_ALGORITHMS
#undef KOKKOS_IMPL_PUBLIC_INCLUDE
#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STD_ALGORITHMS
#endif
#endif #endif

View File

@ -42,106 +42,15 @@
//@HEADER //@HEADER
*/ */
#ifndef KOKKOS_STD_NUMERICS_ADJACENT_DIFFERENCE_HPP #ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_HPP
#define KOKKOS_STD_NUMERICS_ADJACENT_DIFFERENCE_HPP #define KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_HPP
#include <Kokkos_Core.hpp> #include "impl/Kokkos_AdjacentDifference.hpp"
#include "../Kokkos_BeginEnd.hpp" #include "Kokkos_BeginEnd.hpp"
#include "../Kokkos_Constraints.hpp"
#include "../Kokkos_Distance.hpp"
namespace Kokkos { namespace Kokkos {
namespace Experimental { namespace Experimental {
namespace Impl {

// ------------------------
//
// functors
//
// ------------------------

/// Default binary operation used by adjacent_difference: evaluates `a - b`
/// and returns it as RetType (which defaults to ValueType2).
template <class ValueType1, class ValueType2, class RetType = ValueType2>
struct StdAdjacentDifferenceDefaultBinaryOpFunctor {
  KOKKOS_FUNCTION
  constexpr RetType operator()(const ValueType1& a, const ValueType2& b) const {
    return a - b;
  }
};

/// Per-index functor for adjacent_difference.
///
/// For index 0 the source element is copied to the destination unchanged;
/// for every other index i the destination receives
/// `m_op(source[i], source[i - 1])`.
template <class InputIteratorType, class OutputIteratorType,
          class BinaryOperator>
struct StdAdjacentDiffFunctor {
  using index_type = typename InputIteratorType::difference_type;

  const InputIteratorType m_first_from;
  const OutputIteratorType m_first_dest;
  BinaryOperator m_op;

  KOKKOS_FUNCTION
  void operator()(const index_type i) const {
    const auto& my_value = m_first_from[i];
    if (i == 0) {
      // The first element has no left neighbor, so it is passed through.
      m_first_dest[i] = my_value;
    } else {
      // NOTE(review): this reads the source at i - 1 while other iterations
      // write the destination; if the two ranges overlap the result may
      // depend on execution order -- confirm callers never alias them.
      const auto& left_value = m_first_from[i - 1];
      m_first_dest[i]        = m_op(my_value, left_value);
    }
  }

  KOKKOS_FUNCTION
  StdAdjacentDiffFunctor(InputIteratorType first_from,
                         OutputIteratorType first_dest, BinaryOperator op)
      : m_first_from(std::move(first_from)),
        m_first_dest(std::move(first_dest)),
        m_op(std::move(op)) {}
};

// ------------------------------------------
// adjacent_difference_impl
// ------------------------------------------

/// Runs StdAdjacentDiffFunctor over [first_from, last_from) on `ex`.
///
/// \param label      label passed to Kokkos::parallel_for
/// \param ex         execution space instance the kernel is launched on
/// \param first_from beginning of the source range
/// \param last_from  end of the source range
/// \param first_dest beginning of the destination range
/// \param bin_op     binary operation applied to (element, left neighbor)
/// \return `first_dest` if the source range is empty, otherwise
///         `first_dest + distance(first_from, last_from)`
///
/// `ex` is fenced before returning whenever a kernel was launched.
template <class ExecutionSpace, class InputIteratorType,
          class OutputIteratorType, class BinaryOp>
OutputIteratorType adjacent_difference_impl(const std::string& label,
                                            const ExecutionSpace& ex,
                                            InputIteratorType first_from,
                                            InputIteratorType last_from,
                                            OutputIteratorType first_dest,
                                            BinaryOp bin_op) {
  // checks
  Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest);
  Impl::static_assert_iterators_have_matching_difference_type(first_from,
                                                              first_dest);
  Impl::expect_valid_range(first_from, last_from);

  // nothing to do for an empty range
  if (first_from == last_from) {
    return first_dest;
  }

  // aliases
  using functor_t =
      StdAdjacentDiffFunctor<InputIteratorType, OutputIteratorType, BinaryOp>;

  // run
  // (The previous version also allocated a scratch View of num_elements
  // entries here that was never read or written; it has been dropped.)
  const auto num_elements =
      Kokkos::Experimental::distance(first_from, last_from);
  ::Kokkos::parallel_for(label,
                         RangePolicy<ExecutionSpace>(ex, 0, num_elements),
                         functor_t(first_from, first_dest, bin_op));
  ex.fence("Kokkos::adjacent_difference: fence after operation");

  // return
  return first_dest + num_elements;
}

}  // end namespace Impl
// ------------------------
//
// public API
//
// ------------------------
template <class ExecutionSpace, class InputIteratorType, template <class ExecutionSpace, class InputIteratorType,
class OutputIteratorType> class OutputIteratorType>
std::enable_if_t<!::Kokkos::is_view<InputIteratorType>::value, std::enable_if_t<!::Kokkos::is_view<InputIteratorType>::value,

View File

@ -0,0 +1,124 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_HPP
#define KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_HPP
#include "impl/Kokkos_AdjacentFind.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
// overload set1
template <class ExecutionSpace, class IteratorType>
IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first,
IteratorType last) {
return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default",
ex, first, last);
}
template <class ExecutionSpace, class IteratorType>
IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex,
IteratorType first, IteratorType last) {
return Impl::adjacent_find_impl(label, ex, first, last);
}
template <class ExecutionSpace, class DataType, class... Properties>
auto adjacent_find(const ExecutionSpace& ex,
const ::Kokkos::View<DataType, Properties...>& v) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
namespace KE = ::Kokkos::Experimental;
return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex,
KE::begin(v), KE::end(v));
}
template <class ExecutionSpace, class DataType, class... Properties>
auto adjacent_find(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType, Properties...>& v) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
namespace KE = ::Kokkos::Experimental;
return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v));
}
// overload set2
template <class ExecutionSpace, class IteratorType, class BinaryPredicateType>
IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first,
IteratorType last, BinaryPredicateType pred) {
return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default",
ex, first, last, pred);
}
template <class ExecutionSpace, class IteratorType, class BinaryPredicateType>
IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex,
IteratorType first, IteratorType last,
BinaryPredicateType pred) {
return Impl::adjacent_find_impl(label, ex, first, last, pred);
}
template <class ExecutionSpace, class DataType, class... Properties,
class BinaryPredicateType>
auto adjacent_find(const ExecutionSpace& ex,
const ::Kokkos::View<DataType, Properties...>& v,
BinaryPredicateType pred) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
namespace KE = ::Kokkos::Experimental;
return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex,
KE::begin(v), KE::end(v), pred);
}
template <class ExecutionSpace, class DataType, class... Properties,
class BinaryPredicateType>
auto adjacent_find(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType, Properties...>& v,
BinaryPredicateType pred) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
namespace KE = ::Kokkos::Experimental;
return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v), pred);
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -0,0 +1,94 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_ALL_OF_HPP
#define KOKKOS_STD_ALGORITHMS_ALL_OF_HPP
#include "impl/Kokkos_AllOfAnyOfNoneOf.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
template <class ExecutionSpace, class InputIterator, class Predicate>
bool all_of(const ExecutionSpace& ex, InputIterator first, InputIterator last,
Predicate predicate) {
return Impl::all_of_impl("Kokkos::all_of_iterator_api_default", ex, first,
last, predicate);
}
template <class ExecutionSpace, class InputIterator, class Predicate>
bool all_of(const std::string& label, const ExecutionSpace& ex,
InputIterator first, InputIterator last, Predicate predicate) {
return Impl::all_of_impl(label, ex, first, last, predicate);
}
template <class ExecutionSpace, class DataType, class... Properties,
class Predicate>
bool all_of(const ExecutionSpace& ex,
const ::Kokkos::View<DataType, Properties...>& v,
Predicate predicate) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
namespace KE = ::Kokkos::Experimental;
return Impl::all_of_impl("Kokkos::all_of_view_api_default", ex, KE::cbegin(v),
KE::cend(v), std::move(predicate));
}
template <class ExecutionSpace, class DataType, class... Properties,
class Predicate>
bool all_of(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType, Properties...>& v,
Predicate predicate) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
namespace KE = ::Kokkos::Experimental;
return Impl::all_of_impl(label, ex, KE::cbegin(v), KE::cend(v),
std::move(predicate));
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -0,0 +1,94 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_ANY_OF_HPP
#define KOKKOS_STD_ALGORITHMS_ANY_OF_HPP
#include "impl/Kokkos_AllOfAnyOfNoneOf.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {

// Kokkos counterpart of std::any_of. Each overload forwards to
// Impl::any_of_impl; the variants differ only in whether the caller provides
// a label and whether the input is an iterator pair or a View.

/// Iterator range [first, last), default label.
///
/// The default label names the iterator API (it previously reused the
/// view-API label, which made the two entry points indistinguishable by
/// label).
template <class ExecutionSpace, class InputIterator, class Predicate>
bool any_of(const ExecutionSpace& ex, InputIterator first, InputIterator last,
            Predicate predicate) {
  return Impl::any_of_impl("Kokkos::any_of_iterator_api_default", ex, first,
                           last, predicate);
}

/// Iterator range [first, last), caller-provided label.
template <class ExecutionSpace, class InputIterator, class Predicate>
bool any_of(const std::string& label, const ExecutionSpace& ex,
            InputIterator first, InputIterator last, Predicate predicate) {
  return Impl::any_of_impl(label, ex, first, last, predicate);
}

/// Whole View, default label. The View is traversed through const iterators
/// (cbegin/cend) and the predicate is moved into the implementation.
template <class ExecutionSpace, class DataType, class... Properties,
          class Predicate>
bool any_of(const ExecutionSpace& ex,
            const ::Kokkos::View<DataType, Properties...>& v,
            Predicate predicate) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  namespace KE = ::Kokkos::Experimental;
  return Impl::any_of_impl("Kokkos::any_of_view_api_default", ex, KE::cbegin(v),
                           KE::cend(v), std::move(predicate));
}

/// Whole View, caller-provided label.
template <class ExecutionSpace, class DataType, class... Properties,
          class Predicate>
bool any_of(const std::string& label, const ExecutionSpace& ex,
            const ::Kokkos::View<DataType, Properties...>& v,
            Predicate predicate) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  namespace KE = ::Kokkos::Experimental;
  return Impl::any_of_impl(label, ex, KE::cbegin(v), KE::cend(v),
                           std::move(predicate));
}

}  // namespace Experimental
}  // namespace Kokkos
#endif

View File

@ -46,8 +46,8 @@
#define KOKKOS_BEGIN_END_HPP #define KOKKOS_BEGIN_END_HPP
#include <Kokkos_View.hpp> #include <Kokkos_View.hpp>
#include "Kokkos_RandomAccessIterator.hpp" #include "impl/Kokkos_RandomAccessIterator.hpp"
#include "Kokkos_Constraints.hpp" #include "impl/Kokkos_Constraints.hpp"
/// \file Kokkos_BeginEnd.hpp /// \file Kokkos_BeginEnd.hpp
/// \brief Kokkos begin, end, cbegin, cend /// \brief Kokkos begin, end, cbegin, cend

View File

@ -0,0 +1,97 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_COPY_HPP
#define KOKKOS_STD_ALGORITHMS_COPY_HPP
#include "impl/Kokkos_CopyCopyN.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
template <class ExecutionSpace, class InputIterator, class OutputIterator>
OutputIterator copy(const ExecutionSpace& ex, InputIterator first,
InputIterator last, OutputIterator d_first) {
return Impl::copy_impl("Kokkos::copy_iterator_api_default", ex, first, last,
d_first);
}
template <class ExecutionSpace, class InputIterator, class OutputIterator>
OutputIterator copy(const std::string& label, const ExecutionSpace& ex,
InputIterator first, InputIterator last,
OutputIterator d_first) {
return Impl::copy_impl(label, ex, first, last, d_first);
}
// View overload of copy that supplies the library's default label.
//
// Both views are checked with
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then
// cbegin(source), cend(source) and begin(dest) are forwarded together with
// `ex` to Impl::copy_impl, whose result is returned.
//
// `dest` is accepted by const reference, like `source`: only an iterator is
// obtained from it, and a const View handle does not make the elements it
// refers to const. This lets callers pass temporaries (for example the
// result of a subview expression) as well as named views.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2>
auto copy(const ExecutionSpace& ex,
          const ::Kokkos::View<DataType1, Properties1...>& source,
          const ::Kokkos::View<DataType2, Properties2...>& dest) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

  namespace KE = ::Kokkos::Experimental;
  return Impl::copy_impl("Kokkos::copy_view_api_default", ex,
                         KE::cbegin(source), KE::cend(source), KE::begin(dest));
}
// View overload of copy with a caller-provided label.
//
// Both views are checked with
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then
// cbegin(source), cend(source) and begin(dest) are forwarded together with
// `label` and `ex` to Impl::copy_impl, whose result is returned.
//
// `dest` is accepted by const reference, like `source`: only an iterator is
// obtained from it, and a const View handle does not make the elements it
// refers to const. This lets callers pass temporaries (for example the
// result of a subview expression) as well as named views.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2>
auto copy(const std::string& label, const ExecutionSpace& ex,
          const ::Kokkos::View<DataType1, Properties1...>& source,
          const ::Kokkos::View<DataType2, Properties2...>& dest) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

  namespace KE = ::Kokkos::Experimental;
  return Impl::copy_impl(label, ex, KE::cbegin(source), KE::cend(source),
                         KE::begin(dest));
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -0,0 +1,95 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_HPP
#define KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_HPP
#include "impl/Kokkos_CopyBackward.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
// Iterator-range overload of copy_backward that supplies the library's
// default label. Passes `ex`, the source range [first, last) and the
// destination end `d_last` on to Impl::copy_backward_impl and returns that
// call's result.
template <class ExecutionSpace, class IteratorType1, class IteratorType2>
IteratorType2 copy_backward(const ExecutionSpace& ex, IteratorType1 first,
                            IteratorType1 last, IteratorType2 d_last) {
  // Label used when the caller does not provide one.
  const std::string default_label("Kokkos::copy_backward_iterator_api_default");
  return Impl::copy_backward_impl(default_label, ex, first, last, d_last);
}
// Iterator-range overload of copy_backward with a caller-provided label.
// Forwards `label`, `ex`, the source range [first, last) and the destination
// end `d_last` unchanged to Impl::copy_backward_impl and returns that call's
// result.
template <class ExecutionSpace, class IteratorType1, class IteratorType2>
IteratorType2 copy_backward(const std::string& label, const ExecutionSpace& ex,
IteratorType1 first, IteratorType1 last,
IteratorType2 d_last) {
return Impl::copy_backward_impl(label, ex, first, last, d_last);
}
// View overload of copy_backward that supplies the library's default label.
//
// Both views are checked with
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then
// cbegin(source), cend(source) and end(dest) are forwarded together with `ex`
// to Impl::copy_backward_impl, whose result is returned.
//
// The iterator helpers are called with explicit Kokkos::Experimental
// qualification so that argument-dependent lookup (driven by the View's
// template arguments) cannot select or clash with a same-named function from
// another namespace.
//
// `dest` is accepted by const reference, like `source`: only an iterator is
// obtained from it, and a const View handle does not make the elements it
// refers to const. This lets callers pass temporaries as well as named views.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2>
auto copy_backward(const ExecutionSpace& ex,
                   const ::Kokkos::View<DataType1, Properties1...>& source,
                   const ::Kokkos::View<DataType2, Properties2...>& dest) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

  namespace KE = ::Kokkos::Experimental;
  return Impl::copy_backward_impl("Kokkos::copy_backward_view_api_default", ex,
                                  KE::cbegin(source), KE::cend(source),
                                  KE::end(dest));
}
// View overload of copy_backward with a caller-provided label.
//
// Both views are checked with
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then
// cbegin(source), cend(source) and end(dest) are forwarded together with
// `label` and `ex` to Impl::copy_backward_impl, whose result is returned.
//
// The iterator helpers are called with explicit Kokkos::Experimental
// qualification so that argument-dependent lookup (driven by the View's
// template arguments) cannot select or clash with a same-named function from
// another namespace.
//
// `dest` is accepted by const reference, like `source`: only an iterator is
// obtained from it, and a const View handle does not make the elements it
// refers to const. This lets callers pass temporaries as well as named views.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2>
auto copy_backward(const std::string& label, const ExecutionSpace& ex,
                   const ::Kokkos::View<DataType1, Properties1...>& source,
                   const ::Kokkos::View<DataType2, Properties2...>& dest) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

  namespace KE = ::Kokkos::Experimental;
  return Impl::copy_backward_impl(label, ex, KE::cbegin(source),
                                  KE::cend(source), KE::end(dest));
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -0,0 +1,99 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_COPY_IF_HPP
#define KOKKOS_STD_ALGORITHMS_COPY_IF_HPP
#include "impl/Kokkos_CopyIf.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
// Iterator-range overload of copy_if that supplies the library's default
// label. Passes `ex`, the source range [first, last), the destination start
// `d_first` and the (moved) predicate on to Impl::copy_if_impl and returns
// that call's result.
template <class ExecutionSpace, class InputIterator, class OutputIterator,
          class Predicate>
OutputIterator copy_if(const ExecutionSpace& ex, InputIterator first,
                       InputIterator last, OutputIterator d_first,
                       Predicate pred) {
  // Label used when the caller does not provide one.
  const std::string default_label("Kokkos::copy_if_iterator_api_default");
  return Impl::copy_if_impl(default_label, ex, first, last, d_first,
                            std::move(pred));
}
// Iterator-range overload of copy_if with a caller-provided label.
// Forwards `label`, `ex`, the source range [first, last) and `d_first` to
// Impl::copy_if_impl and returns that call's result. `pred` is received by
// value and moved into the call.
template <class ExecutionSpace, class InputIterator, class OutputIterator,
class Predicate>
OutputIterator copy_if(const std::string& label, const ExecutionSpace& ex,
InputIterator first, InputIterator last,
OutputIterator d_first, Predicate pred) {
return Impl::copy_if_impl(label, ex, first, last, d_first, std::move(pred));
}
// View overload of copy_if that supplies the library's default label.
//
// Both views are checked with
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then
// cbegin(source), cend(source), begin(dest) and the (moved) predicate are
// forwarded together with `ex` to Impl::copy_if_impl, whose result is
// returned.
//
// The iterator helpers are called with explicit Kokkos::Experimental
// qualification so that argument-dependent lookup (driven by the View's
// template arguments) cannot select or clash with a same-named function from
// another namespace.
//
// `dest` is accepted by const reference, like `source`: only an iterator is
// obtained from it, and a const View handle does not make the elements it
// refers to const. This lets callers pass temporaries as well as named views.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2, class Predicate>
auto copy_if(const ExecutionSpace& ex,
             const ::Kokkos::View<DataType1, Properties1...>& source,
             const ::Kokkos::View<DataType2, Properties2...>& dest,
             Predicate pred) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

  namespace KE = ::Kokkos::Experimental;
  return Impl::copy_if_impl("Kokkos::copy_if_view_api_default", ex,
                            KE::cbegin(source), KE::cend(source),
                            KE::begin(dest), std::move(pred));
}
// View overload of copy_if with a caller-provided label.
//
// Both views are checked with
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then
// cbegin(source), cend(source), begin(dest) and the (moved) predicate are
// forwarded together with `label` and `ex` to Impl::copy_if_impl, whose
// result is returned.
//
// The iterator helpers are called with explicit Kokkos::Experimental
// qualification so that argument-dependent lookup (driven by the View's
// template arguments) cannot select or clash with a same-named function from
// another namespace.
//
// `dest` is accepted by const reference, like `source`: only an iterator is
// obtained from it, and a const View handle does not make the elements it
// refers to const. This lets callers pass temporaries as well as named views.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2, class Predicate>
auto copy_if(const std::string& label, const ExecutionSpace& ex,
             const ::Kokkos::View<DataType1, Properties1...>& source,
             const ::Kokkos::View<DataType2, Properties2...>& dest,
             Predicate pred) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

  namespace KE = ::Kokkos::Experimental;
  return Impl::copy_if_impl(label, ex, KE::cbegin(source), KE::cend(source),
                            KE::begin(dest), std::move(pred));
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -0,0 +1,98 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_COPY_N_HPP
#define KOKKOS_STD_ALGORITHMS_COPY_N_HPP
#include "impl/Kokkos_CopyCopyN.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
// Iterator overload of copy_n that supplies the library's default label.
// Passes `ex`, the source start `first`, the element count `count` and the
// destination start `result` on to Impl::copy_n_impl and returns that call's
// result.
template <class ExecutionSpace, class InputIterator, class Size,
          class OutputIterator>
OutputIterator copy_n(const ExecutionSpace& ex, InputIterator first, Size count,
                      OutputIterator result) {
  // Label used when the caller does not provide one.
  const std::string default_label("Kokkos::copy_n_iterator_api_default");
  return Impl::copy_n_impl(default_label, ex, first, count, result);
}
// Iterator overload of copy_n with a caller-provided label.
// Forwards `label`, `ex`, the source start `first`, the element count `count`
// and the destination start `result` unchanged to Impl::copy_n_impl and
// returns that call's result.
template <class ExecutionSpace, class InputIterator, class Size,
class OutputIterator>
OutputIterator copy_n(const std::string& label, const ExecutionSpace& ex,
InputIterator first, Size count, OutputIterator result) {
return Impl::copy_n_impl(label, ex, first, count, result);
}
// View overload of copy_n that supplies the library's default label.
//
// Both views are checked with
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then
// cbegin(source), `count` and begin(dest) are forwarded together with `ex`
// to Impl::copy_n_impl, whose result is returned.
//
// `dest` is accepted by const reference, like `source`: only an iterator is
// obtained from it, and a const View handle does not make the elements it
// refers to const. This lets callers pass temporaries (for example the
// result of a subview expression) as well as named views.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class Size, class DataType2, class... Properties2>
auto copy_n(const ExecutionSpace& ex,
            const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
            const ::Kokkos::View<DataType2, Properties2...>& dest) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

  namespace KE = ::Kokkos::Experimental;
  return Impl::copy_n_impl("Kokkos::copy_n_view_api_default", ex,
                           KE::cbegin(source), count, KE::begin(dest));
}
// View overload of copy_n with a caller-provided label.
//
// Both views are checked with
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then
// cbegin(source), `count` and begin(dest) are forwarded together with `label`
// and `ex` to Impl::copy_n_impl, whose result is returned.
//
// `dest` is accepted by const reference, like `source`: only an iterator is
// obtained from it, and a const View handle does not make the elements it
// refers to const. This lets callers pass temporaries (for example the
// result of a subview expression) as well as named views.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class Size, class DataType2, class... Properties2>
auto copy_n(const std::string& label, const ExecutionSpace& ex,
            const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
            const ::Kokkos::View<DataType2, Properties2...>& dest) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

  namespace KE = ::Kokkos::Experimental;
  return Impl::copy_n_impl(label, ex, KE::cbegin(source), count,
                           KE::begin(dest));
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -0,0 +1,94 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_COUNT_HPP
#define KOKKOS_STD_ALGORITHMS_COUNT_HPP
#include "impl/Kokkos_CountCountIf.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
// Iterator-range overload of count that supplies the library's default label.
// Passes `ex`, the range [first, last) and `value` on to Impl::count_impl and
// returns that call's result as IteratorType::difference_type.
template <class ExecutionSpace, class IteratorType, class T>
typename IteratorType::difference_type count(const ExecutionSpace& ex,
                                             IteratorType first,
                                             IteratorType last,
                                             const T& value) {
  // Label used when the caller does not provide one.
  const std::string default_label("Kokkos::count_iterator_api_default");
  return Impl::count_impl(default_label, ex, first, last, value);
}
// Iterator-range overload of count with a caller-provided label.
// Forwards `label`, `ex`, the range [first, last) and `value` unchanged to
// Impl::count_impl and returns that call's result as
// IteratorType::difference_type.
template <class ExecutionSpace, class IteratorType, class T>
typename IteratorType::difference_type count(const std::string& label,
const ExecutionSpace& ex,
IteratorType first,
IteratorType last,
const T& value) {
return Impl::count_impl(label, ex, first, last, value);
}
// View overload of count that supplies the library's default label.
// After the admissibility check on `v`, the const iterator pair spanning `v`
// is handed to Impl::count_impl together with `ex` and `value`; the result of
// that call is returned.
template <class ExecutionSpace, class DataType, class... Properties, class T>
auto count(const ExecutionSpace& ex,
           const ::Kokkos::View<DataType, Properties...>& v, const T& value) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  auto range_begin = ::Kokkos::Experimental::cbegin(v);
  auto range_end   = ::Kokkos::Experimental::cend(v);
  return Impl::count_impl("Kokkos::count_view_api_default", ex, range_begin,
                          range_end, value);
}
// View overload of count with a caller-provided label.
// After the admissibility check on `v`, the const iterator pair spanning `v`
// is handed to Impl::count_impl together with `label`, `ex` and `value`; the
// result of that call is returned.
template <class ExecutionSpace, class DataType, class... Properties, class T>
auto count(const std::string& label, const ExecutionSpace& ex,
           const ::Kokkos::View<DataType, Properties...>& v, const T& value) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  auto range_begin = ::Kokkos::Experimental::cbegin(v);
  auto range_end   = ::Kokkos::Experimental::cend(v);
  return Impl::count_impl(label, ex, range_begin, range_end, value);
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -0,0 +1,99 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_COUNT_IF_HPP
#define KOKKOS_STD_ALGORITHMS_COUNT_IF_HPP
#include "impl/Kokkos_CountCountIf.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
// Iterator-range overload of count_if that supplies the library's default
// label. Passes `ex`, the range [first, last) and the (moved) predicate on to
// Impl::count_if_impl and returns that call's result as
// IteratorType::difference_type.
template <class ExecutionSpace, class IteratorType, class Predicate>
typename IteratorType::difference_type count_if(const ExecutionSpace& ex,
                                                IteratorType first,
                                                IteratorType last,
                                                Predicate predicate) {
  // Label used when the caller does not provide one.
  const std::string default_label("Kokkos::count_if_iterator_api_default");
  return Impl::count_if_impl(default_label, ex, first, last,
                             std::move(predicate));
}
// Iterator-range overload of count_if with a caller-provided label.
// Forwards `label`, `ex` and the range [first, last) to Impl::count_if_impl
// and returns that call's result as IteratorType::difference_type.
// `predicate` is received by value and moved into the call.
template <class ExecutionSpace, class IteratorType, class Predicate>
typename IteratorType::difference_type count_if(const std::string& label,
const ExecutionSpace& ex,
IteratorType first,
IteratorType last,
Predicate predicate) {
return Impl::count_if_impl(label, ex, first, last, std::move(predicate));
}
// View overload of count_if that supplies the library's default label.
// After the admissibility check on `v`, the const iterator pair spanning `v`
// is handed to Impl::count_if_impl together with `ex` and the (moved)
// predicate; the result of that call is returned.
template <class ExecutionSpace, class DataType, class... Properties,
          class Predicate>
auto count_if(const ExecutionSpace& ex,
              const ::Kokkos::View<DataType, Properties...>& v,
              Predicate predicate) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  auto range_begin = ::Kokkos::Experimental::cbegin(v);
  auto range_end   = ::Kokkos::Experimental::cend(v);
  return Impl::count_if_impl("Kokkos::count_if_view_api_default", ex,
                             range_begin, range_end, std::move(predicate));
}
// View overload of count_if with a caller-provided label.
// After the admissibility check on `v`, the const iterator pair spanning `v`
// is handed to Impl::count_if_impl together with `label`, `ex` and the
// (moved) predicate; the result of that call is returned.
template <class ExecutionSpace, class DataType, class... Properties,
          class Predicate>
auto count_if(const std::string& label, const ExecutionSpace& ex,
              const ::Kokkos::View<DataType, Properties...>& v,
              Predicate predicate) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);

  auto range_begin = ::Kokkos::Experimental::cbegin(v);
  auto range_end   = ::Kokkos::Experimental::cend(v);
  return Impl::count_if_impl(label, ex, range_begin, range_end,
                             std::move(predicate));
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -45,8 +45,8 @@
#ifndef KOKKOS_STD_ALGORITHMS_DISTANCE_HPP #ifndef KOKKOS_STD_ALGORITHMS_DISTANCE_HPP
#define KOKKOS_STD_ALGORITHMS_DISTANCE_HPP #define KOKKOS_STD_ALGORITHMS_DISTANCE_HPP
#include "Kokkos_Constraints.hpp" #include "impl/Kokkos_Constraints.hpp"
#include "Kokkos_RandomAccessIterator.hpp" #include "impl/Kokkos_RandomAccessIterator.hpp"
namespace Kokkos { namespace Kokkos {
namespace Experimental { namespace Experimental {

View File

@ -0,0 +1,198 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_EQUAL_HPP
#define KOKKOS_STD_ALGORITHMS_EQUAL_HPP
#include "impl/Kokkos_Equal.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
// Three-iterator overload of equal (first range plus start of the second)
// that supplies the library's default label. Only participates in overload
// resolution when Impl::are_iterators<IteratorType1, IteratorType2>::value is
// true. Passes everything on to Impl::equal_impl and returns its result.
template <class ExecutionSpace, class IteratorType1, class IteratorType2>
std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
                      IteratorType1, IteratorType2>::value,
                  bool>
equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1,
      IteratorType2 first2) {
  // Label used when the caller does not provide one.
  const std::string default_label("Kokkos::equal_iterator_api_default");
  return Impl::equal_impl(default_label, ex, first1, last1, first2);
}
// Three-iterator overload of equal (first range plus start of the second)
// with a caller-provided label. Only participates in overload resolution when
// Impl::are_iterators<IteratorType1, IteratorType2>::value is true.
// Forwards all arguments unchanged to Impl::equal_impl and returns its result.
template <class ExecutionSpace, class IteratorType1, class IteratorType2>
std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
IteratorType1, IteratorType2>::value,
bool>
equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
IteratorType1 last1, IteratorType2 first2) {
return Impl::equal_impl(label, ex, first1, last1, first2);
}
// Three-iterator overload of equal with a custom binary predicate and the
// library's default label. Only participates in overload resolution when
// Impl::are_iterators<IteratorType1, IteratorType2>::value is true. Passes
// the iterators and the (moved) predicate on to Impl::equal_impl and returns
// its result.
template <class ExecutionSpace, class IteratorType1, class IteratorType2,
          class BinaryPredicateType>
std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
                      IteratorType1, IteratorType2>::value,
                  bool>
equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1,
      IteratorType2 first2, BinaryPredicateType predicate) {
  // Label used when the caller does not provide one.
  const std::string default_label("Kokkos::equal_iterator_api_default");
  return Impl::equal_impl(default_label, ex, first1, last1, first2,
                          std::move(predicate));
}
// Three-iterator overload of equal with a custom binary predicate and a
// caller-provided label. Only participates in overload resolution when
// Impl::are_iterators<IteratorType1, IteratorType2>::value is true.
// Forwards the label, execution space and iterators to Impl::equal_impl and
// returns its result. `predicate` is received by value and moved into the
// call.
template <class ExecutionSpace, class IteratorType1, class IteratorType2,
class BinaryPredicateType>
std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
IteratorType1, IteratorType2>::value,
bool>
equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
IteratorType1 last1, IteratorType2 first2,
BinaryPredicateType predicate) {
return Impl::equal_impl(label, ex, first1, last1, first2,
std::move(predicate));
}
// View overload of equal that supplies the library's default label.
//
// Both views are checked with
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then
// cbegin(view1), cend(view1) and cbegin(view2) are forwarded together with
// `ex` to Impl::equal_impl, whose result is returned.
//
// `view2` is only read (through cbegin), so it is accepted by const
// reference just like `view1`; this also allows const-qualified handles and
// temporaries to be compared.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2>
bool equal(const ExecutionSpace& ex,
           const ::Kokkos::View<DataType1, Properties1...>& view1,
           const ::Kokkos::View<DataType2, Properties2...>& view2) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);

  namespace KE = ::Kokkos::Experimental;
  return Impl::equal_impl("Kokkos::equal_view_api_default", ex,
                          KE::cbegin(view1), KE::cend(view1),
                          KE::cbegin(view2));
}
// View overload of equal with a caller-provided label.
//
// Both views are checked with
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then
// cbegin(view1), cend(view1) and cbegin(view2) are forwarded together with
// `label` and `ex` to Impl::equal_impl, whose result is returned.
//
// `view2` is only read (through cbegin), so it is accepted by const
// reference just like `view1`; this also allows const-qualified handles and
// temporaries to be compared.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2>
bool equal(const std::string& label, const ExecutionSpace& ex,
           const ::Kokkos::View<DataType1, Properties1...>& view1,
           const ::Kokkos::View<DataType2, Properties2...>& view2) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);

  namespace KE = ::Kokkos::Experimental;
  return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1),
                          KE::cbegin(view2));
}
// View overload of equal with a custom binary predicate and the library's
// default label.
//
// Both views are checked with
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then
// cbegin(view1), cend(view1), cbegin(view2) and the (moved) predicate are
// forwarded together with `ex` to Impl::equal_impl, whose result is returned.
//
// `view2` is only read (through cbegin), so it is accepted by const
// reference just like `view1`; this also allows const-qualified handles and
// temporaries to be compared.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2, class BinaryPredicateType>
bool equal(const ExecutionSpace& ex,
           const ::Kokkos::View<DataType1, Properties1...>& view1,
           const ::Kokkos::View<DataType2, Properties2...>& view2,
           BinaryPredicateType predicate) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);

  namespace KE = ::Kokkos::Experimental;
  return Impl::equal_impl("Kokkos::equal_view_api_default", ex,
                          KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2),
                          std::move(predicate));
}
// View overload of equal with a custom binary predicate and a
// caller-provided label.
//
// Both views are checked with
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms, then
// cbegin(view1), cend(view1), cbegin(view2) and the (moved) predicate are
// forwarded together with `label` and `ex` to Impl::equal_impl, whose result
// is returned.
//
// `view2` is only read (through cbegin), so it is accepted by const
// reference just like `view1`; this also allows const-qualified handles and
// temporaries to be compared.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2, class BinaryPredicateType>
bool equal(const std::string& label, const ExecutionSpace& ex,
           const ::Kokkos::View<DataType1, Properties1...>& view1,
           const ::Kokkos::View<DataType2, Properties2...>& view2,
           BinaryPredicateType predicate) {
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);

  namespace KE = ::Kokkos::Experimental;
  return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1),
                          KE::cbegin(view2), std::move(predicate));
}
// Four-iterator overload of equal (both ranges fully delimited) that
// supplies the library's default label. Only participates in overload
// resolution when Impl::are_iterators<IteratorType1, IteratorType2>::value is
// true. Passes everything on to Impl::equal_impl and returns its result.
template <class ExecutionSpace, class IteratorType1, class IteratorType2>
std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
                      IteratorType1, IteratorType2>::value,
                  bool>
equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1,
      IteratorType2 first2, IteratorType2 last2) {
  // Label used when the caller does not provide one.
  const std::string default_label("Kokkos::equal_iterator_api_default");
  return Impl::equal_impl(default_label, ex, first1, last1, first2, last2);
}
// Four-iterator overload of equal (both ranges fully delimited) with a
// caller-provided label. Only participates in overload resolution when
// Impl::are_iterators<IteratorType1, IteratorType2>::value is true.
// Forwards all arguments unchanged to Impl::equal_impl and returns its result.
template <class ExecutionSpace, class IteratorType1, class IteratorType2>
std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
IteratorType1, IteratorType2>::value,
bool>
equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) {
return Impl::equal_impl(label, ex, first1, last1, first2, last2);
}
// Four-iterator overload of equal with a custom binary predicate and the
// library's default label. Only participates in overload resolution when
// Impl::are_iterators<IteratorType1, IteratorType2>::value is true. Passes
// the iterators and the (moved) predicate on to Impl::equal_impl and returns
// its result.
template <class ExecutionSpace, class IteratorType1, class IteratorType2,
          class BinaryPredicateType>
std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
                      IteratorType1, IteratorType2>::value,
                  bool>
equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1,
      IteratorType2 first2, IteratorType2 last2,
      BinaryPredicateType predicate) {
  // Label used when the caller does not provide one.
  const std::string default_label("Kokkos::equal_iterator_api_default");
  return Impl::equal_impl(default_label, ex, first1, last1, first2, last2,
                          std::move(predicate));
}
// Four-iterator overload of equal with a custom binary predicate and a
// caller-provided label. Only participates in overload resolution when
// Impl::are_iterators<IteratorType1, IteratorType2>::value is true.
// Forwards the label, execution space and iterators to Impl::equal_impl and
// returns its result. `predicate` is received by value and moved into the
// call.
template <class ExecutionSpace, class IteratorType1, class IteratorType2,
class BinaryPredicateType>
std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators<
IteratorType1, IteratorType2>::value,
bool>
equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1,
IteratorType1 last1, IteratorType2 first2, IteratorType2 last2,
BinaryPredicateType predicate) {
return Impl::equal_impl(label, ex, first1, last1, first2, last2,
std::move(predicate));
}
} // namespace Experimental
} // namespace Kokkos
#endif

View File

@ -0,0 +1,190 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_HPP
#define KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_HPP
#include "impl/Kokkos_ExclusiveScan.hpp"
#include "Kokkos_BeginEnd.hpp"
namespace Kokkos {
namespace Experimental {
// overload set 1
// Iterator-range overload of exclusive_scan without a custom binary operator,
// using the library's default label. Only participates in overload resolution
// when Impl::are_iterators<InputIteratorType, OutputIteratorType>::value is
// true, and requires ValueType to be move constructible. Passes `ex`, the
// source range [first, last), the destination start `first_dest` and
// `init_value` on to Impl::exclusive_scan_default_op_impl and returns its
// result.
template <class ExecutionSpace, class InputIteratorType,
          class OutputIteratorType, class ValueType>
std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators<
                     InputIteratorType, OutputIteratorType>::value,
                 OutputIteratorType>
exclusive_scan(const ExecutionSpace& ex, InputIteratorType first,
               InputIteratorType last, OutputIteratorType first_dest,
               ValueType init_value) {
  static_assert(std::is_move_constructible<ValueType>::value,
                "ValueType must be move constructible.");

  // Label used when the caller does not provide one.
  const std::string default_label(
      "Kokkos::exclusive_scan_default_functors_iterator_api");
  return Impl::exclusive_scan_default_op_impl(default_label, ex, first, last,
                                              first_dest, init_value);
}
// Iterator-range overload of exclusive_scan without a custom binary operator,
// with a caller-provided label. Only participates in overload resolution when
// Impl::are_iterators<InputIteratorType, OutputIteratorType>::value is true.
// Forwards `label`, `ex`, the source range [first, last), the destination
// start `first_dest` and `init_value` to
// Impl::exclusive_scan_default_op_impl and returns its result.
template <class ExecutionSpace, class InputIteratorType,
class OutputIteratorType, class ValueType>
std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators<
InputIteratorType, OutputIteratorType>::value,
OutputIteratorType>
exclusive_scan(const std::string& label, const ExecutionSpace& ex,
InputIteratorType first, InputIteratorType last,
OutputIteratorType first_dest, ValueType init_value) {
// Compile-time requirement on the initial-value type.
static_assert(std::is_move_constructible<ValueType>::value,
"ValueType must be move constructible.");
// NOTE(review): init_value is passed on as an lvalue here (not moved), so
// depending on how the Impl function takes it a copy may be made even though
// only move-constructibility is asserted above -- confirm against Impl.
return Impl::exclusive_scan_default_op_impl(label, ex, first, last,
first_dest, init_value);
}
// View overload of exclusive_scan without a custom binary operator, using the
// library's default label. Requires ValueType to be move constructible and
// both views to pass
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms. The const
// iterator pair spanning `view_from`, the begin iterator of `view_dest` and
// `init_value` are handed to Impl::exclusive_scan_default_op_impl together
// with `ex`; the result of that call is returned.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2, class ValueType>
auto exclusive_scan(const ExecutionSpace& ex,
                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
                    ValueType init_value) {
  static_assert(std::is_move_constructible<ValueType>::value,
                "ValueType must be move constructible.");
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);

  auto src_begin = ::Kokkos::Experimental::cbegin(view_from);
  auto src_end   = ::Kokkos::Experimental::cend(view_from);
  auto dst_begin = ::Kokkos::Experimental::begin(view_dest);
  return Impl::exclusive_scan_default_op_impl(
      "Kokkos::exclusive_scan_default_functors_view_api", ex, src_begin,
      src_end, dst_begin, init_value);
}
// View overload: default operation, caller-supplied label.
//
// Identical to the unlabeled view overload except that `label` is handed to
// Impl::exclusive_scan_default_op_impl in place of the built-in string.
// The views are checked with
// Impl::static_assert_is_admissible_to_kokkos_std_algorithms and ValueType
// must be move constructible.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2, class ValueType>
auto exclusive_scan(const std::string& label, const ExecutionSpace& ex,
                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
                    ValueType init_value) {
  namespace algo_impl = ::Kokkos::Experimental::Impl;
  algo_impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
  algo_impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);

  using init_is_movable = std::is_move_constructible<ValueType>;
  static_assert(init_is_movable::value,
                "ValueType must be move constructible.");

  return algo_impl::exclusive_scan_default_op_impl(
      label, ex, ::Kokkos::Experimental::cbegin(view_from),
      ::Kokkos::Experimental::cend(view_from),
      ::Kokkos::Experimental::begin(view_dest), init_value);
}
// overload set 2
template <class ExecutionSpace, class InputIteratorType,
class OutputIteratorType, class ValueType, class BinaryOpType>
std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators<
InputIteratorType, OutputIteratorType>::value,
OutputIteratorType>
exclusive_scan(const ExecutionSpace& ex, InputIteratorType first,
InputIteratorType last, OutputIteratorType first_dest,
ValueType init_value, BinaryOpType bop) {
Impl::static_assert_is_not_openmptarget(ex);
static_assert(std::is_move_constructible<ValueType>::value,
"ValueType must be move constructible.");
return Impl::exclusive_scan_custom_op_impl(
"Kokkos::exclusive_scan_custom_functors_iterator_api", ex, first, last,
first_dest, init_value, bop);
}
template <class ExecutionSpace, class InputIteratorType,
class OutputIteratorType, class ValueType, class BinaryOpType>
std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators<
InputIteratorType, OutputIteratorType>::value,
OutputIteratorType>
exclusive_scan(const std::string& label, const ExecutionSpace& ex,
InputIteratorType first, InputIteratorType last,
OutputIteratorType first_dest, ValueType init_value,
BinaryOpType bop) {
Impl::static_assert_is_not_openmptarget(ex);
static_assert(std::is_move_constructible<ValueType>::value,
"ValueType must be move constructible.");
return Impl::exclusive_scan_custom_op_impl(label, ex, first, last, first_dest,
init_value, bop);
}
// View overload: user-provided binary operation, built-in kernel label.
//
// Compile-time checks, in order: Impl::static_assert_is_not_openmptarget on
// the execution space, Impl::static_assert_is_admissible_to_kokkos_std_algorithms
// on each view, and move constructibility of ValueType. The source is
// traversed through cbegin/cend(view_from) and the destination starts at
// begin(view_dest); these, together with `init_value` and `bop`, are
// forwarded to Impl::exclusive_scan_custom_op_impl, whose result is returned.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2, class ValueType,
          class BinaryOpType>
auto exclusive_scan(const ExecutionSpace& ex,
                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
                    ValueType init_value, BinaryOpType bop) {
  namespace algo_impl = ::Kokkos::Experimental::Impl;
  algo_impl::static_assert_is_not_openmptarget(ex);
  algo_impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
  algo_impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);

  using init_is_movable = std::is_move_constructible<ValueType>;
  static_assert(init_is_movable::value,
                "ValueType must be move constructible.");

  return algo_impl::exclusive_scan_custom_op_impl(
      "Kokkos::exclusive_scan_custom_functors_view_api", ex,
      ::Kokkos::Experimental::cbegin(view_from),
      ::Kokkos::Experimental::cend(view_from),
      ::Kokkos::Experimental::begin(view_dest), init_value, bop);
}
// View overload: user-provided binary operation, caller-supplied label.
//
// Identical to the unlabeled custom-op view overload except that `label` is
// handed to Impl::exclusive_scan_custom_op_impl in place of the built-in
// string. The execution space and both views go through the same
// compile-time checks, and ValueType must be move constructible.
template <class ExecutionSpace, class DataType1, class... Properties1,
          class DataType2, class... Properties2, class ValueType,
          class BinaryOpType>
auto exclusive_scan(const std::string& label, const ExecutionSpace& ex,
                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
                    ValueType init_value, BinaryOpType bop) {
  namespace algo_impl = ::Kokkos::Experimental::Impl;
  algo_impl::static_assert_is_not_openmptarget(ex);
  algo_impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
  algo_impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);

  using init_is_movable = std::is_move_constructible<ValueType>;
  static_assert(init_is_movable::value,
                "ValueType must be move constructible.");

  return algo_impl::exclusive_scan_custom_op_impl(
      label, ex, ::Kokkos::Experimental::cbegin(view_from),
      ::Kokkos::Experimental::cend(view_from),
      ::Kokkos::Experimental::begin(view_dest), init_value, bop);
}
} // namespace Experimental
} // namespace Kokkos
#endif

Some files were not shown because too many files have changed in this diff Show More