forked from lijiext/lammps
Merge pull request #1422 from stanmoore1/team_opt
Optimize KOKKOS package for small systems
This commit is contained in:
commit
e72ac92a7f
|
@ -46,7 +46,7 @@ software version 7.5 or later must be installed on your system. See
|
|||
the discussion for the "GPU package"_Speed_gpu.html for details of how
|
||||
to check and do this.
|
||||
|
||||
NOTE: Kokkos with CUDA currently implicitly assumes, that the MPI
|
||||
NOTE: Kokkos with CUDA currently implicitly assumes that the MPI
|
||||
library is CUDA-aware and has support for GPU-direct. This is not
|
||||
always the case, especially when using pre-compiled MPI libraries
|
||||
provided by a Linux distribution. This is not a problem when using
|
||||
|
@ -207,19 +207,21 @@ supports.
|
|||
|
||||
[Running on GPUs:]
|
||||
|
||||
Use the "-k" "command-line switch"_Run_options.html to
|
||||
specify the number of GPUs per node. Typically the -np setting of the
|
||||
mpirun command should set the number of MPI tasks/node to be equal to
|
||||
the number of physical GPUs on the node. You can assign multiple MPI
|
||||
tasks to the same GPU with the KOKKOS package, but this is usually
|
||||
only faster if significant portions of the input script have not
|
||||
been ported to use Kokkos. Using CUDA MPS is recommended in this
|
||||
scenario. Using a CUDA-aware MPI library with support for GPU-direct
|
||||
is highly recommended. GPU-direct use can be avoided by using
|
||||
"-pk kokkos gpu/direct no"_package.html.
|
||||
As above for multi-core CPUs (and no GPU), if N is the number of
|
||||
physical cores/node, then the number of MPI tasks/node should not
|
||||
exceed N.
|
||||
Use the "-k" "command-line switch"_Run_options.html to specify the
|
||||
number of GPUs per node. Typically the -np setting of the mpirun command
|
||||
should set the number of MPI tasks/node to be equal to the number of
|
||||
physical GPUs on the node. You can assign multiple MPI tasks to the same
|
||||
GPU with the KOKKOS package, but this is usually only faster if some
|
||||
portions of the input script have not been ported to use Kokkos. In this
|
||||
case, also packing/unpacking communication buffers on the host may give
|
||||
speedup (see the KOKKOS "package"_package.html command). Using CUDA MPS
|
||||
is recommended in this scenario.
|
||||
|
||||
Using a CUDA-aware MPI library with
|
||||
support for GPU-direct is highly recommended. GPU-direct use can be
|
||||
avoided by using "-pk kokkos gpu/direct no"_package.html. As above for
|
||||
multi-core CPUs (and no GPU), if N is the number of physical cores/node,
|
||||
then the number of MPI tasks/node should not exceed N.
|
||||
|
||||
-k on g Ng :pre
|
||||
|
||||
|
|
|
@ -64,13 +64,16 @@ args = arguments specific to the style :l
|
|||
{no_affinity} values = none
|
||||
{kokkos} args = keyword value ...
|
||||
zero or more keyword/value pairs may be appended
|
||||
keywords = {neigh} or {neigh/qeq} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} or {comm/reverse} or {gpu/direct}
|
||||
keywords = {neigh} or {neigh/qeq} or {neigh/thread} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} or {comm/reverse} or {gpu/direct}
|
||||
{neigh} value = {full} or {half}
|
||||
full = full neighbor list
|
||||
half = half neighbor list built in thread-safe manner
|
||||
{neigh/qeq} value = {full} or {half}
|
||||
full = full neighbor list
|
||||
half = half neighbor list built in thread-safe manner
|
||||
{neigh/thread} value = {off} or {on}
|
||||
off = thread only over atoms
|
||||
on = thread over both atoms and neighbors
|
||||
{newton} = {off} or {on}
|
||||
off = set Newton pairwise and bonded flags off
|
||||
on = set Newton pairwise and bonded flags on
|
||||
|
@ -442,7 +445,19 @@ running on CPUs, a {half} neighbor list is the default because it are
|
|||
often faster, just as it is for non-accelerated pair styles. Similarly,
|
||||
the {neigh/qeq} keyword determines how neighbor lists are built for "fix
|
||||
qeq/reax/kk"_fix_qeq_reax.html. If not explicitly set, the value of
|
||||
{neigh/qeq} will match {neigh}.
|
||||
{neigh/qeq} will match {neigh}.
|
||||
|
||||
If the {neigh/thread} keyword is set to {off}, then the KOKKOS package
|
||||
threads only over atoms. However, for small systems, this may not expose
|
||||
enough parallelism to keep a GPU busy. When this keyword is set to {on},
|
||||
the KOKKOS package threads over both atoms and neighbors of atoms. When
|
||||
using {neigh/thread} {on}, a full neighbor list must also be used. Using
|
||||
{neigh/thread} {on} may be slower for large systems, so this this option
|
||||
is turned on by default only when there are 16K atoms or less owned by
|
||||
an MPI rank and when using a full neighbor list. Not all KOKKOS-enabled
|
||||
potentials support this keyword yet, and only thread over atoms. Many
|
||||
simple pair-wise potentials such as Lennard-Jones do support threading
|
||||
over both atoms and neighbors.
|
||||
|
||||
The {newton} keyword sets the Newton flags for pairwise and bonded
|
||||
interactions to {off} or {on}, the same as the "newton"_newton.html
|
||||
|
@ -475,10 +490,10 @@ are rebuilt. The data is only for atoms that migrate to new processors.
|
|||
"Forward" communication happens every timestep. "Reverse" communication
|
||||
happens every timestep if the {newton} option is on. The data is for
|
||||
atom coordinates and any other atom properties that needs to be updated
|
||||
for ghost atoms owned by each processor.
|
||||
for ghost atoms owned by each processor.
|
||||
|
||||
The {comm} keyword is simply a short-cut to set the same value for both
|
||||
the {comm/exchange} and {comm/forward} and {comm/reverse} keywords.
|
||||
the {comm/exchange} and {comm/forward} and {comm/reverse} keywords.
|
||||
|
||||
The value options for all 3 keywords are {no} or {host} or {device}. A
|
||||
value of {no} means to use the standard non-KOKKOS method of
|
||||
|
@ -486,26 +501,26 @@ packing/unpacking data for the communication. A value of {host} means to
|
|||
use the host, typically a multi-core CPU, and perform the
|
||||
packing/unpacking in parallel with threads. A value of {device} means to
|
||||
use the device, typically a GPU, to perform the packing/unpacking
|
||||
operation.
|
||||
operation.
|
||||
|
||||
The optimal choice for these keywords depends on the input script and
|
||||
the hardware used. The {no} value is useful for verifying that the
|
||||
Kokkos-based {host} and {device} values are working correctly. It is the
|
||||
default when running on CPUs since it is usually the fastest.
|
||||
default when running on CPUs since it is usually the fastest.
|
||||
|
||||
When running on CPUs or Xeon Phi, the {host} and {device} values work
|
||||
identically. When using GPUs, the {device} value is the default since it
|
||||
will typically be optimal if all of your styles used in your input
|
||||
script are supported by the KOKKOS package. In this case data can stay
|
||||
on the GPU for many timesteps without being moved between the host and
|
||||
GPU, if you use the {device} value. This requires that your MPI is able
|
||||
to access GPU memory directly. Currently that is true for OpenMPI 1.8
|
||||
(or later versions), Mvapich2 1.9 (or later), and CrayMPI. If your
|
||||
script uses styles (e.g. fixes) which are not yet supported by the
|
||||
KOKKOS package, then data has to be move between the host and device
|
||||
anyway, so it is typically faster to let the host handle communication,
|
||||
by using the {host} value. Using {host} instead of {no} will enable use
|
||||
of multiple threads to pack/unpack communicated data.
|
||||
GPU, if you use the {device} value. If your script uses styles (e.g.
|
||||
fixes) which are not yet supported by the KOKKOS package, then data has
|
||||
to be move between the host and device anyway, so it is typically faster
|
||||
to let the host handle communication, by using the {host} value. Using
|
||||
{host} instead of {no} will enable use of multiple threads to
|
||||
pack/unpack communicated data. When running small systems on a GPU,
|
||||
performing the exchange pack/unpack on the host CPU can give speedup
|
||||
since it reduces the number of CUDA kernel launches.
|
||||
|
||||
The {gpu/direct} keyword chooses whether GPU-direct will be used. When
|
||||
this keyword is set to {on}, buffers in GPU memory are passed directly
|
||||
|
@ -518,7 +533,8 @@ the {gpu/direct} keyword is automatically set to {off} by default. When
|
|||
the {gpu/direct} keyword is set to {off} while any of the {comm}
|
||||
keywords are set to {device}, the value for these {comm} keywords will
|
||||
be automatically changed to {host}. This setting has no effect if not
|
||||
running on GPUs.
|
||||
running on GPUs. GPU-direct is available for OpenMPI 1.8 (or later
|
||||
versions), Mvapich2 1.9 (or later), and CrayMPI.
|
||||
|
||||
:line
|
||||
|
||||
|
@ -630,11 +646,12 @@ neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default
|
|||
value, comm = device, gpu/direct = on. When LAMMPS can safely detect
|
||||
that GPU-direct is not available, the default value of gpu/direct
|
||||
becomes "off". For CPUs or Xeon Phis, the option defaults are neigh =
|
||||
half, neigh/qeq = half, newton = on, binsize = 0.0, and comm = no. These
|
||||
settings are made automatically by the required "-k on" "command-line
|
||||
switch"_Run_options.html. You can change them by using the package
|
||||
kokkos command in your input script or via the "-pk kokkos command-line
|
||||
switch"_Run_options.html.
|
||||
half, neigh/qeq = half, newton = on, binsize = 0.0, and comm = no. The
|
||||
option neigh/thread = on when there are 16K atoms or less on an MPI
|
||||
rank, otherwise it is "off". These settings are made automatically by
|
||||
the required "-k on" "command-line switch"_Run_options.html. You can
|
||||
change them by using the package kokkos command in your input script or
|
||||
via the "-pk kokkos command-line switch"_Run_options.html.
|
||||
|
||||
For the OMP package, the default is Nthreads = 0 and the option
|
||||
defaults are neigh = yes. These settings are made automatically if
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "memory_kokkos.h"
|
||||
#include "error.h"
|
||||
#include "kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
|
@ -270,8 +271,10 @@ int AtomKokkos::add_custom(const char *name, int flag)
|
|||
int n = strlen(name) + 1;
|
||||
dname[index] = new char[n];
|
||||
strcpy(dname[index],name);
|
||||
this->sync(Device,DVECTOR_MASK);
|
||||
memoryKK->grow_kokkos(k_dvector,dvector,ndvector,nmax,
|
||||
"atom:dvector");
|
||||
this->modified(Device,DVECTOR_MASK);
|
||||
}
|
||||
|
||||
return index;
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define DELTA 10
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
@ -59,14 +59,15 @@ AtomVecAngleKokkos::AtomVecAngleKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
|
|||
|
||||
void AtomVecAngleKokkos::grow(int n)
|
||||
{
|
||||
if (n == 0) nmax += DELTA;
|
||||
int step = MAX(DELTA,nmax*0.01);
|
||||
if (n == 0) nmax += step;
|
||||
else nmax = n;
|
||||
atomKK->nmax = nmax;
|
||||
if (nmax < 0 || nmax > MAXSMALLINT)
|
||||
error->one(FLERR,"Per-processor system is too big");
|
||||
|
||||
sync(Device,ALL_MASK);
|
||||
modified(Device,ALL_MASK);
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
atomKK->modified(Device,ALL_MASK);
|
||||
|
||||
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
|
||||
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
|
||||
|
@ -98,7 +99,7 @@ void AtomVecAngleKokkos::grow(int n)
|
|||
"atom:angle_atom3");
|
||||
|
||||
grow_reset();
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
|
@ -282,7 +283,7 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
|
|||
// Choose correct forward PackComm kernel
|
||||
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
atomKK->sync(Host,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecAngleKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
|
||||
|
@ -309,7 +310,7 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
|
|||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
atomKK->sync(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecAngleKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
|
||||
|
@ -397,8 +398,8 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
|
|||
const int nfirst, const int &pbc_flag,
|
||||
const int* const pbc) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
modified(Host,X_MASK);
|
||||
atomKK->sync(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,1,1>
|
||||
|
@ -429,8 +430,8 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
|
|||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
atomKK->sync(Device,X_MASK);
|
||||
atomKK->modified(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,1,1>
|
||||
|
@ -493,13 +494,13 @@ struct AtomVecAngleKokkos_UnpackComm {
|
|||
void AtomVecAngleKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf ) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
modified(Host,X_MASK);
|
||||
atomKK->sync(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
struct AtomVecAngleKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
atomKK->sync(Device,X_MASK);
|
||||
atomKK->modified(Device,X_MASK);
|
||||
struct AtomVecAngleKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
|
@ -642,7 +643,7 @@ void AtomVecAngleKokkos::unpack_comm_vel(int n, int first, double *buf)
|
|||
int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
sync(Host,F_MASK);
|
||||
atomKK->sync(Host,F_MASK);
|
||||
|
||||
int m = 0;
|
||||
const int last = first + n;
|
||||
|
@ -659,7 +660,7 @@ int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf)
|
|||
void AtomVecAngleKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
modified(Host,F_MASK);
|
||||
atomKK->modified(Host,F_MASK);
|
||||
|
||||
int m = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
|
@ -960,9 +961,9 @@ struct AtomVecAngleKokkos_UnpackBorder {
|
|||
void AtomVecAngleKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space) {
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
if(space==Host) {
|
||||
struct AtomVecAngleKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
|
||||
|
@ -984,7 +985,7 @@ void AtomVecAngleKokkos::unpack_border(int n, int first, double *buf)
|
|||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
|
@ -1010,7 +1011,7 @@ void AtomVecAngleKokkos::unpack_border_vel(int n, int first, double *buf)
|
|||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
|
@ -1412,7 +1413,7 @@ int AtomVecAngleKokkos::unpack_exchange(double *buf)
|
|||
{
|
||||
int nlocal = atom->nlocal;
|
||||
if (nlocal == nmax) grow(0);
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | SPECIAL_MASK);
|
||||
|
||||
|
@ -1487,7 +1488,7 @@ int AtomVecAngleKokkos::size_restart()
|
|||
|
||||
int AtomVecAngleKokkos::pack_restart(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | SPECIAL_MASK);
|
||||
|
||||
|
@ -1541,7 +1542,7 @@ int AtomVecAngleKokkos::unpack_restart(double *buf)
|
|||
if (atom->nextra_store)
|
||||
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
|
||||
}
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | SPECIAL_MASK);
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define DELTA 10
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
@ -55,14 +55,15 @@ AtomVecAtomicKokkos::AtomVecAtomicKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
|
|||
|
||||
void AtomVecAtomicKokkos::grow(int n)
|
||||
{
|
||||
if (n == 0) nmax += DELTA;
|
||||
int step = MAX(DELTA,nmax*0.01);
|
||||
if (n == 0) nmax += step;
|
||||
else nmax = n;
|
||||
atomKK->nmax = nmax;
|
||||
if (nmax < 0 || nmax > MAXSMALLINT)
|
||||
error->one(FLERR,"Per-processor system is too big");
|
||||
|
||||
sync(Device,ALL_MASK);
|
||||
modified(Device,ALL_MASK);
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
atomKK->modified(Device,ALL_MASK);
|
||||
|
||||
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
|
||||
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
|
||||
|
@ -74,7 +75,7 @@ void AtomVecAtomicKokkos::grow(int n)
|
|||
memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f");
|
||||
|
||||
grow_reset();
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
|
@ -393,9 +394,9 @@ struct AtomVecAtomicKokkos_UnpackBorder {
|
|||
|
||||
void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
if(space==Host) {
|
||||
struct AtomVecAtomicKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
|
@ -415,7 +416,7 @@ void AtomVecAtomicKokkos::unpack_border(int n, int first, double *buf)
|
|||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
|
@ -440,7 +441,7 @@ void AtomVecAtomicKokkos::unpack_border_vel(int n, int first, double *buf)
|
|||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
|
@ -668,7 +669,7 @@ int AtomVecAtomicKokkos::unpack_exchange(double *buf)
|
|||
{
|
||||
int nlocal = atom->nlocal;
|
||||
if (nlocal == nmax) grow(0);
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK);
|
||||
|
||||
int m = 1;
|
||||
|
@ -720,7 +721,7 @@ int AtomVecAtomicKokkos::size_restart()
|
|||
|
||||
int AtomVecAtomicKokkos::pack_restart(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK );
|
||||
|
||||
int m = 1;
|
||||
|
@ -755,7 +756,7 @@ int AtomVecAtomicKokkos::unpack_restart(double *buf)
|
|||
if (atom->nextra_store)
|
||||
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
|
||||
}
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK );
|
||||
|
||||
int m = 1;
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define DELTA 10
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
@ -58,14 +58,15 @@ AtomVecBondKokkos::AtomVecBondKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
|
|||
|
||||
void AtomVecBondKokkos::grow(int n)
|
||||
{
|
||||
if (n == 0) nmax += DELTA;
|
||||
int step = MAX(DELTA,nmax*0.01);
|
||||
if (n == 0) nmax += step;
|
||||
else nmax = n;
|
||||
atomKK->nmax = nmax;
|
||||
if (nmax < 0 || nmax > MAXSMALLINT)
|
||||
error->one(FLERR,"Per-processor system is too big");
|
||||
|
||||
sync(Device,ALL_MASK);
|
||||
modified(Device,ALL_MASK);
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
atomKK->modified(Device,ALL_MASK);
|
||||
|
||||
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
|
||||
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
|
||||
|
@ -84,7 +85,7 @@ void AtomVecBondKokkos::grow(int n)
|
|||
memoryKK->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom,"atom:bond_atom");
|
||||
|
||||
grow_reset();
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atomKK->nextra_grow; iextra++)
|
||||
|
@ -468,9 +469,9 @@ struct AtomVecBondKokkos_UnpackBorder {
|
|||
void AtomVecBondKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space) {
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
if(space==Host) {
|
||||
struct AtomVecBondKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
|
||||
|
@ -492,7 +493,7 @@ void AtomVecBondKokkos::unpack_border(int n, int first, double *buf)
|
|||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
|
@ -518,7 +519,7 @@ void AtomVecBondKokkos::unpack_border_vel(int n, int first, double *buf)
|
|||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
|
@ -866,7 +867,7 @@ int AtomVecBondKokkos::unpack_exchange(double *buf)
|
|||
{
|
||||
int nlocal = atom->nlocal;
|
||||
if (nlocal == nmax) grow(0);
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | SPECIAL_MASK);
|
||||
|
||||
int k;
|
||||
|
@ -934,7 +935,7 @@ int AtomVecBondKokkos::size_restart()
|
|||
|
||||
int AtomVecBondKokkos::pack_restart(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | SPECIAL_MASK);
|
||||
int m = 1;
|
||||
buf[m++] = h_x(i,0);
|
||||
|
@ -978,7 +979,7 @@ int AtomVecBondKokkos::unpack_restart(double *buf)
|
|||
if (atom->nextra_store)
|
||||
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
|
||||
}
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | SPECIAL_MASK);
|
||||
int m = 1;
|
||||
h_x(nlocal,0) = buf[m++];
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define DELTA 10
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
@ -58,14 +58,15 @@ AtomVecChargeKokkos::AtomVecChargeKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
|
|||
|
||||
void AtomVecChargeKokkos::grow(int n)
|
||||
{
|
||||
if (n == 0) nmax += DELTA;
|
||||
int step = MAX(DELTA,nmax*0.01);
|
||||
if (n == 0) nmax += step;
|
||||
else nmax = n;
|
||||
atomKK->nmax = nmax;
|
||||
if (nmax < 0 || nmax > MAXSMALLINT)
|
||||
error->one(FLERR,"Per-processor system is too big");
|
||||
|
||||
sync(Device,ALL_MASK);
|
||||
modified(Device,ALL_MASK);
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
atomKK->modified(Device,ALL_MASK);
|
||||
|
||||
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
|
||||
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
|
||||
|
@ -79,7 +80,7 @@ void AtomVecChargeKokkos::grow(int n)
|
|||
memoryKK->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q");
|
||||
|
||||
grow_reset();
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
|
@ -494,7 +495,7 @@ void AtomVecChargeKokkos::unpack_border_kokkos(const int &n, const int &first,
|
|||
f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_q,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -510,7 +511,7 @@ void AtomVecChargeKokkos::unpack_border(int n, int first, double *buf)
|
|||
if (i == nmax) {
|
||||
grow(0);
|
||||
}
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
|
@ -536,7 +537,7 @@ void AtomVecChargeKokkos::unpack_border_vel(int n, int first, double *buf)
|
|||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
|
@ -797,7 +798,7 @@ int AtomVecChargeKokkos::unpack_exchange(double *buf)
|
|||
{
|
||||
int nlocal = atom->nlocal;
|
||||
if (nlocal == nmax) grow(0);
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | Q_MASK);
|
||||
|
||||
int m = 1;
|
||||
|
@ -850,7 +851,7 @@ int AtomVecChargeKokkos::size_restart()
|
|||
|
||||
int AtomVecChargeKokkos::pack_restart(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | Q_MASK);
|
||||
|
||||
int m = 1;
|
||||
|
@ -888,7 +889,7 @@ int AtomVecChargeKokkos::unpack_restart(double *buf)
|
|||
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
|
||||
}
|
||||
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | Q_MASK);
|
||||
|
||||
int m = 1;
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define DELTA 10
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
@ -60,14 +60,15 @@ AtomVecDPDKokkos::AtomVecDPDKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
|
|||
|
||||
void AtomVecDPDKokkos::grow(int n)
|
||||
{
|
||||
if (n == 0) nmax += DELTA;
|
||||
int step = MAX(DELTA,nmax*0.01);
|
||||
if (n == 0) nmax += step;
|
||||
else nmax = n;
|
||||
atomKK->nmax = nmax;
|
||||
if (nmax < 0 || nmax > MAXSMALLINT)
|
||||
error->one(FLERR,"Per-processor system is too big");
|
||||
|
||||
sync(Device,ALL_MASK);
|
||||
modified(Device,ALL_MASK);
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
atomKK->modified(Device,ALL_MASK);
|
||||
|
||||
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
|
||||
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
|
||||
|
@ -93,7 +94,7 @@ void AtomVecDPDKokkos::grow(int n)
|
|||
modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
|
||||
|
||||
grow_reset();
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -158,7 +159,7 @@ void AtomVecDPDKokkos::grow_reset()
|
|||
|
||||
void AtomVecDPDKokkos::copy(int i, int j, int delflag)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | DPDTHETA_MASK |
|
||||
UCG_MASK | UCGNEW_MASK |
|
||||
UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK);
|
||||
|
@ -184,7 +185,7 @@ void AtomVecDPDKokkos::copy(int i, int j, int delflag)
|
|||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
|
||||
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | DPDTHETA_MASK |
|
||||
UCG_MASK | UCGNEW_MASK |
|
||||
UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK);
|
||||
|
@ -268,7 +269,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
|
|||
// Choose correct forward PackComm kernel
|
||||
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecDPDKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,
|
||||
|
@ -303,7 +304,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
|
|||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecDPDKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,
|
||||
|
@ -410,8 +411,8 @@ struct AtomVecDPDKokkos_PackCommSelf {
|
|||
int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
|
||||
const int nfirst, const int &pbc_flag, const int* const pbc) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecDPDKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,
|
||||
|
@ -446,8 +447,8 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list
|
|||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecDPDKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,
|
||||
|
@ -528,15 +529,15 @@ struct AtomVecDPDKokkos_UnpackComm {
|
|||
void AtomVecDPDKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf ) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
struct AtomVecDPDKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,
|
||||
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
|
||||
buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
} else {
|
||||
sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
struct AtomVecDPDKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,
|
||||
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
|
||||
buf,first);
|
||||
|
@ -552,7 +553,7 @@ int AtomVecDPDKokkos::pack_comm(int n, int *list, double *buf,
|
|||
int i,j,m;
|
||||
double dx,dy,dz;
|
||||
|
||||
sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
|
@ -598,7 +599,7 @@ int AtomVecDPDKokkos::pack_comm_vel(int n, int *list, double *buf,
|
|||
int i,j,m;
|
||||
double dx,dy,dz,dvx,dvy,dvz;
|
||||
|
||||
sync(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->sync(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
|
@ -685,7 +686,7 @@ void AtomVecDPDKokkos::unpack_comm(int n, int first, double *buf)
|
|||
h_uChem[i] = buf[m++];
|
||||
}
|
||||
|
||||
modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -709,7 +710,7 @@ void AtomVecDPDKokkos::unpack_comm_vel(int n, int first, double *buf)
|
|||
h_uChem[i] = buf[m++];
|
||||
}
|
||||
|
||||
modified(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -717,7 +718,7 @@ void AtomVecDPDKokkos::unpack_comm_vel(int n, int first, double *buf)
|
|||
int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
sync(Host,F_MASK);
|
||||
atomKK->sync(Host,F_MASK);
|
||||
|
||||
int m = 0;
|
||||
const int last = first + n;
|
||||
|
@ -734,8 +735,8 @@ int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf)
|
|||
void AtomVecDPDKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
{
|
||||
if(n > 0) {
|
||||
sync(Host,F_MASK);
|
||||
modified(Host,F_MASK);
|
||||
atomKK->sync(Host,F_MASK);
|
||||
atomKK->modified(Host,F_MASK);
|
||||
}
|
||||
|
||||
int m = 0;
|
||||
|
@ -819,7 +820,7 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA
|
|||
{
|
||||
X_FLOAT dx,dy,dz;
|
||||
|
||||
sync(space,ALL_MASK);
|
||||
atomKK->sync(space,ALL_MASK);
|
||||
|
||||
if (pbc_flag != 0) {
|
||||
if (domain->triclinic == 0) {
|
||||
|
@ -876,7 +877,7 @@ int AtomVecDPDKokkos::pack_border(int n, int *list, double *buf,
|
|||
int i,j,m;
|
||||
double dx,dy,dz;
|
||||
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
|
@ -937,7 +938,7 @@ int AtomVecDPDKokkos::pack_border_vel(int n, int *list, double *buf,
|
|||
int i,j,m;
|
||||
double dx,dy,dz,dvx,dvy,dvz;
|
||||
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
|
@ -1032,7 +1033,7 @@ int AtomVecDPDKokkos::pack_comm_hybrid(int n, int *list, double *buf)
|
|||
{
|
||||
int i,j,m;
|
||||
|
||||
sync(Host,DPDTHETA_MASK | UCOND_MASK |
|
||||
atomKK->sync(Host,DPDTHETA_MASK | UCOND_MASK |
|
||||
UMECH_MASK | UCHEM_MASK);
|
||||
|
||||
m = 0;
|
||||
|
@ -1052,7 +1053,7 @@ int AtomVecDPDKokkos::pack_border_hybrid(int n, int *list, double *buf)
|
|||
{
|
||||
int i,j,m;
|
||||
|
||||
sync(Host,DPDTHETA_MASK | UCOND_MASK |
|
||||
atomKK->sync(Host,DPDTHETA_MASK | UCOND_MASK |
|
||||
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
|
||||
|
||||
m = 0;
|
||||
|
@ -1127,11 +1128,11 @@ struct AtomVecDPDKokkos_UnpackBorder {
|
|||
|
||||
void AtomVecDPDKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK|
|
||||
UCG_MASK|UCGNEW_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK|
|
||||
UCG_MASK|UCGNEW_MASK|DVECTOR_MASK);
|
||||
if(space==Host) {
|
||||
|
@ -1179,7 +1180,7 @@ void AtomVecDPDKokkos::unpack_border(int n, int first, double *buf)
|
|||
m += modify->fix[atom->extra_border[iextra]]->
|
||||
unpack_border(n,first,&buf[m]);
|
||||
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK|
|
||||
UCG_MASK|UCGNEW_MASK|DVECTOR_MASK);
|
||||
}
|
||||
|
@ -1217,7 +1218,7 @@ void AtomVecDPDKokkos::unpack_border_vel(int n, int first, double *buf)
|
|||
m += modify->fix[atom->extra_border[iextra]]->
|
||||
unpack_border(n,first,&buf[m]);
|
||||
|
||||
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK|
|
||||
UCG_MASK|UCGNEW_MASK|DVECTOR_MASK);
|
||||
}
|
||||
|
@ -1237,7 +1238,7 @@ int AtomVecDPDKokkos::unpack_comm_hybrid(int n, int first, double *buf)
|
|||
h_uChem(i) = buf[m++];
|
||||
}
|
||||
|
||||
modified(Host,DPDTHETA_MASK | UCOND_MASK |
|
||||
atomKK->modified(Host,DPDTHETA_MASK | UCOND_MASK |
|
||||
UMECH_MASK | UCHEM_MASK );
|
||||
|
||||
return m;
|
||||
|
@ -1260,7 +1261,7 @@ int AtomVecDPDKokkos::unpack_border_hybrid(int n, int first, double *buf)
|
|||
h_uCGnew(i) = buf[m++];
|
||||
}
|
||||
|
||||
modified(Host,DPDTHETA_MASK | UCOND_MASK |
|
||||
atomKK->modified(Host,DPDTHETA_MASK | UCOND_MASK |
|
||||
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
|
||||
|
||||
return m;
|
||||
|
@ -1384,7 +1385,7 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d
|
|||
int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK |
|
||||
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK |
|
||||
DVECTOR_MASK);
|
||||
|
@ -1402,7 +1403,7 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d
|
|||
|
||||
int AtomVecDPDKokkos::pack_exchange(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK |
|
||||
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK |
|
||||
DVECTOR_MASK);
|
||||
|
@ -1518,7 +1519,7 @@ int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nre
|
|||
k_count.sync<LMPHostType>();
|
||||
}
|
||||
|
||||
modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK |
|
||||
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK |
|
||||
DVECTOR_MASK);
|
||||
|
@ -1556,7 +1557,7 @@ int AtomVecDPDKokkos::unpack_exchange(double *buf)
|
|||
m += modify->fix[atom->extra_grow[iextra]]->
|
||||
unpack_exchange(nlocal,&buf[m]);
|
||||
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK |
|
||||
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK |
|
||||
DVECTOR_MASK);
|
||||
|
@ -1593,7 +1594,7 @@ int AtomVecDPDKokkos::size_restart()
|
|||
|
||||
int AtomVecDPDKokkos::pack_restart(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | DPDTHETA_MASK |
|
||||
UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK);
|
||||
|
||||
|
@ -1658,7 +1659,7 @@ int AtomVecDPDKokkos::unpack_restart(double *buf)
|
|||
for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
|
||||
}
|
||||
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | DPDTHETA_MASK |
|
||||
UCG_MASK | UCGNEW_MASK |
|
||||
UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK);
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define DELTA 10
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
@ -58,14 +58,15 @@ AtomVecFullKokkos::AtomVecFullKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
|
|||
|
||||
void AtomVecFullKokkos::grow(int n)
|
||||
{
|
||||
if (n == 0) nmax += DELTA;
|
||||
int step = MAX(DELTA,nmax*0.01);
|
||||
if (n == 0) nmax += step;
|
||||
else nmax = n;
|
||||
atomKK->nmax = nmax;
|
||||
if (nmax < 0 || nmax > MAXSMALLINT)
|
||||
error->one(FLERR,"Per-processor system is too big");
|
||||
|
||||
sync(Device,ALL_MASK);
|
||||
modified(Device,ALL_MASK);
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
atomKK->modified(Device,ALL_MASK);
|
||||
|
||||
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
|
||||
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
|
||||
|
@ -123,7 +124,7 @@ void AtomVecFullKokkos::grow(int n)
|
|||
atomKK->improper_per_atom,"atom:improper_atom4");
|
||||
|
||||
grow_reset();
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
|
@ -608,9 +609,9 @@ struct AtomVecFullKokkos_UnpackBorder {
|
|||
void AtomVecFullKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space) {
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
if(space==Host) {
|
||||
struct AtomVecFullKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,h_molecule,first);
|
||||
|
@ -632,7 +633,7 @@ void AtomVecFullKokkos::unpack_border(int n, int first, double *buf)
|
|||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
|
@ -659,7 +660,7 @@ void AtomVecFullKokkos::unpack_border_vel(int n, int first, double *buf)
|
|||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
|
@ -1204,7 +1205,7 @@ int AtomVecFullKokkos::unpack_exchange(double *buf)
|
|||
{
|
||||
int nlocal = atom->nlocal;
|
||||
if (nlocal == nmax) grow(0);
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
|
||||
|
||||
|
@ -1297,7 +1298,7 @@ int AtomVecFullKokkos::size_restart()
|
|||
|
||||
int AtomVecFullKokkos::pack_restart(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
|
||||
|
||||
|
@ -1370,10 +1371,10 @@ int AtomVecFullKokkos::unpack_restart(double *buf)
|
|||
if (atom->nextra_store)
|
||||
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
|
||||
}
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
|
||||
|
||||
|
|
|
@ -307,7 +307,7 @@ int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int
|
|||
int AtomVecHybridKokkos::pack_comm(int n, int *list, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
sync(Host,X_MASK);
|
||||
atomKK->sync(Host,X_MASK);
|
||||
|
||||
int i,j,k,m;
|
||||
double dx,dy,dz;
|
||||
|
@ -351,7 +351,7 @@ int AtomVecHybridKokkos::pack_comm(int n, int *list, double *buf,
|
|||
int AtomVecHybridKokkos::pack_comm_vel(int n, int *list, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
sync(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
atomKK->sync(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
|
||||
int i,j,k,m;
|
||||
double dx,dy,dz,dvx,dvy,dvz;
|
||||
|
@ -463,7 +463,7 @@ void AtomVecHybridKokkos::unpack_comm(int n, int first, double *buf)
|
|||
h_x(i,2) = buf[m++];
|
||||
}
|
||||
|
||||
modified(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
|
||||
// unpack sub-style contributions as contiguous chunks
|
||||
|
||||
|
@ -500,7 +500,7 @@ void AtomVecHybridKokkos::unpack_comm_vel(int n, int first, double *buf)
|
|||
}
|
||||
}
|
||||
|
||||
modified(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
|
||||
// unpack sub-style contributions as contiguous chunks
|
||||
|
||||
|
@ -512,7 +512,7 @@ void AtomVecHybridKokkos::unpack_comm_vel(int n, int first, double *buf)
|
|||
|
||||
int AtomVecHybridKokkos::pack_reverse(int n, int first, double *buf)
|
||||
{
|
||||
sync(Host,F_MASK);
|
||||
atomKK->sync(Host,F_MASK);
|
||||
|
||||
int i,k,m,last;
|
||||
|
||||
|
@ -546,7 +546,7 @@ void AtomVecHybridKokkos::unpack_reverse(int n, int *list, double *buf)
|
|||
h_f(j,2) += buf[m++];
|
||||
}
|
||||
|
||||
modified(Host,F_MASK);
|
||||
atomKK->modified(Host,F_MASK);
|
||||
|
||||
// unpack sub-style contributions as contiguous chunks
|
||||
|
||||
|
@ -559,7 +559,7 @@ void AtomVecHybridKokkos::unpack_reverse(int n, int *list, double *buf)
|
|||
int AtomVecHybridKokkos::pack_border(int n, int *list, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
atomKK->sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
|
||||
int i,j,k,m;
|
||||
double dx,dy,dz;
|
||||
|
@ -613,7 +613,7 @@ int AtomVecHybridKokkos::pack_border(int n, int *list, double *buf,
|
|||
int AtomVecHybridKokkos::pack_border_vel(int n, int *list, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
atomKK->sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
int i,j,k,m;
|
||||
double dx,dy,dz,dvx,dvy,dvz;
|
||||
int omega_flag = atom->omega_flag;
|
||||
|
@ -741,7 +741,7 @@ void AtomVecHybridKokkos::unpack_border(int n, int first, double *buf)
|
|||
h_mask[i] = (int) ubuf(buf[m++]).i;
|
||||
}
|
||||
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
|
||||
// unpack sub-style contributions as contiguous chunks
|
||||
|
||||
|
@ -787,7 +787,7 @@ void AtomVecHybridKokkos::unpack_border_vel(int n, int first, double *buf)
|
|||
}
|
||||
}
|
||||
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
|
||||
// unpack sub-style contributions as contiguous chunks
|
||||
|
||||
|
@ -969,7 +969,7 @@ void AtomVecHybridKokkos::create_atom(int itype, double *coord)
|
|||
|
||||
void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **values)
|
||||
{
|
||||
sync(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
atomKK->sync(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
if (nlocal == nmax) grow(0);
|
||||
|
@ -1000,7 +1000,7 @@ void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **val
|
|||
h_angmom(nlocal,2) = 0.0;
|
||||
}
|
||||
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
|
||||
// each sub-style parses sub-style specific values
|
||||
|
||||
|
@ -1017,13 +1017,13 @@ void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **val
|
|||
|
||||
void AtomVecHybridKokkos::data_vel(int m, char **values)
|
||||
{
|
||||
sync(Host,V_MASK);
|
||||
atomKK->sync(Host,V_MASK);
|
||||
|
||||
h_v(m,0) = atof(values[0]);
|
||||
h_v(m,1) = atof(values[1]);
|
||||
h_v(m,2) = atof(values[2]);
|
||||
|
||||
modified(Host,V_MASK);
|
||||
atomKK->modified(Host,V_MASK);
|
||||
|
||||
// each sub-style parses sub-style specific values
|
||||
|
||||
|
@ -1038,7 +1038,7 @@ void AtomVecHybridKokkos::data_vel(int m, char **values)
|
|||
|
||||
void AtomVecHybridKokkos::pack_data(double **buf)
|
||||
{
|
||||
sync(Host,TAG_MASK|TYPE_MASK|X_MASK);
|
||||
atomKK->sync(Host,TAG_MASK|TYPE_MASK|X_MASK);
|
||||
|
||||
int k,m;
|
||||
|
||||
|
@ -1089,7 +1089,7 @@ void AtomVecHybridKokkos::write_data(FILE *fp, int n, double **buf)
|
|||
|
||||
void AtomVecHybridKokkos::pack_vel(double **buf)
|
||||
{
|
||||
sync(Host,V_MASK);
|
||||
atomKK->sync(Host,V_MASK);
|
||||
|
||||
int k,m;
|
||||
|
||||
|
|
|
@ -267,6 +267,114 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, c
|
|||
return n*3;
|
||||
}
|
||||
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType,int TRICLINIC>
|
||||
struct AtomVecKokkos_PackCommSelfFused {
|
||||
typedef DeviceType device_type;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
|
||||
typename ArrayTypes<DeviceType>::t_x_array _xw;
|
||||
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
|
||||
typename ArrayTypes<DeviceType>::t_int_2d_const _pbc;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_const _pbc_flag;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_const _firstrecv;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_const _sendnum_scan;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_const _g2l;
|
||||
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
|
||||
|
||||
AtomVecKokkos_PackCommSelfFused(
|
||||
const typename DAT::tdual_x_array &x,
|
||||
const typename DAT::tdual_int_2d &list,
|
||||
const typename DAT::tdual_int_2d &pbc,
|
||||
const typename DAT::tdual_int_1d &pbc_flag,
|
||||
const typename DAT::tdual_int_1d &firstrecv,
|
||||
const typename DAT::tdual_int_1d &sendnum_scan,
|
||||
const typename DAT::tdual_int_1d &g2l,
|
||||
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
|
||||
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz):
|
||||
_x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),
|
||||
_list(list.view<DeviceType>()),
|
||||
_pbc(pbc.view<DeviceType>()),
|
||||
_pbc_flag(pbc_flag.view<DeviceType>()),
|
||||
_firstrecv(firstrecv.view<DeviceType>()),
|
||||
_sendnum_scan(sendnum_scan.view<DeviceType>()),
|
||||
_g2l(g2l.view<DeviceType>()),
|
||||
_xprd(xprd),_yprd(yprd),_zprd(zprd),
|
||||
_xy(xy),_xz(xz),_yz(yz) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int& ii) const {
|
||||
|
||||
int iswap = 0;
|
||||
while (ii >= _sendnum_scan[iswap]) iswap++;
|
||||
int i = ii;
|
||||
if (iswap > 0)
|
||||
i = ii - _sendnum_scan[iswap-1];
|
||||
|
||||
const int _nfirst = _firstrecv[iswap];
|
||||
const int nlocal = _firstrecv[0];
|
||||
|
||||
int j = _list(iswap,i);
|
||||
if (j >= nlocal)
|
||||
j = _g2l(j-nlocal);
|
||||
|
||||
if (_pbc_flag(ii) == 0) {
|
||||
_xw(i+_nfirst,0) = _x(j,0);
|
||||
_xw(i+_nfirst,1) = _x(j,1);
|
||||
_xw(i+_nfirst,2) = _x(j,2);
|
||||
} else {
|
||||
if (TRICLINIC == 0) {
|
||||
_xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd;
|
||||
_xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd;
|
||||
_xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd;
|
||||
} else {
|
||||
_xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz;
|
||||
_xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz;
|
||||
_xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &list, const DAT::tdual_int_1d &sendnum_scan,
|
||||
const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, const DAT::tdual_int_2d &pbc,
|
||||
const DAT::tdual_int_1d &g2l) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
modified(Host,X_MASK);
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecKokkos_PackCommSelfFused<LMPHostType,1> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
} else {
|
||||
struct AtomVecKokkos_PackCommSelfFused<LMPHostType,0> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecKokkos_PackCommSelfFused<LMPDeviceType,1> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
} else {
|
||||
struct AtomVecKokkos_PackCommSelfFused<LMPDeviceType,0> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
return n*3;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
|
|
|
@ -51,6 +51,14 @@ class AtomVecKokkos : public AtomVec {
|
|||
const int & iswap, const int nfirst,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
|
||||
virtual int
|
||||
pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &list,
|
||||
const DAT::tdual_int_1d &sendnum_scan,
|
||||
const DAT::tdual_int_1d &firstrecv,
|
||||
const DAT::tdual_int_1d &pbc_flag,
|
||||
const DAT::tdual_int_2d &pbc,
|
||||
const DAT::tdual_int_1d &g2l);
|
||||
|
||||
virtual int
|
||||
pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list,
|
||||
const int & iswap, const DAT::tdual_xfloat_2d &buf,
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define DELTA 10
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
@ -58,14 +58,15 @@ AtomVecMolecularKokkos::AtomVecMolecularKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
|
|||
|
||||
void AtomVecMolecularKokkos::grow(int n)
|
||||
{
|
||||
if (n == 0) nmax += DELTA;
|
||||
int step = MAX(DELTA,nmax*0.01);
|
||||
if (n == 0) nmax += step;
|
||||
else nmax = n;
|
||||
atomKK->nmax = nmax;
|
||||
if (nmax < 0 || nmax > MAXSMALLINT)
|
||||
error->one(FLERR,"Per-processor system is too big");
|
||||
|
||||
sync(Device,ALL_MASK);
|
||||
modified(Device,ALL_MASK);
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
atomKK->modified(Device,ALL_MASK);
|
||||
|
||||
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
|
||||
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
|
||||
|
@ -121,7 +122,7 @@ void AtomVecMolecularKokkos::grow(int n)
|
|||
atomKK->improper_per_atom,"atom:improper_atom4");
|
||||
|
||||
grow_reset();
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
|
@ -361,7 +362,7 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
|
|||
// Choose correct forward PackComm kernel
|
||||
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
atomKK->sync(Host,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecMolecularKokkos_PackComm<LMPHostType,1,1>
|
||||
|
@ -388,7 +389,7 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
|
|||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
atomKK->sync(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,1,1>
|
||||
|
@ -477,8 +478,8 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
|
|||
const int nfirst, const int &pbc_flag,
|
||||
const int* const pbc) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
modified(Host,X_MASK);
|
||||
atomKK->sync(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecMolecularKokkos_PackCommSelf<LMPHostType,1,1>
|
||||
|
@ -505,8 +506,8 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
|
|||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
atomKK->sync(Device,X_MASK);
|
||||
atomKK->modified(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecMolecularKokkos_PackCommSelf<LMPDeviceType,1,1>
|
||||
|
@ -565,13 +566,13 @@ struct AtomVecMolecularKokkos_UnpackComm {
|
|||
void AtomVecMolecularKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf ) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
modified(Host,X_MASK);
|
||||
atomKK->sync(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
struct AtomVecMolecularKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
atomKK->sync(Device,X_MASK);
|
||||
atomKK->modified(Device,X_MASK);
|
||||
struct AtomVecMolecularKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
|
@ -714,7 +715,7 @@ void AtomVecMolecularKokkos::unpack_comm_vel(int n, int first, double *buf)
|
|||
int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
sync(Host,F_MASK);
|
||||
atomKK->sync(Host,F_MASK);
|
||||
|
||||
int m = 0;
|
||||
const int last = first + n;
|
||||
|
@ -731,7 +732,7 @@ int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf)
|
|||
void AtomVecMolecularKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
modified(Host,F_MASK);
|
||||
atomKK->modified(Host,F_MASK);
|
||||
|
||||
int m = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
|
@ -1032,9 +1033,9 @@ struct AtomVecMolecularKokkos_UnpackBorder {
|
|||
void AtomVecMolecularKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space) {
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
if(space==Host) {
|
||||
struct AtomVecMolecularKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
|
||||
|
@ -1056,7 +1057,7 @@ void AtomVecMolecularKokkos::unpack_border(int n, int first, double *buf)
|
|||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
|
@ -1082,7 +1083,7 @@ void AtomVecMolecularKokkos::unpack_border_vel(int n, int first, double *buf)
|
|||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
|
@ -1615,7 +1616,7 @@ int AtomVecMolecularKokkos::unpack_exchange(double *buf)
|
|||
{
|
||||
int nlocal = atom->nlocal;
|
||||
if (nlocal == nmax) grow(0);
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
|
||||
|
||||
|
@ -1707,7 +1708,7 @@ int AtomVecMolecularKokkos::size_restart()
|
|||
|
||||
int AtomVecMolecularKokkos::pack_restart(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
|
||||
|
||||
|
@ -1780,7 +1781,7 @@ int AtomVecMolecularKokkos::unpack_restart(double *buf)
|
|||
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
|
||||
}
|
||||
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define DELTA 10
|
||||
|
||||
static const double MY_PI = 3.14159265358979323846; // pi
|
||||
|
||||
|
@ -93,14 +93,15 @@ void AtomVecSphereKokkos::init()
|
|||
|
||||
void AtomVecSphereKokkos::grow(int n)
|
||||
{
|
||||
if (n == 0) nmax += DELTA;
|
||||
int step = MAX(DELTA,nmax*0.01);
|
||||
if (n == 0) nmax += step;
|
||||
else nmax = n;
|
||||
atom->nmax = nmax;
|
||||
if (nmax < 0 || nmax > MAXSMALLINT)
|
||||
error->one(FLERR,"Per-processor system is too big");
|
||||
|
||||
sync(Device,ALL_MASK);
|
||||
modified(Device,ALL_MASK);
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
atomKK->modified(Device,ALL_MASK);
|
||||
|
||||
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
|
||||
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
|
||||
|
@ -120,7 +121,7 @@ void AtomVecSphereKokkos::grow(int n)
|
|||
modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
|
||||
|
||||
grow_reset();
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -172,7 +173,7 @@ void AtomVecSphereKokkos::grow_reset()
|
|||
|
||||
void AtomVecSphereKokkos::copy(int i, int j, int delflag)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | RADIUS_MASK |
|
||||
RMASS_MASK | OMEGA_MASK);
|
||||
|
||||
|
@ -197,7 +198,7 @@ void AtomVecSphereKokkos::copy(int i, int j, int delflag)
|
|||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
|
||||
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | RADIUS_MASK |
|
||||
RMASS_MASK | OMEGA_MASK);
|
||||
}
|
||||
|
@ -277,7 +278,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
|
|||
// Check whether to always run forward communication on the host
|
||||
// Choose correct forward PackComm kernel
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecSphereKokkos_PackComm<LMPHostType,1,1> f(
|
||||
|
@ -316,7 +317,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
|
|||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecSphereKokkos_PackComm<LMPDeviceType,1,1> f(
|
||||
|
@ -464,7 +465,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
|
|||
const int* const pbc)
|
||||
{
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
if(pbc_flag) {
|
||||
if(deform_vremap) {
|
||||
if(domain->triclinic) {
|
||||
|
@ -595,7 +596,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
|
|||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
if(pbc_flag) {
|
||||
if(deform_vremap) {
|
||||
if(domain->triclinic) {
|
||||
|
@ -795,8 +796,8 @@ int AtomVecSphereKokkos::pack_comm_self(
|
|||
if (radvary == 0)
|
||||
return AtomVecKokkos::pack_comm_self(n,list,iswap,nfirst,pbc_flag,pbc);
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,1,1> f(
|
||||
|
@ -835,8 +836,8 @@ int AtomVecSphereKokkos::pack_comm_self(
|
|||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,1,1> f(
|
||||
|
@ -926,14 +927,14 @@ void AtomVecSphereKokkos::unpack_comm_kokkos(
|
|||
return;
|
||||
}
|
||||
if(commKK->forward_comm_on_host) {
|
||||
modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
struct AtomVecSphereKokkos_UnpackComm<LMPHostType> f(
|
||||
atomKK->k_x,
|
||||
atomKK->k_radius,atomKK->k_rmass,
|
||||
buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
} else {
|
||||
modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
struct AtomVecSphereKokkos_UnpackComm<LMPDeviceType> f(
|
||||
atomKK->k_x,
|
||||
atomKK->k_radius,atomKK->k_rmass,
|
||||
|
@ -998,7 +999,7 @@ void AtomVecSphereKokkos::unpack_comm_vel_kokkos(
|
|||
const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf ) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
if (radvary == 0) {
|
||||
struct AtomVecSphereKokkos_UnpackCommVel<LMPHostType,0> f(
|
||||
atomKK->k_x,
|
||||
|
@ -1015,7 +1016,7 @@ void AtomVecSphereKokkos::unpack_comm_vel_kokkos(
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
if (radvary == 0) {
|
||||
struct AtomVecSphereKokkos_UnpackCommVel<LMPDeviceType,0> f(
|
||||
atomKK->k_x,
|
||||
|
@ -1044,7 +1045,7 @@ int AtomVecSphereKokkos::pack_comm(int n, int *list, double *buf,
|
|||
|
||||
if (radvary == 0) {
|
||||
// Not sure if we need to call sync for X here
|
||||
sync(Host,X_MASK);
|
||||
atomKK->sync(Host,X_MASK);
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
for (i = 0; i < n; i++) {
|
||||
|
@ -1071,7 +1072,7 @@ int AtomVecSphereKokkos::pack_comm(int n, int *list, double *buf,
|
|||
}
|
||||
}
|
||||
} else {
|
||||
sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
for (i = 0; i < n; i++) {
|
||||
|
@ -1115,7 +1116,7 @@ int AtomVecSphereKokkos::pack_comm_vel(int n, int *list, double *buf,
|
|||
double dx,dy,dz,dvx,dvy,dvz;
|
||||
|
||||
if (radvary == 0) {
|
||||
sync(Host,X_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->sync(Host,X_MASK|V_MASK|OMEGA_MASK);
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
for (i = 0; i < n; i++) {
|
||||
|
@ -1178,7 +1179,7 @@ int AtomVecSphereKokkos::pack_comm_vel(int n, int *list, double *buf,
|
|||
}
|
||||
}
|
||||
} else {
|
||||
sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
for (i = 0; i < n; i++) {
|
||||
|
@ -1257,7 +1258,7 @@ int AtomVecSphereKokkos::pack_comm_hybrid(int n, int *list, double *buf)
|
|||
{
|
||||
if (radvary == 0) return 0;
|
||||
|
||||
sync(Host,RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->sync(Host,RADIUS_MASK|RMASS_MASK);
|
||||
|
||||
int m = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
|
@ -1280,7 +1281,7 @@ void AtomVecSphereKokkos::unpack_comm(int n, int first, double *buf)
|
|||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
}
|
||||
modified(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
} else {
|
||||
int m = 0;
|
||||
const int last = first + n;
|
||||
|
@ -1291,7 +1292,7 @@ void AtomVecSphereKokkos::unpack_comm(int n, int first, double *buf)
|
|||
h_radius[i] = buf[m++];
|
||||
h_rmass[i] = buf[m++];
|
||||
}
|
||||
modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1313,7 +1314,7 @@ void AtomVecSphereKokkos::unpack_comm_vel(int n, int first, double *buf)
|
|||
h_omega(i,1) = buf[m++];
|
||||
h_omega(i,2) = buf[m++];
|
||||
}
|
||||
modified(Host,X_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|OMEGA_MASK);
|
||||
} else {
|
||||
int m = 0;
|
||||
const int last = first + n;
|
||||
|
@ -1330,7 +1331,7 @@ void AtomVecSphereKokkos::unpack_comm_vel(int n, int first, double *buf)
|
|||
h_omega(i,1) = buf[m++];
|
||||
h_omega(i,2) = buf[m++];
|
||||
}
|
||||
modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1346,7 +1347,7 @@ int AtomVecSphereKokkos::unpack_comm_hybrid(int n, int first, double *buf)
|
|||
h_radius[i] = buf[m++];
|
||||
h_rmass[i] = buf[m++];
|
||||
}
|
||||
modified(Host,RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Host,RADIUS_MASK|RMASS_MASK);
|
||||
return m;
|
||||
}
|
||||
|
||||
|
@ -1355,7 +1356,7 @@ int AtomVecSphereKokkos::unpack_comm_hybrid(int n, int first, double *buf)
|
|||
int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
sync(Host,F_MASK|TORQUE_MASK);
|
||||
atomKK->sync(Host,F_MASK|TORQUE_MASK);
|
||||
|
||||
int m = 0;
|
||||
const int last = first + n;
|
||||
|
@ -1375,7 +1376,7 @@ int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf)
|
|||
int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
sync(Host,TORQUE_MASK);
|
||||
atomKK->sync(Host,TORQUE_MASK);
|
||||
|
||||
int m = 0;
|
||||
const int last = first + n;
|
||||
|
@ -1392,7 +1393,7 @@ int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf)
|
|||
void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
{
|
||||
if(n > 0) {
|
||||
modified(Host,F_MASK|TORQUE_MASK);
|
||||
atomKK->modified(Host,F_MASK|TORQUE_MASK);
|
||||
}
|
||||
|
||||
int m = 0;
|
||||
|
@ -1412,7 +1413,7 @@ void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf)
|
|||
int AtomVecSphereKokkos::unpack_reverse_hybrid(int n, int *list, double *buf)
|
||||
{
|
||||
if(n > 0) {
|
||||
modified(Host,TORQUE_MASK);
|
||||
atomKK->modified(Host,TORQUE_MASK);
|
||||
}
|
||||
|
||||
int m = 0;
|
||||
|
@ -1492,7 +1493,7 @@ int AtomVecSphereKokkos::pack_border_kokkos(
|
|||
X_FLOAT dx,dy,dz;
|
||||
|
||||
// This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec
|
||||
sync(space,ALL_MASK);
|
||||
atomKK->sync(space,ALL_MASK);
|
||||
|
||||
if (pbc_flag != 0) {
|
||||
if (domain->triclinic == 0) {
|
||||
|
@ -1549,7 +1550,7 @@ int AtomVecSphereKokkos::pack_border(
|
|||
int i,j,m;
|
||||
double dx,dy,dz;
|
||||
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
|
@ -1686,7 +1687,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
|
|||
X_FLOAT dvx=0,dvy=0,dvz=0;
|
||||
|
||||
// This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec
|
||||
sync(space,ALL_MASK);
|
||||
atomKK->sync(space,ALL_MASK);
|
||||
|
||||
if (pbc_flag != 0) {
|
||||
if (domain->triclinic == 0) {
|
||||
|
@ -1776,7 +1777,7 @@ int AtomVecSphereKokkos::pack_border_vel(int n, int *list, double *buf,
|
|||
int i,j,m;
|
||||
double dx,dy,dz,dvx,dvy,dvz;
|
||||
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
|
@ -1866,7 +1867,7 @@ int AtomVecSphereKokkos::pack_border_vel(int n, int *list, double *buf,
|
|||
|
||||
int AtomVecSphereKokkos::pack_border_hybrid(int n, int *list, double *buf)
|
||||
{
|
||||
sync(Host,RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->sync(Host,RADIUS_MASK|RMASS_MASK);
|
||||
|
||||
int m = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
|
@ -1942,7 +1943,7 @@ void AtomVecSphereKokkos::unpack_border_kokkos(const int &n, const int &first,
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
RADIUS_MASK|RMASS_MASK);
|
||||
}
|
||||
|
||||
|
@ -1969,7 +1970,7 @@ void AtomVecSphereKokkos::unpack_border(int n, int first, double *buf)
|
|||
m += modify->fix[atom->extra_border[iextra]]->
|
||||
unpack_border(n,first,&buf[m]);
|
||||
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
}
|
||||
|
||||
|
||||
|
@ -2052,7 +2053,7 @@ void AtomVecSphereKokkos::unpack_border_vel_kokkos(
|
|||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
}
|
||||
|
||||
|
@ -2085,7 +2086,7 @@ void AtomVecSphereKokkos::unpack_border_vel(int n, int first, double *buf)
|
|||
m += modify->fix[atom->extra_border[iextra]]->
|
||||
unpack_border(n,first,&buf[m]);
|
||||
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -2098,7 +2099,7 @@ int AtomVecSphereKokkos::unpack_border_hybrid(int n, int first, double *buf)
|
|||
h_radius[i] = buf[m++];
|
||||
h_rmass[i] = buf[m++];
|
||||
}
|
||||
modified(Host,RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Host,RADIUS_MASK|RMASS_MASK);
|
||||
return m;
|
||||
}
|
||||
|
||||
|
@ -2218,7 +2219,7 @@ int AtomVecSphereKokkos::pack_exchange_kokkos(
|
|||
int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK |
|
||||
OMEGA_MASK);
|
||||
|
||||
|
@ -2239,7 +2240,7 @@ int AtomVecSphereKokkos::pack_exchange_kokkos(
|
|||
|
||||
int AtomVecSphereKokkos::pack_exchange(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK |
|
||||
OMEGA_MASK);
|
||||
|
||||
|
@ -2354,7 +2355,7 @@ int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int
|
|||
k_count.sync<LMPHostType>();
|
||||
}
|
||||
|
||||
modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK |
|
||||
OMEGA_MASK);
|
||||
|
||||
|
@ -2391,7 +2392,7 @@ int AtomVecSphereKokkos::unpack_exchange(double *buf)
|
|||
m += modify->fix[atom->extra_grow[iextra]]->
|
||||
unpack_exchange(nlocal,&buf[m]);
|
||||
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | RADIUS_MASK | RMASS_MASK |
|
||||
OMEGA_MASK);
|
||||
|
||||
|
@ -2427,7 +2428,7 @@ int AtomVecSphereKokkos::size_restart()
|
|||
|
||||
int AtomVecSphereKokkos::pack_restart(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | V_MASK |
|
||||
RADIUS_MASK | RMASS_MASK | OMEGA_MASK);
|
||||
|
||||
|
@ -2494,7 +2495,7 @@ int AtomVecSphereKokkos::unpack_restart(double *buf)
|
|||
for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
|
||||
}
|
||||
|
||||
modified(Host,X_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | V_MASK |
|
||||
RADIUS_MASK | RMASS_MASK | OMEGA_MASK);
|
||||
|
||||
|
@ -2616,14 +2617,14 @@ int AtomVecSphereKokkos::data_atom_hybrid(int nlocal, char **values)
|
|||
|
||||
void AtomVecSphereKokkos::data_vel(int m, char **values)
|
||||
{
|
||||
sync(Host,V_MASK|OMEGA_MASK);
|
||||
atomKK->sync(Host,V_MASK|OMEGA_MASK);
|
||||
h_v(m,0) = atof(values[0]);
|
||||
h_v(m,1) = atof(values[1]);
|
||||
h_v(m,2) = atof(values[2]);
|
||||
h_omega(m,0) = atof(values[3]);
|
||||
h_omega(m,1) = atof(values[4]);
|
||||
h_omega(m,2) = atof(values[5]);
|
||||
modified(Host,V_MASK|OMEGA_MASK);
|
||||
atomKK->modified(Host,V_MASK|OMEGA_MASK);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -2632,11 +2633,11 @@ void AtomVecSphereKokkos::data_vel(int m, char **values)
|
|||
|
||||
int AtomVecSphereKokkos::data_vel_hybrid(int m, char **values)
|
||||
{
|
||||
sync(Host,OMEGA_MASK);
|
||||
atomKK->sync(Host,OMEGA_MASK);
|
||||
omega[m][0] = atof(values[0]);
|
||||
omega[m][1] = atof(values[1]);
|
||||
omega[m][2] = atof(values[2]);
|
||||
modified(Host,OMEGA_MASK);
|
||||
atomKK->modified(Host,OMEGA_MASK);
|
||||
return 3;
|
||||
}
|
||||
|
||||
|
@ -2711,7 +2712,7 @@ int AtomVecSphereKokkos::write_data_hybrid(FILE *fp, double *buf)
|
|||
|
||||
void AtomVecSphereKokkos::pack_vel(double **buf)
|
||||
{
|
||||
sync(Host,TAG_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->sync(Host,TAG_MASK|V_MASK|OMEGA_MASK);
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
|
@ -2731,7 +2732,7 @@ void AtomVecSphereKokkos::pack_vel(double **buf)
|
|||
|
||||
int AtomVecSphereKokkos::pack_vel_hybrid(int i, double *buf)
|
||||
{
|
||||
sync(Host,OMEGA_MASK);
|
||||
atomKK->sync(Host,OMEGA_MASK);
|
||||
|
||||
buf[0] = h_omega(i,0);
|
||||
buf[1] = h_omega(i,1);
|
||||
|
|
|
@ -57,10 +57,9 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
|
|||
memory->destroy(buf_recv);
|
||||
buf_recv = NULL;
|
||||
|
||||
k_exchange_sendlist = DAT::
|
||||
tdual_int_1d("comm:k_exchange_sendlist",100);
|
||||
k_exchange_copylist = DAT::
|
||||
tdual_int_1d("comm:k_exchange_copylist",100);
|
||||
k_exchange_lists = DAT::tdual_int_2d("comm:k_exchange_lists",2,100);
|
||||
k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL);
|
||||
k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL);
|
||||
k_count = DAT::tdual_int_scalar("comm:k_count");
|
||||
k_sendflag = DAT::tdual_int_1d("comm:k_sendflag",100);
|
||||
|
||||
|
@ -187,71 +186,80 @@ void CommKokkos::forward_comm_device(int dummy)
|
|||
k_sendlist.sync<DeviceType>();
|
||||
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
|
||||
|
||||
for (int iswap = 0; iswap < nswap; iswap++) {
|
||||
if (sendproc[iswap] != me) {
|
||||
if (comm_x_only) {
|
||||
if (size_forward_recv[iswap]) {
|
||||
buf = atomKK->k_x.view<DeviceType>().data() +
|
||||
firstrecv[iswap]*atomKK->k_x.view<DeviceType>().extent(1);
|
||||
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
}
|
||||
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
|
||||
iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
if (n) {
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),
|
||||
n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
}
|
||||
if (comm->nprocs == 1 && !ghost_velocity) {
|
||||
k_swap.sync<DeviceType>();
|
||||
k_swap2.sync<DeviceType>();
|
||||
k_pbc.sync<DeviceType>();
|
||||
n = avec->pack_comm_self_fused(totalsend,k_sendlist,k_sendnum_scan,
|
||||
k_firstrecv,k_pbc_flag,k_pbc,k_g2l);
|
||||
} else {
|
||||
|
||||
if (size_forward_recv[iswap]) {
|
||||
MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
|
||||
space,X_MASK);
|
||||
for (int iswap = 0; iswap < nswap; iswap++) {
|
||||
if (sendproc[iswap] != me) {
|
||||
if (comm_x_only) {
|
||||
if (size_forward_recv[iswap]) {
|
||||
buf = atomKK->k_x.view<DeviceType>().data() +
|
||||
firstrecv[iswap]*atomKK->k_x.view<DeviceType>().extent(1);
|
||||
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
}
|
||||
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
|
||||
iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
if (n) {
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),
|
||||
n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
}
|
||||
|
||||
if (size_forward_recv[iswap]) {
|
||||
MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
|
||||
space,X_MASK);
|
||||
}
|
||||
} else if (ghost_velocity) {
|
||||
if (size_forward_recv[iswap]) {
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
|
||||
size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
}
|
||||
n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
|
||||
k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
if (n) {
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),n,
|
||||
MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
}
|
||||
if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
|
||||
DeviceType::fence();
|
||||
} else {
|
||||
if (size_forward_recv[iswap])
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
|
||||
size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
|
||||
k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
if (n)
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),n,
|
||||
MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
|
||||
DeviceType::fence();
|
||||
}
|
||||
} else if (ghost_velocity) {
|
||||
if (size_forward_recv[iswap]) {
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
|
||||
size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
}
|
||||
n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
|
||||
k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
if (n) {
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),n,
|
||||
MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
}
|
||||
if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
|
||||
DeviceType::fence();
|
||||
} else {
|
||||
if (size_forward_recv[iswap])
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
|
||||
size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
|
||||
k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
if (n)
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),n,
|
||||
MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
|
||||
DeviceType::fence();
|
||||
}
|
||||
} else {
|
||||
if (!ghost_velocity) {
|
||||
if (sendnum[iswap])
|
||||
n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap,
|
||||
firstrecv[iswap],pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
} else {
|
||||
n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
|
||||
k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send);
|
||||
DeviceType::fence();
|
||||
if (!ghost_velocity) {
|
||||
if (sendnum[iswap])
|
||||
n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap,
|
||||
firstrecv[iswap],pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
} else {
|
||||
n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
|
||||
k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send);
|
||||
DeviceType::fence();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -496,9 +504,8 @@ void CommKokkos::exchange()
|
|||
}
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
CommBrick::exchange();
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -565,146 +572,149 @@ void CommKokkos::exchange_device()
|
|||
atom->nghost = 0;
|
||||
atom->avec->clear_bonus();
|
||||
|
||||
// subbox bounds for orthogonal or triclinic
|
||||
if (comm->nprocs > 1) { // otherwise no-op
|
||||
|
||||
if (triclinic == 0) {
|
||||
sublo = domain->sublo;
|
||||
subhi = domain->subhi;
|
||||
} else {
|
||||
sublo = domain->sublo_lamda;
|
||||
subhi = domain->subhi_lamda;
|
||||
}
|
||||
// subbox bounds for orthogonal or triclinic
|
||||
|
||||
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
|
||||
if (triclinic == 0) {
|
||||
sublo = domain->sublo;
|
||||
subhi = domain->subhi;
|
||||
} else {
|
||||
sublo = domain->sublo_lamda;
|
||||
subhi = domain->subhi_lamda;
|
||||
}
|
||||
|
||||
// loop over dimensions
|
||||
for (int dim = 0; dim < 3; dim++) {
|
||||
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
|
||||
|
||||
// fill buffer with atoms leaving my box, using < and >=
|
||||
// when atom is deleted, fill it in with last atom
|
||||
// loop over dimensions
|
||||
for (int dim = 0; dim < 3; dim++) {
|
||||
|
||||
x = atom->x;
|
||||
lo = sublo[dim];
|
||||
hi = subhi[dim];
|
||||
nlocal = atom->nlocal;
|
||||
i = nsend = 0;
|
||||
// fill buffer with atoms leaving my box, using < and >=
|
||||
// when atom is deleted, fill it in with last atom
|
||||
|
||||
if (true) {
|
||||
if (k_sendflag.h_view.extent(0)<nlocal) k_sendflag.resize(nlocal);
|
||||
k_sendflag.sync<DeviceType>();
|
||||
k_count.h_view() = k_exchange_sendlist.h_view.extent(0);
|
||||
while (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) {
|
||||
k_count.h_view() = 0;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<DeviceType>();
|
||||
x = atom->x;
|
||||
lo = sublo[dim];
|
||||
hi = subhi[dim];
|
||||
nlocal = atom->nlocal;
|
||||
i = nsend = 0;
|
||||
|
||||
BuildExchangeListFunctor<DeviceType>
|
||||
f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag,
|
||||
nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nlocal,f);
|
||||
k_exchange_sendlist.modify<DeviceType>();
|
||||
k_sendflag.modify<DeviceType>();
|
||||
k_count.modify<DeviceType>();
|
||||
if (true) {
|
||||
if (k_sendflag.h_view.extent(0)<nlocal) k_sendflag.resize(nlocal);
|
||||
k_sendflag.sync<DeviceType>();
|
||||
k_count.h_view() = k_exchange_sendlist.h_view.extent(0);
|
||||
while (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) {
|
||||
k_count.h_view() = 0;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<DeviceType>();
|
||||
|
||||
k_count.sync<LMPHostType>();
|
||||
if (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) {
|
||||
k_exchange_sendlist.resize(k_count.h_view()*1.1);
|
||||
k_exchange_copylist.resize(k_count.h_view()*1.1);
|
||||
k_count.h_view()=k_exchange_sendlist.h_view.extent(0);
|
||||
BuildExchangeListFunctor<DeviceType>
|
||||
f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag,
|
||||
nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nlocal,f);
|
||||
k_exchange_sendlist.modify<DeviceType>();
|
||||
k_sendflag.modify<DeviceType>();
|
||||
k_count.modify<DeviceType>();
|
||||
|
||||
k_count.sync<LMPHostType>();
|
||||
if (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) {
|
||||
k_exchange_lists.resize(2,k_count.h_view()*1.1);
|
||||
k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL);
|
||||
k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL);
|
||||
k_count.h_view()=k_exchange_sendlist.h_view.extent(0);
|
||||
}
|
||||
}
|
||||
|
||||
k_exchange_lists.sync<LMPHostType>();
|
||||
k_sendflag.sync<LMPHostType>();
|
||||
|
||||
int sendpos = nlocal-1;
|
||||
nlocal -= k_count.h_view();
|
||||
for(int i = 0; i < k_count.h_view(); i++) {
|
||||
if (k_exchange_sendlist.h_view(i)<nlocal) {
|
||||
while (k_sendflag.h_view(sendpos)) sendpos--;
|
||||
k_exchange_copylist.h_view(i) = sendpos;
|
||||
sendpos--;
|
||||
} else
|
||||
k_exchange_copylist.h_view(i) = -1;
|
||||
}
|
||||
|
||||
k_exchange_copylist.modify<LMPHostType>();
|
||||
k_exchange_copylist.sync<DeviceType>();
|
||||
nsend = k_count.h_view();
|
||||
if (nsend > maxsend) grow_send_kokkos(nsend,1);
|
||||
nsend =
|
||||
avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send,
|
||||
k_exchange_sendlist,k_exchange_copylist,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space,
|
||||
dim,lo,hi);
|
||||
DeviceType::fence();
|
||||
} else {
|
||||
while (i < nlocal) {
|
||||
if (x[i][dim] < lo || x[i][dim] >= hi) {
|
||||
if (nsend > maxsend) grow_send_kokkos(nsend,1);
|
||||
nsend += avec->pack_exchange(i,&buf_send[nsend]);
|
||||
avec->copy(nlocal-1,i,1);
|
||||
nlocal--;
|
||||
} else i++;
|
||||
}
|
||||
}
|
||||
k_exchange_copylist.sync<LMPHostType>();
|
||||
k_exchange_sendlist.sync<LMPHostType>();
|
||||
k_sendflag.sync<LMPHostType>();
|
||||
atom->nlocal = nlocal;
|
||||
|
||||
int sendpos = nlocal-1;
|
||||
nlocal -= k_count.h_view();
|
||||
for(int i = 0; i < k_count.h_view(); i++) {
|
||||
if (k_exchange_sendlist.h_view(i)<nlocal) {
|
||||
while (k_sendflag.h_view(sendpos)) sendpos--;
|
||||
k_exchange_copylist.h_view(i) = sendpos;
|
||||
sendpos--;
|
||||
} else
|
||||
k_exchange_copylist.h_view(i) = -1;
|
||||
}
|
||||
// send/recv atoms in both directions
|
||||
// if 1 proc in dimension, no send/recv, set recv buf to send buf
|
||||
// if 2 procs in dimension, single send/recv
|
||||
// if more than 2 procs in dimension, send/recv to both neighbors
|
||||
|
||||
k_exchange_copylist.modify<LMPHostType>();
|
||||
k_exchange_copylist.sync<DeviceType>();
|
||||
nsend = k_count.h_view();
|
||||
if (nsend > maxsend) grow_send_kokkos(nsend,1);
|
||||
nsend =
|
||||
avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send,
|
||||
k_exchange_sendlist,k_exchange_copylist,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space,
|
||||
dim,lo,hi);
|
||||
DeviceType::fence();
|
||||
} else {
|
||||
while (i < nlocal) {
|
||||
if (x[i][dim] < lo || x[i][dim] >= hi) {
|
||||
if (nsend > maxsend) grow_send_kokkos(nsend,1);
|
||||
nsend += avec->pack_exchange(i,&buf_send[nsend]);
|
||||
avec->copy(nlocal-1,i,1);
|
||||
nlocal--;
|
||||
} else i++;
|
||||
}
|
||||
}
|
||||
atom->nlocal = nlocal;
|
||||
if (procgrid[dim] == 1) {
|
||||
nrecv = nsend;
|
||||
if (nrecv) {
|
||||
atom->nlocal=avec->
|
||||
unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space);
|
||||
DeviceType::fence();
|
||||
}
|
||||
} else {
|
||||
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
|
||||
&nrecv1,1,MPI_INT,procneigh[dim][1],0,world,MPI_STATUS_IGNORE);
|
||||
nrecv = nrecv1;
|
||||
if (procgrid[dim] > 2) {
|
||||
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0,
|
||||
&nrecv2,1,MPI_INT,procneigh[dim][0],0,world,MPI_STATUS_IGNORE);
|
||||
nrecv += nrecv2;
|
||||
}
|
||||
if (nrecv > maxrecv) grow_recv_kokkos(nrecv);
|
||||
|
||||
// send/recv atoms in both directions
|
||||
// if 1 proc in dimension, no send/recv, set recv buf to send buf
|
||||
// if 2 procs in dimension, single send/recv
|
||||
// if more than 2 procs in dimension, send/recv to both neighbors
|
||||
|
||||
if (procgrid[dim] == 1) {
|
||||
nrecv = nsend;
|
||||
if (nrecv) {
|
||||
atom->nlocal=avec->
|
||||
unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space);
|
||||
DeviceType::fence();
|
||||
}
|
||||
} else {
|
||||
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
|
||||
&nrecv1,1,MPI_INT,procneigh[dim][1],0,world,MPI_STATUS_IGNORE);
|
||||
nrecv = nrecv1;
|
||||
if (procgrid[dim] > 2) {
|
||||
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0,
|
||||
&nrecv2,1,MPI_INT,procneigh[dim][0],0,world,MPI_STATUS_IGNORE);
|
||||
nrecv += nrecv2;
|
||||
}
|
||||
if (nrecv > maxrecv) grow_recv_kokkos(nrecv);
|
||||
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),nrecv1,
|
||||
MPI_DOUBLE,procneigh[dim][1],0,
|
||||
world,&request);
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),nsend,
|
||||
MPI_DOUBLE,procneigh[dim][0],0,world);
|
||||
MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
|
||||
if (procgrid[dim] > 2) {
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data()+nrecv1,
|
||||
nrecv2,MPI_DOUBLE,procneigh[dim][0],0,
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),nrecv1,
|
||||
MPI_DOUBLE,procneigh[dim][1],0,
|
||||
world,&request);
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),nsend,
|
||||
MPI_DOUBLE,procneigh[dim][1],0,world);
|
||||
MPI_DOUBLE,procneigh[dim][0],0,world);
|
||||
MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
|
||||
if (procgrid[dim] > 2) {
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data()+nrecv1,
|
||||
nrecv2,MPI_DOUBLE,procneigh[dim][0],0,
|
||||
world,&request);
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),nsend,
|
||||
MPI_DOUBLE,procneigh[dim][1],0,world);
|
||||
MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
}
|
||||
|
||||
if (nrecv) {
|
||||
atom->nlocal = avec->
|
||||
unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space);
|
||||
DeviceType::fence();
|
||||
}
|
||||
}
|
||||
|
||||
if (nrecv) {
|
||||
atom->nlocal = avec->
|
||||
unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space);
|
||||
DeviceType::fence();
|
||||
}
|
||||
// check incoming atoms to see if they are in my box
|
||||
// if so, add to my list
|
||||
|
||||
}
|
||||
|
||||
// check incoming atoms to see if they are in my box
|
||||
// if so, add to my list
|
||||
|
||||
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
|
||||
}
|
||||
|
||||
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
|
||||
|
||||
if (atom->firstgroupname) {
|
||||
/* this is not yet implemented with Kokkos */
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
@ -742,14 +752,15 @@ void CommKokkos::borders()
|
|||
if (!exchange_comm_classic) {
|
||||
if (exchange_comm_on_host) borders_device<LMPHostType>();
|
||||
else borders_device<LMPDeviceType>();
|
||||
return;
|
||||
} else {
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
CommBrick::borders();
|
||||
k_sendlist.modify<LMPHostType>();
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
}
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::borders();
|
||||
k_sendlist.modify<LMPHostType>();
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
if (comm->nprocs == 1 && !ghost_velocity && !forward_comm_classic)
|
||||
copy_swap_info();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -817,7 +828,6 @@ void CommKokkos::borders_device() {
|
|||
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
|
||||
|
||||
ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
k_sendlist.sync<DeviceType>();
|
||||
atomKK->sync(exec_space,ALL_MASK);
|
||||
|
||||
// do swaps over all 3 dimensions
|
||||
|
@ -1037,6 +1047,69 @@ void CommKokkos::borders_device() {
|
|||
atom->map_set();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy swap info
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::copy_swap_info()
|
||||
{
|
||||
if (nswap > k_swap.extent(1)) {
|
||||
k_swap = DAT::tdual_int_2d("comm:swap",2,nswap);
|
||||
k_firstrecv = Kokkos::subview(k_swap,0,Kokkos::ALL);
|
||||
k_sendnum_scan = Kokkos::subview(k_swap,1,Kokkos::ALL);
|
||||
}
|
||||
int scan = 0;
|
||||
for (int iswap = 0; iswap < nswap; iswap++) {
|
||||
scan += sendnum[iswap];
|
||||
k_sendnum_scan.h_view[iswap] = scan;
|
||||
k_firstrecv.h_view[iswap] = firstrecv[iswap];
|
||||
}
|
||||
totalsend = scan;
|
||||
|
||||
// create map of ghost to local atom id
|
||||
// store periodic boundary transform from local to ghost
|
||||
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
|
||||
if (totalsend > k_pbc.extent(0)) {
|
||||
k_pbc = DAT::tdual_int_2d("comm:pbc",totalsend,6);
|
||||
k_swap2 = DAT::tdual_int_2d("comm:swap2",2,totalsend);
|
||||
k_pbc_flag = Kokkos::subview(k_swap2,0,Kokkos::ALL);
|
||||
k_g2l = Kokkos::subview(k_swap2,1,Kokkos::ALL);
|
||||
}
|
||||
|
||||
for (int iswap = 0; iswap < nswap; iswap++) {
|
||||
for (int i = 0; i < sendnum[iswap]; i++) {
|
||||
int source = sendlist[iswap][i] - atom->nlocal;
|
||||
int dest = firstrecv[iswap] + i - atom->nlocal;
|
||||
k_pbc_flag.h_view(dest) = pbc_flag[iswap];
|
||||
k_pbc.h_view(dest,0) = pbc[iswap][0];
|
||||
k_pbc.h_view(dest,1) = pbc[iswap][1];
|
||||
k_pbc.h_view(dest,2) = pbc[iswap][2];
|
||||
k_pbc.h_view(dest,3) = pbc[iswap][3];
|
||||
k_pbc.h_view(dest,4) = pbc[iswap][4];
|
||||
k_pbc.h_view(dest,5) = pbc[iswap][5];
|
||||
k_g2l.h_view(dest) = atom->nlocal + source;
|
||||
|
||||
if (source >= 0) {
|
||||
k_pbc_flag.h_view(dest) = k_pbc_flag.h_view(dest) || k_pbc_flag.h_view(source);
|
||||
k_pbc.h_view(dest,0) += k_pbc.h_view(source,0);
|
||||
k_pbc.h_view(dest,1) += k_pbc.h_view(source,1);
|
||||
k_pbc.h_view(dest,2) += k_pbc.h_view(source,2);
|
||||
k_pbc.h_view(dest,3) += k_pbc.h_view(source,3);
|
||||
k_pbc.h_view(dest,4) += k_pbc.h_view(source,4);
|
||||
k_pbc.h_view(dest,5) += k_pbc.h_view(source,5);
|
||||
k_g2l.h_view(dest) = k_g2l.h_view(source);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
k_swap.modify<LMPHostType>();
|
||||
k_swap2.modify<LMPHostType>();
|
||||
k_pbc.modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
realloc the size of the send buffer as needed with BUFFACTOR and bufextra
|
||||
if flag = 1, realloc
|
||||
|
|
|
@ -58,11 +58,21 @@ class CommKokkos : public CommBrick {
|
|||
DAT::tdual_int_2d k_sendlist;
|
||||
DAT::tdual_int_scalar k_total_send;
|
||||
DAT::tdual_xfloat_2d k_buf_send,k_buf_recv;
|
||||
DAT::tdual_int_2d k_exchange_lists;
|
||||
DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag;
|
||||
DAT::tdual_int_scalar k_count;
|
||||
//double *buf_send; // send buffer for all comm
|
||||
//double *buf_recv; // recv buffer for all comm
|
||||
|
||||
DAT::tdual_int_2d k_swap;
|
||||
DAT::tdual_int_2d k_swap2;
|
||||
DAT::tdual_int_2d k_pbc;
|
||||
DAT::tdual_int_1d k_pbc_flag;
|
||||
DAT::tdual_int_1d k_g2l;
|
||||
DAT::tdual_int_1d k_firstrecv;
|
||||
DAT::tdual_int_1d k_sendnum_scan;
|
||||
int totalsend;
|
||||
|
||||
int max_buf_pair;
|
||||
DAT::tdual_xfloat_1d k_buf_send_pair;
|
||||
DAT::tdual_xfloat_1d k_buf_recv_pair;
|
||||
|
@ -74,6 +84,7 @@ class CommKokkos : public CommBrick {
|
|||
void grow_recv_kokkos(int, ExecutionSpace space = Host);
|
||||
void grow_list(int, int);
|
||||
void grow_swap(int);
|
||||
void copy_swap_info();
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "error.h"
|
||||
#include "force.h"
|
||||
#include "kspace.h"
|
||||
#include "kokkos.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
|
@ -339,6 +340,17 @@ struct DomainPBCFunctor {
|
|||
|
||||
void DomainKokkos::pbc()
|
||||
{
|
||||
|
||||
if (lmp->kokkos->exchange_comm_classic) {
|
||||
|
||||
// reduce GPU data movement
|
||||
|
||||
atomKK->sync(Host,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK);
|
||||
Domain::pbc();
|
||||
atomKK->modified(Host,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK);
|
||||
return;
|
||||
}
|
||||
|
||||
double *lo,*hi,*period;
|
||||
int nlocal = atomKK->nlocal;
|
||||
|
||||
|
|
|
@ -113,8 +113,8 @@ void FixNVEKokkos<DeviceType>::initial_integrate_rmass_item(int i) const
|
|||
template<class DeviceType>
|
||||
void FixNVEKokkos<DeviceType>::final_integrate()
|
||||
{
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
atomKK->sync(execution_space,V_MASK | F_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK);
|
||||
atomKK->modified(execution_space,V_MASK);
|
||||
|
||||
v = atomKK->k_v.view<DeviceType>();
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "memory_kokkos.h"
|
||||
#include "error.h"
|
||||
#include "update.h"
|
||||
#include "atom_masks.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace FixConst;
|
||||
|
@ -61,8 +62,10 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax)
|
|||
size_t nbytes = (nmax-nmax_old) * sizeof(int);
|
||||
memset(&atom->ivector[index[m]][nmax_old],0,nbytes);
|
||||
} else if (style[m] == DOUBLE) {
|
||||
atomKK->sync(Device,DVECTOR_MASK);
|
||||
memoryKK->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.extent(0),nmax,
|
||||
"atom:dvector");
|
||||
atomKK->modified(Device,DVECTOR_MASK);
|
||||
//memory->grow(atom->dvector[index[m]],nmax,"atom:dvector");
|
||||
//size_t nbytes = (nmax-nmax_old) * sizeof(double);
|
||||
//memset(&atom->dvector[index[m]][nmax_old],0,nbytes);
|
||||
|
|
|
@ -58,7 +58,7 @@ FixQEqReaxKokkos(LAMMPS *lmp, int narg, char **arg) :
|
|||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK;
|
||||
datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK | TAG_MASK;
|
||||
datamask_modify = Q_MASK | X_MASK;
|
||||
|
||||
nmax = nmax = m_cap = 0;
|
||||
|
@ -164,6 +164,9 @@ void FixQEqReaxKokkos<DeviceType>::init_shielding_k()
|
|||
template<class DeviceType>
|
||||
void FixQEqReaxKokkos<DeviceType>::init_hist()
|
||||
{
|
||||
k_s_hist.clear_sync_state();
|
||||
k_t_hist.clear_sync_state();
|
||||
|
||||
Kokkos::deep_copy(d_s_hist,0.0);
|
||||
Kokkos::deep_copy(d_t_hist,0.0);
|
||||
|
||||
|
@ -189,7 +192,6 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag)
|
|||
if (update->ntimestep % nevery) return;
|
||||
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
v = atomKK->k_v.view<DeviceType>();
|
||||
|
@ -273,6 +275,8 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag)
|
|||
// free duplicated memory
|
||||
if (need_dup)
|
||||
dup_o = decltype(dup_o)();
|
||||
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -1199,9 +1203,12 @@ double FixQEqReaxKokkos<DeviceType>::memory_usage()
|
|||
template<class DeviceType>
|
||||
void FixQEqReaxKokkos<DeviceType>::grow_arrays(int nmax)
|
||||
{
|
||||
k_s_hist.template sync<LMPHostType>(); // force reallocation on host
|
||||
k_s_hist.template sync<LMPHostType>();
|
||||
k_t_hist.template sync<LMPHostType>();
|
||||
|
||||
k_s_hist.template modify<LMPHostType>(); // force reallocation on host
|
||||
k_t_hist.template modify<LMPHostType>();
|
||||
|
||||
memoryKK->grow_kokkos(k_s_hist,s_hist,nmax,nprev,"qeq:s_hist");
|
||||
memoryKK->grow_kokkos(k_t_hist,t_hist,nmax,nprev,"qeq:t_hist");
|
||||
|
||||
|
|
|
@ -184,10 +184,12 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
|||
|
||||
binsize = 0.0;
|
||||
gpu_direct_flag = 1;
|
||||
neigh_thread = 0;
|
||||
neigh_thread_set = 0;
|
||||
neighflag_qeq_set = 0;
|
||||
if (ngpu > 0) {
|
||||
neighflag = FULL;
|
||||
neighflag_qeq = FULL;
|
||||
neighflag_qeq_set = 0;
|
||||
newtonflag = 0;
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
|
@ -199,7 +201,6 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
|||
neighflag = HALF;
|
||||
neighflag_qeq = HALF;
|
||||
}
|
||||
neighflag_qeq_set = 0;
|
||||
newtonflag = 1;
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
|
@ -318,6 +319,13 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
|||
else if (strcmp(arg[iarg+1],"on") == 0) gpu_direct_flag = 1;
|
||||
else error->all(FLERR,"Illegal package kokkos command");
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"neigh/thread") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"off") == 0) neigh_thread = 0;
|
||||
else if (strcmp(arg[iarg+1],"on") == 0) neigh_thread = 1;
|
||||
else error->all(FLERR,"Illegal package kokkos command");
|
||||
neigh_thread_set = 1;
|
||||
iarg += 2;
|
||||
} else error->all(FLERR,"Illegal package kokkos command");
|
||||
}
|
||||
|
||||
|
@ -337,6 +345,9 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
|||
|
||||
force->newton = force->newton_pair = force->newton_bond = newtonflag;
|
||||
|
||||
if (neigh_thread && neighflag != FULL)
|
||||
error->all(FLERR,"Must use KOKKOS package option 'neigh full' with 'neigh/thread on'");
|
||||
|
||||
neighbor->binsize_user = binsize;
|
||||
if (binsize <= 0.0) neighbor->binsizeflag = 0;
|
||||
else neighbor->binsizeflag = 1;
|
||||
|
|
|
@ -36,6 +36,8 @@ class KokkosLMP : protected Pointers {
|
|||
int numa;
|
||||
int auto_sync;
|
||||
int gpu_direct_flag;
|
||||
int neigh_thread;
|
||||
int neigh_thread_set;
|
||||
int newtonflag;
|
||||
double binsize;
|
||||
|
||||
|
@ -87,4 +89,8 @@ U: Must use Kokkos half/thread or full neighbor list with threads or GPUs
|
|||
|
||||
Using Kokkos half-neighbor lists with threading is not allowed.
|
||||
|
||||
E: Must use KOKKOS package option 'neigh full' with 'neigh/thread on'
|
||||
|
||||
The 'neigh/thread on' option requires a full neighbor list
|
||||
|
||||
*/
|
||||
|
|
|
@ -448,6 +448,52 @@ struct s_EV_FLOAT_REAX {
|
|||
};
|
||||
typedef struct s_EV_FLOAT_REAX EV_FLOAT_REAX;
|
||||
|
||||
struct s_FEV_FLOAT {
|
||||
F_FLOAT f[3];
|
||||
E_FLOAT evdwl;
|
||||
E_FLOAT ecoul;
|
||||
E_FLOAT v[6];
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
s_FEV_FLOAT() {
|
||||
f[0] = 0; f[1] = 0; f[2] = 0;
|
||||
evdwl = 0;
|
||||
ecoul = 0;
|
||||
v[0] = 0; v[1] = 0; v[2] = 0;
|
||||
v[3] = 0; v[4] = 0; v[5] = 0;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator+=(const s_FEV_FLOAT &rhs) {
|
||||
f[0] += rhs.f[0];
|
||||
f[1] += rhs.f[1];
|
||||
f[2] += rhs.f[2];
|
||||
evdwl += rhs.evdwl;
|
||||
ecoul += rhs.ecoul;
|
||||
v[0] += rhs.v[0];
|
||||
v[1] += rhs.v[1];
|
||||
v[2] += rhs.v[2];
|
||||
v[3] += rhs.v[3];
|
||||
v[4] += rhs.v[4];
|
||||
v[5] += rhs.v[5];
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator+=(const volatile s_FEV_FLOAT &rhs) volatile {
|
||||
f[0] += rhs.f[0];
|
||||
f[1] += rhs.f[1];
|
||||
f[2] += rhs.f[2];
|
||||
evdwl += rhs.evdwl;
|
||||
ecoul += rhs.ecoul;
|
||||
v[0] += rhs.v[0];
|
||||
v[1] += rhs.v[1];
|
||||
v[2] += rhs.v[2];
|
||||
v[3] += rhs.v[3];
|
||||
v[4] += rhs.v[4];
|
||||
v[5] += rhs.v[5];
|
||||
}
|
||||
};
|
||||
typedef struct s_FEV_FLOAT FEV_FLOAT;
|
||||
|
||||
#ifndef PREC_POS
|
||||
#define PREC_POS PRECISION
|
||||
#endif
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "domain_kokkos.h"
|
||||
#include "update.h"
|
||||
#include "neighbor_kokkos.h"
|
||||
#include "nbin_kokkos.h"
|
||||
#include "nstencil.h"
|
||||
|
@ -27,6 +28,16 @@ namespace LAMMPS_NS {
|
|||
template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE>
|
||||
NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::NPairKokkos(LAMMPS *lmp) : NPair(lmp) {
|
||||
|
||||
// use 1D view for scalars to reduce GPU memory operations
|
||||
|
||||
d_scalars = typename AT::t_int_1d("neighbor:scalars",2);
|
||||
h_scalars = HAT::t_int_1d("neighbor:scalars_mirror",2);
|
||||
|
||||
d_resize = Kokkos::subview(d_scalars,0);
|
||||
d_new_maxneighs = Kokkos::subview(d_scalars,1);
|
||||
|
||||
h_resize = Kokkos::subview(h_scalars,0);
|
||||
h_new_maxneighs = Kokkos::subview(h_scalars,1);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -84,27 +95,30 @@ template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE>
|
|||
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::copy_stencil_info()
|
||||
{
|
||||
NPair::copy_stencil_info();
|
||||
|
||||
nstencil = ns->nstencil;
|
||||
|
||||
int maxstencil = ns->get_maxstencil();
|
||||
if (neighbor->last_setup_bins == update->ntimestep) {
|
||||
// copy stencil to device as it may have changed
|
||||
|
||||
if (maxstencil > k_stencil.extent(0))
|
||||
k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil);
|
||||
for (int k = 0; k < maxstencil; k++)
|
||||
k_stencil.h_view(k) = ns->stencil[k];
|
||||
k_stencil.modify<LMPHostType>();
|
||||
k_stencil.sync<DeviceType>();
|
||||
if (GHOST) {
|
||||
if (maxstencil > k_stencilxyz.extent(0))
|
||||
k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil);
|
||||
for (int k = 0; k < maxstencil; k++) {
|
||||
k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0];
|
||||
k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1];
|
||||
k_stencilxyz.h_view(k,2) = ns->stencilxyz[k][2];
|
||||
int maxstencil = ns->get_maxstencil();
|
||||
|
||||
if (maxstencil > k_stencil.extent(0))
|
||||
k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil);
|
||||
for (int k = 0; k < maxstencil; k++)
|
||||
k_stencil.h_view(k) = ns->stencil[k];
|
||||
k_stencil.modify<LMPHostType>();
|
||||
k_stencil.sync<DeviceType>();
|
||||
if (GHOST) {
|
||||
if (maxstencil > k_stencilxyz.extent(0))
|
||||
k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil);
|
||||
for (int k = 0; k < maxstencil; k++) {
|
||||
k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0];
|
||||
k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1];
|
||||
k_stencilxyz.h_view(k,2) = ns->stencilxyz[k][2];
|
||||
}
|
||||
k_stencilxyz.modify<LMPHostType>();
|
||||
k_stencilxyz.sync<DeviceType>();
|
||||
}
|
||||
k_stencilxyz.modify<LMPHostType>();
|
||||
k_stencilxyz.sync<DeviceType>();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -126,7 +140,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
|
|||
k_bincount.view<DeviceType>(),
|
||||
k_bins.view<DeviceType>(),
|
||||
k_atom2bin.view<DeviceType>(),
|
||||
nstencil,
|
||||
mbins,nstencil,
|
||||
k_stencil.view<DeviceType>(),
|
||||
k_stencilxyz.view<DeviceType>(),
|
||||
nlocal,
|
||||
|
@ -157,7 +171,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
|
|||
bboxhi,bboxlo,
|
||||
domain->xperiodic,domain->yperiodic,domain->zperiodic,
|
||||
domain->xprd_half,domain->yprd_half,domain->zprd_half,
|
||||
skin);
|
||||
skin,d_resize,h_resize,d_new_maxneighs,h_new_maxneighs);
|
||||
|
||||
k_cutneighsq.sync<DeviceType>();
|
||||
k_ex1_type.sync<DeviceType>();
|
||||
|
@ -173,7 +187,18 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
|
|||
k_bincount.sync<DeviceType>();
|
||||
k_bins.sync<DeviceType>();
|
||||
k_atom2bin.sync<DeviceType>();
|
||||
atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
|
||||
|
||||
if (atom->molecular) {
|
||||
if (exclude)
|
||||
atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
|
||||
else
|
||||
atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK|TAG_MASK|SPECIAL_MASK);
|
||||
} else {
|
||||
if (exclude)
|
||||
atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK|MASK_MASK);
|
||||
else
|
||||
atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK);
|
||||
}
|
||||
|
||||
data.special_flag[0] = special_flag[0];
|
||||
data.special_flag[1] = special_flag[1];
|
||||
|
@ -185,8 +210,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
|
|||
data.h_new_maxneighs() = list->maxneighs;
|
||||
data.h_resize() = 0;
|
||||
|
||||
Kokkos::deep_copy(data.resize, data.h_resize);
|
||||
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
|
||||
Kokkos::deep_copy(d_scalars, h_scalars);
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
#define BINS_PER_BLOCK 2
|
||||
const int factor = atoms_per_bin<64?2:1;
|
||||
|
@ -245,10 +269,9 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
|
|||
}
|
||||
}
|
||||
}
|
||||
deep_copy(data.h_resize, data.resize);
|
||||
Kokkos::deep_copy(h_scalars, d_scalars);
|
||||
|
||||
if(data.h_resize()) {
|
||||
deep_copy(data.h_new_maxneighs, data.new_maxneighs);
|
||||
list->maxneighs = data.h_new_maxneighs() * 1.2;
|
||||
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.extent(0), list->maxneighs);
|
||||
data.neigh_list.d_neighbors = list->d_neighbors;
|
||||
|
@ -488,7 +511,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
|||
|
||||
const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN;
|
||||
|
||||
if(ibin >=c_bincount.extent(0)) return;
|
||||
if(ibin >= mbins) return;
|
||||
X_FLOAT* other_x = sharedmem;
|
||||
other_x = other_x + 5*atoms_per_bin*MY_BIN;
|
||||
|
||||
|
@ -924,7 +947,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
|
|||
|
||||
const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN;
|
||||
|
||||
if(ibin >=c_bincount.extent(0)) return;
|
||||
if(ibin >= mbins) return;
|
||||
X_FLOAT* other_x = sharedmem;
|
||||
other_x = other_x + 6*atoms_per_bin*MY_BIN;
|
||||
|
||||
|
|
|
@ -95,6 +95,8 @@ namespace LAMMPS_NS {
|
|||
|
||||
template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE>
|
||||
class NPairKokkos : public NPair {
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
public:
|
||||
NPairKokkos(class LAMMPS *);
|
||||
~NPairKokkos() {}
|
||||
|
@ -105,6 +107,12 @@ class NPairKokkos : public NPair {
|
|||
|
||||
private:
|
||||
int newton_pair;
|
||||
typename AT::t_int_1d d_scalars;
|
||||
HAT::t_int_1d h_scalars;
|
||||
typename AT::t_int_scalar d_resize;
|
||||
typename AT::t_int_scalar d_new_maxneighs;
|
||||
HAT::t_int_scalar h_resize;
|
||||
HAT::t_int_scalar h_new_maxneighs;
|
||||
|
||||
// data from Neighbor class
|
||||
|
||||
|
@ -165,6 +173,7 @@ class NeighborKokkosExecute
|
|||
|
||||
// data from NBin class
|
||||
|
||||
const int mbins;
|
||||
const typename AT::t_int_1d bincount;
|
||||
const typename AT::t_int_1d_const c_bincount;
|
||||
typename AT::t_int_2d bins;
|
||||
|
@ -218,7 +227,7 @@ class NeighborKokkosExecute
|
|||
const typename AT::t_int_1d &_bincount,
|
||||
const typename AT::t_int_2d &_bins,
|
||||
const typename AT::t_int_1d &_atom2bin,
|
||||
const int _nstencil,
|
||||
const int _mbins,const int _nstencil,
|
||||
const typename AT::t_int_1d &_d_stencil,
|
||||
const typename AT::t_int_1d_3 &_d_stencilxyz,
|
||||
const int _nlocal,
|
||||
|
@ -251,8 +260,12 @@ class NeighborKokkosExecute
|
|||
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
|
||||
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
|
||||
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half,
|
||||
const X_FLOAT _skin):
|
||||
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
|
||||
const X_FLOAT _skin,
|
||||
const typename AT::t_int_scalar _resize,
|
||||
const typename ArrayTypes<LMPHostType>::t_int_scalar _h_resize,
|
||||
const typename AT::t_int_scalar _new_maxneighs,
|
||||
const typename ArrayTypes<LMPHostType>::t_int_scalar _h_new_maxneighs):
|
||||
neigh_list(_neigh_list), cutneighsq(_cutneighsq),mbins(_mbins),
|
||||
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
|
||||
atom2bin(_atom2bin),c_atom2bin(_atom2bin),
|
||||
nstencil(_nstencil),d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz),
|
||||
|
@ -272,7 +285,8 @@ class NeighborKokkosExecute
|
|||
ex_mol_intra(_ex_mol_intra),
|
||||
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
|
||||
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half),
|
||||
skin(_skin) {
|
||||
skin(_skin),resize(_resize),h_resize(_h_resize),
|
||||
new_maxneighs(_new_maxneighs),h_new_maxneighs(_h_new_maxneighs) {
|
||||
|
||||
if (molecular == 2) moltemplate = 1;
|
||||
else moltemplate = 0;
|
||||
|
@ -280,20 +294,7 @@ class NeighborKokkosExecute
|
|||
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
|
||||
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
|
||||
|
||||
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_resize = Kokkos::create_mirror_view(resize);
|
||||
#else
|
||||
h_resize = resize;
|
||||
#endif
|
||||
h_resize() = 1;
|
||||
new_maxneighs = typename AT::
|
||||
t_int_scalar("NeighborKokkosFunctor::new_maxneighs");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
|
||||
#else
|
||||
h_new_maxneighs = new_maxneighs;
|
||||
#endif
|
||||
h_new_maxneighs() = neigh_list.maxneighs;
|
||||
};
|
||||
|
||||
|
|
|
@ -86,6 +86,7 @@ struct PairComputeFunctor {
|
|||
NeighListKokkos<device_type>* list_ptr):
|
||||
c(*c_ptr),list(*list_ptr) {
|
||||
// allocate duplicated memory
|
||||
f = c.f;
|
||||
dup_f = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, NeedDup<NEIGHFLAG,device_type>::value >(c.f);
|
||||
dup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, NeedDup<NEIGHFLAG,device_type>::value >(c.d_eatom);
|
||||
dup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, NeedDup<NEIGHFLAG,device_type>::value >(c.d_vatom);
|
||||
|
@ -255,6 +256,328 @@ struct PairComputeFunctor {
|
|||
return ev;
|
||||
}
|
||||
|
||||
// Use TeamPolicy, assume Newton off, Full Neighborlist, and no energy/virial
|
||||
// Loop over neighbors of one atom without coulomb interaction
|
||||
// This function is called in parallel
|
||||
KOKKOS_FUNCTION
|
||||
void compute_item_team(Kokkos::TeamPolicy<>::member_type team,
|
||||
const NeighListKokkos<device_type> &list, const NoCoulTag&) const {
|
||||
|
||||
const int inum = team.league_size();
|
||||
const int atoms_per_team = team.team_size();
|
||||
const int firstatom = team.league_rank()*atoms_per_team;
|
||||
const int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum;
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) {
|
||||
|
||||
const int i = list.d_ilist[ii];
|
||||
const X_FLOAT xtmp = c.x(i,0);
|
||||
const X_FLOAT ytmp = c.x(i,1);
|
||||
const X_FLOAT ztmp = c.x(i,2);
|
||||
const int itype = c.type(i);
|
||||
|
||||
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
|
||||
const int jnum = list.d_numneigh[i];
|
||||
|
||||
t_scalar3<double> fsum;
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,jnum),
|
||||
[&] (const int jj, t_scalar3<double>& ftmp) {
|
||||
|
||||
int j = neighbors_i(jj);
|
||||
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
const X_FLOAT delx = xtmp - c.x(j,0);
|
||||
const X_FLOAT dely = ytmp - c.x(j,1);
|
||||
const X_FLOAT delz = ztmp - c.x(j,2);
|
||||
const int jtype = c.type(j);
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
||||
|
||||
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
|
||||
ftmp.x += delx*fpair;
|
||||
ftmp.y += dely*fpair;
|
||||
ftmp.z += delz*fpair;
|
||||
}
|
||||
|
||||
},fsum);
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
f(i,0) += fsum.x;
|
||||
f(i,1) += fsum.y;
|
||||
f(i,2) += fsum.z;
|
||||
});
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
// Use TeamPolicy, assume Newton off, Full Neighborlist, and no energy/virial
|
||||
// Loop over neighbors of one atom with coulomb interaction
|
||||
// This function is called in parallel
|
||||
KOKKOS_FUNCTION
|
||||
void compute_item_team(Kokkos::TeamPolicy<>::member_type team,
|
||||
const NeighListKokkos<device_type> &list, const CoulTag& ) const {
|
||||
|
||||
const int inum = team.league_size();
|
||||
const int atoms_per_team = team.team_size();
|
||||
int firstatom = team.league_rank()*atoms_per_team;
|
||||
int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum;
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) {
|
||||
|
||||
const int i = list.d_ilist[ii];
|
||||
const X_FLOAT xtmp = c.x(i,0);
|
||||
const X_FLOAT ytmp = c.x(i,1);
|
||||
const X_FLOAT ztmp = c.x(i,2);
|
||||
const int itype = c.type(i);
|
||||
const F_FLOAT qtmp = c.q(i);
|
||||
|
||||
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
|
||||
const int jnum = list.d_numneigh[i];
|
||||
|
||||
t_scalar3<double> fsum;
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,jnum),
|
||||
[&] (const int jj, t_scalar3<double>& ftmp) {
|
||||
int j = neighbors_i(jj);
|
||||
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
|
||||
const F_FLOAT factor_coul = c.special_coul[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
const X_FLOAT delx = xtmp - c.x(j,0);
|
||||
const X_FLOAT dely = ytmp - c.x(j,1);
|
||||
const X_FLOAT delz = ztmp - c.x(j,2);
|
||||
const int jtype = c.type(j);
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
||||
|
||||
F_FLOAT fpair = F_FLOAT();
|
||||
|
||||
if(rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype)))
|
||||
fpair+=factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
if(rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype)))
|
||||
fpair+=c.template compute_fcoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
|
||||
|
||||
ftmp.x += delx*fpair;
|
||||
ftmp.y += dely*fpair;
|
||||
ftmp.z += delz*fpair;
|
||||
}
|
||||
},fsum);
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
f(i,0) += fsum.x;
|
||||
f(i,1) += fsum.y;
|
||||
f(i,2) += fsum.z;
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
// Use TeamPolicy, assume Newton off, Full Neighborlist, and energy/virial
|
||||
// Loop over neighbors of one atom without coulomb interaction
|
||||
// This function is called in parallel
|
||||
KOKKOS_FUNCTION
|
||||
EV_FLOAT compute_item_team_ev(Kokkos::TeamPolicy<>::member_type team,
|
||||
const NeighListKokkos<device_type> &list, const NoCoulTag&) const {
|
||||
|
||||
EV_FLOAT ev;
|
||||
|
||||
const int inum = team.league_size();
|
||||
const int atoms_per_team = team.team_size();
|
||||
const int firstatom = team.league_rank()*atoms_per_team;
|
||||
const int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum;
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) {
|
||||
|
||||
const int i = list.d_ilist[ii];
|
||||
const X_FLOAT xtmp = c.x(i,0);
|
||||
const X_FLOAT ytmp = c.x(i,1);
|
||||
const X_FLOAT ztmp = c.x(i,2);
|
||||
const int itype = c.type(i);
|
||||
|
||||
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
|
||||
const int jnum = list.d_numneigh[i];
|
||||
|
||||
FEV_FLOAT fev;
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,jnum),
|
||||
[&] (const int jj, FEV_FLOAT& fev_tmp) {
|
||||
|
||||
int j = neighbors_i(jj);
|
||||
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
const X_FLOAT delx = xtmp - c.x(j,0);
|
||||
const X_FLOAT dely = ytmp - c.x(j,1);
|
||||
const X_FLOAT delz = ztmp - c.x(j,2);
|
||||
const int jtype = c.type(j);
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
||||
|
||||
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
|
||||
fev_tmp.f[0] += delx*fpair;
|
||||
fev_tmp.f[1] += dely*fpair;
|
||||
fev_tmp.f[2] += delz*fpair;
|
||||
|
||||
F_FLOAT evdwl = 0.0;
|
||||
if (c.eflag) {
|
||||
evdwl = factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
fev.evdwl += 0.5*evdwl;
|
||||
}
|
||||
if (c.vflag_either) {
|
||||
fev.v[0] += 0.5*delx*delx*fpair;
|
||||
fev.v[1] += 0.5*dely*dely*fpair;
|
||||
fev.v[2] += 0.5*delz*delz*fpair;
|
||||
fev.v[3] += 0.5*delx*dely*fpair;
|
||||
fev.v[4] += 0.5*delx*delz*fpair;
|
||||
fev.v[5] += 0.5*dely*delz*fpair;
|
||||
}
|
||||
}
|
||||
},fev);
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
f(i,0) += fev.f[0];
|
||||
f(i,1) += fev.f[1];
|
||||
f(i,2) += fev.f[2];
|
||||
|
||||
if (c.eflag_global)
|
||||
ev.evdwl += fev.evdwl;
|
||||
|
||||
if (c.eflag_atom)
|
||||
d_eatom(i,0) += fev.evdwl;
|
||||
|
||||
if (c.vflag_global) {
|
||||
ev.v[0] += fev.v[0];
|
||||
ev.v[1] += fev.v[1];
|
||||
ev.v[2] += fev.v[2];
|
||||
ev.v[3] += fev.v[3];
|
||||
ev.v[4] += fev.v[4];
|
||||
ev.v[5] += fev.v[5];
|
||||
}
|
||||
|
||||
if (c.vflag_atom) {
|
||||
d_vatom(i,0) += fev.v[0];
|
||||
d_vatom(i,1) += fev.v[1];
|
||||
d_vatom(i,2) += fev.v[2];
|
||||
d_vatom(i,3) += fev.v[3];
|
||||
d_vatom(i,4) += fev.v[4];
|
||||
d_vatom(i,5) += fev.v[5];
|
||||
}
|
||||
});
|
||||
});
|
||||
return ev;
|
||||
}
|
||||
|
||||
// Use TeamPolicy, assume Newton off, Full Neighborlist, and energy/virial
|
||||
// Loop over neighbors of one atom with coulomb interaction
|
||||
// This function is called in parallel
|
||||
KOKKOS_FUNCTION
|
||||
EV_FLOAT compute_item_team_ev(Kokkos::TeamPolicy<>::member_type team,
|
||||
const NeighListKokkos<device_type> &list, const CoulTag& ) const {
|
||||
|
||||
EV_FLOAT ev;
|
||||
|
||||
const int inum = team.league_size();
|
||||
const int atoms_per_team = team.team_size();
|
||||
int firstatom = team.league_rank()*atoms_per_team;
|
||||
int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum;
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) {
|
||||
|
||||
const int i = list.d_ilist[ii];
|
||||
const X_FLOAT xtmp = c.x(i,0);
|
||||
const X_FLOAT ytmp = c.x(i,1);
|
||||
const X_FLOAT ztmp = c.x(i,2);
|
||||
const int itype = c.type(i);
|
||||
const F_FLOAT qtmp = c.q(i);
|
||||
|
||||
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
|
||||
const int jnum = list.d_numneigh[i];
|
||||
|
||||
FEV_FLOAT fev;
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,jnum),
|
||||
[&] (const int jj, FEV_FLOAT& fev_tmp) {
|
||||
int j = neighbors_i(jj);
|
||||
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
|
||||
const F_FLOAT factor_coul = c.special_coul[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
const X_FLOAT delx = xtmp - c.x(j,0);
|
||||
const X_FLOAT dely = ytmp - c.x(j,1);
|
||||
const X_FLOAT delz = ztmp - c.x(j,2);
|
||||
const int jtype = c.type(j);
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
||||
|
||||
F_FLOAT fpair = F_FLOAT();
|
||||
|
||||
if(rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype)))
|
||||
fpair+=factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
if(rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype)))
|
||||
fpair+=c.template compute_fcoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
|
||||
|
||||
fev.f[0] += delx*fpair;
|
||||
fev.f[1] += dely*fpair;
|
||||
fev.f[2] += delz*fpair;
|
||||
|
||||
F_FLOAT evdwl = 0.0;
|
||||
F_FLOAT ecoul = 0.0;
|
||||
if (c.eflag) {
|
||||
if(rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype))) {
|
||||
evdwl = factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
ev.evdwl += 0.5*evdwl;
|
||||
}
|
||||
if(rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) {
|
||||
ecoul = c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
|
||||
ev.ecoul += 0.5*ecoul;
|
||||
}
|
||||
}
|
||||
if (c.vflag) {
|
||||
fev.v[0] += 0.5*delx*delx*fpair;
|
||||
fev.v[1] += 0.5*dely*dely*fpair;
|
||||
fev.v[2] += 0.5*delz*delz*fpair;
|
||||
fev.v[3] += 0.5*delx*dely*fpair;
|
||||
fev.v[4] += 0.5*delx*delz*fpair;
|
||||
fev.v[5] += 0.5*dely*delz*fpair;
|
||||
}
|
||||
}
|
||||
},fev);
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
f(i,0) += fev.f[0];
|
||||
f(i,1) += fev.f[1];
|
||||
f(i,2) += fev.f[2];
|
||||
|
||||
if (c.eflag_global) {
|
||||
ev.evdwl += fev.evdwl;
|
||||
ev.ecoul += fev.ecoul;
|
||||
}
|
||||
|
||||
if (c.eflag_atom)
|
||||
d_eatom(i,0) += fev.evdwl + fev.ecoul;
|
||||
|
||||
if (c.vflag_global) {
|
||||
ev.v[0] += fev.v[0];
|
||||
ev.v[1] += fev.v[1];
|
||||
ev.v[2] += fev.v[2];
|
||||
ev.v[3] += fev.v[3];
|
||||
ev.v[4] += fev.v[4];
|
||||
ev.v[5] += fev.v[5];
|
||||
}
|
||||
|
||||
if (c.vflag_atom) {
|
||||
d_vatom(i,0) += fev.v[0];
|
||||
d_vatom(i,1) += fev.v[1];
|
||||
d_vatom(i,2) += fev.v[2];
|
||||
d_vatom(i,3) += fev.v[3];
|
||||
d_vatom(i,4) += fev.v[4];
|
||||
d_vatom(i,5) += fev.v[5];
|
||||
}
|
||||
});
|
||||
});
|
||||
return ev;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
|
||||
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
|
||||
|
@ -355,6 +678,16 @@ struct PairComputeFunctor {
|
|||
else
|
||||
energy_virial += compute_item<1,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const typename Kokkos::TeamPolicy<>::member_type& team) const {
|
||||
compute_item_team(team,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const typename Kokkos::TeamPolicy<>::member_type& team, value_type &energy_virial) const {
|
||||
energy_virial += compute_item_team_ev(team,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
}
|
||||
};
|
||||
|
||||
template <class PairStyle, bool STACKPARAMS, class Specialisation>
|
||||
|
@ -489,6 +822,15 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
|
|||
void operator()(const int i, value_type &energy_virial) const {
|
||||
energy_virial += compute_item<1,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const typename Kokkos::TeamPolicy<>::member_type& team) const
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const typename Kokkos::TeamPolicy<>::member_type& team, value_type &energy_virial) const
|
||||
{}
|
||||
|
||||
};
|
||||
|
||||
// Filter out Neighflags which are not supported for PairStyle
|
||||
|
@ -507,20 +849,57 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable
|
|||
return ev;
|
||||
}
|
||||
|
||||
template<class FunctorStyle>
|
||||
int GetTeamSize(FunctorStyle& functor, int team_size, int vector_length) {
|
||||
int team_size_max = Kokkos::TeamPolicy<>::team_size_max(functor);
|
||||
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
if(team_size*vector_length > team_size_max)
|
||||
team_size = team_size_max/vector_length;
|
||||
#else
|
||||
team_size = 1;
|
||||
#endif
|
||||
return team_size;
|
||||
}
|
||||
|
||||
// Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL,N2
|
||||
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
|
||||
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
|
||||
EV_FLOAT ev;
|
||||
if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
||||
PairComputeFunctor<PairStyle,NEIGHFLAG,false,Specialisation > ff(fpair,list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
ff.contribute();
|
||||
|
||||
if (!fpair->lmp->kokkos->neigh_thread_set)
|
||||
if (list->inum <= 16384 && NEIGHFLAG == FULL)
|
||||
fpair->lmp->kokkos->neigh_thread = 1;
|
||||
|
||||
if (fpair->lmp->kokkos->neigh_thread) {
|
||||
int vector_length = 8;
|
||||
int atoms_per_team = 32;
|
||||
|
||||
if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
||||
PairComputeFunctor<PairStyle,NEIGHFLAG,false,Specialisation > ff(fpair,list);
|
||||
atoms_per_team = GetTeamSize(ff, atoms_per_team, vector_length);
|
||||
Kokkos::TeamPolicy<Kokkos::IndexType<int> > policy(list->inum,atoms_per_team,vector_length);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev);
|
||||
else Kokkos::parallel_for(policy,ff);
|
||||
} else {
|
||||
PairComputeFunctor<PairStyle,NEIGHFLAG,true,Specialisation > ff(fpair,list);
|
||||
atoms_per_team = GetTeamSize(ff, atoms_per_team, vector_length);
|
||||
Kokkos::TeamPolicy<Kokkos::IndexType<int> > policy(list->inum,atoms_per_team,vector_length);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev);
|
||||
else Kokkos::parallel_for(policy,ff);
|
||||
}
|
||||
} else {
|
||||
PairComputeFunctor<PairStyle,NEIGHFLAG,true,Specialisation > ff(fpair,list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
ff.contribute();
|
||||
if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
||||
PairComputeFunctor<PairStyle,NEIGHFLAG,false,Specialisation > ff(fpair,list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
ff.contribute();
|
||||
} else {
|
||||
PairComputeFunctor<PairStyle,NEIGHFLAG,true,Specialisation > ff(fpair,list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
ff.contribute();
|
||||
}
|
||||
}
|
||||
return ev;
|
||||
}
|
||||
|
|
|
@ -93,7 +93,6 @@ void VerletKokkos::setup(int flag)
|
|||
}
|
||||
|
||||
update->setupflag = 1;
|
||||
lmp->kokkos->auto_sync = 0;
|
||||
|
||||
// setup domain, communication and neighboring
|
||||
// acquire ghosts
|
||||
|
@ -187,6 +186,7 @@ void VerletKokkos::setup(int flag)
|
|||
}
|
||||
if (force->newton) comm->reverse_comm();
|
||||
|
||||
lmp->kokkos->auto_sync = 0;
|
||||
modify->setup(vflag);
|
||||
output->setup(flag);
|
||||
lmp->kokkos->auto_sync = 1;
|
||||
|
@ -202,7 +202,6 @@ void VerletKokkos::setup(int flag)
|
|||
void VerletKokkos::setup_minimal(int flag)
|
||||
{
|
||||
update->setupflag = 1;
|
||||
lmp->kokkos->auto_sync = 0;
|
||||
|
||||
// setup domain, communication and neighboring
|
||||
// acquire ghosts
|
||||
|
@ -294,7 +293,6 @@ void VerletKokkos::setup_minimal(int flag)
|
|||
if (force->newton) comm->reverse_comm();
|
||||
|
||||
modify->setup(vflag);
|
||||
lmp->kokkos->auto_sync = 1;
|
||||
update->setupflag = 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -126,6 +126,8 @@ class Neighbor : protected Pointers {
|
|||
|
||||
bigint memory_usage();
|
||||
|
||||
bigint last_setup_bins; // step of last neighbor::setup_bins() call
|
||||
|
||||
protected:
|
||||
int me,nprocs;
|
||||
int firsttime; // flag for calling init_styles() only once
|
||||
|
@ -139,8 +141,6 @@ class Neighbor : protected Pointers {
|
|||
int fix_check; // # of fixes that induce reneigh
|
||||
int *fixchecklist; // which fixes to check
|
||||
|
||||
bigint last_setup_bins; // step of last neighbor::setup_bins() call
|
||||
|
||||
double triggersq; // trigger = build when atom moves this dist
|
||||
|
||||
double **xhold; // atom coords at last neighbor build
|
||||
|
|
Loading…
Reference in New Issue