forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12365 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
d1e51c4359
commit
d9f92faf40
|
@ -40,12 +40,12 @@ if (test $1 = 1) then
|
|||
if (test -e ../Makefile.package) then
|
||||
sed -i -e 's/[^ \t]*kokkos[^ \t]* //g' ../Makefile.package
|
||||
sed -i -e 's/[^ \t]*KOKKOS[^ \t]* //g' ../Makefile.package
|
||||
sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/kokkos\/core\/src -I../../lib/kokkos/containers/src -DLMP_KOKKOS |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/kokkos\/core\/src |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_INC =[ \t]*|&-DLMP_KOKKOS |' ../Makefile.package
|
||||
# sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/kokkos\/core\/src |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_LIB =[ \t]*|&-lkokkoscore |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(kokkos_SYSINC) |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(kokkos_SYSLIB) |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(kokkos_SYSPATH) |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(KOKKOS_INC) |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(KOKKOS_LINK) |' ../Makefile.package
|
||||
# sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(kokkos_SYSPATH) |' ../Makefile.package
|
||||
fi
|
||||
|
||||
if (test -e ../Makefile.package.settings) then
|
||||
|
|
|
@ -37,31 +37,43 @@ AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp)
|
|||
|
||||
AtomKokkos::~AtomKokkos()
|
||||
{
|
||||
k_tag = DAT::tdual_tagint_1d();
|
||||
k_mask = DAT::tdual_int_1d();
|
||||
k_type = DAT::tdual_int_1d();
|
||||
k_image = DAT::tdual_imageint_1d();
|
||||
k_molecule = DAT::tdual_tagint_1d();
|
||||
memory->destroy_kokkos(k_tag, tag);
|
||||
memory->destroy_kokkos(k_mask, mask);
|
||||
memory->destroy_kokkos(k_type, type);
|
||||
memory->destroy_kokkos(k_image, image);
|
||||
memory->destroy_kokkos(k_molecule, molecule);
|
||||
|
||||
k_x = DAT::tdual_x_array();
|
||||
k_v = DAT::tdual_v_array();
|
||||
k_f = DAT::tdual_f_array();
|
||||
memory->destroy_kokkos(k_x, x);
|
||||
memory->destroy_kokkos(k_v, v);
|
||||
memory->destroy_kokkos(k_f, f);
|
||||
|
||||
k_mass = DAT::tdual_float_1d();
|
||||
memory->destroy_kokkos(k_mass, mass);
|
||||
|
||||
tag = NULL;
|
||||
mask = NULL;
|
||||
type = NULL;
|
||||
image = NULL;
|
||||
molecule = NULL;
|
||||
mass = NULL;
|
||||
memory->destroy_kokkos(k_q,q);
|
||||
|
||||
memory->destroy_kokkos(k_nspecial, nspecial);
|
||||
memory->destroy_kokkos(k_special, special);
|
||||
memory->destroy_kokkos(k_num_bond, num_bond);
|
||||
memory->destroy_kokkos(k_bond_type, bond_type);
|
||||
memory->destroy_kokkos(k_bond_atom, bond_atom);
|
||||
memory->destroy_kokkos(k_num_angle, num_angle);
|
||||
memory->destroy_kokkos(k_angle_type, angle_type);
|
||||
memory->destroy_kokkos(k_angle_atom1, angle_atom1);
|
||||
memory->destroy_kokkos(k_angle_atom2, angle_atom2);
|
||||
memory->destroy_kokkos(k_angle_atom3, angle_atom3);
|
||||
memory->destroy_kokkos(k_num_dihedral, num_dihedral);
|
||||
memory->destroy_kokkos(k_dihedral_type, dihedral_type);
|
||||
memory->destroy_kokkos(k_dihedral_atom1, dihedral_atom1);
|
||||
memory->destroy_kokkos(k_dihedral_atom2, dihedral_atom2);
|
||||
memory->destroy_kokkos(k_dihedral_atom3, dihedral_atom3);
|
||||
memory->destroy_kokkos(k_dihedral_atom4, dihedral_atom4);
|
||||
memory->destroy_kokkos(k_num_improper, num_improper);
|
||||
memory->destroy_kokkos(k_improper_type, improper_type);
|
||||
memory->destroy_kokkos(k_improper_atom1, improper_atom1);
|
||||
memory->destroy_kokkos(k_improper_atom2, improper_atom2);
|
||||
memory->destroy_kokkos(k_improper_atom3, improper_atom3);
|
||||
memory->destroy_kokkos(k_improper_atom4, improper_atom4);
|
||||
|
||||
memory->sfree(x);
|
||||
memory->sfree(v);
|
||||
memory->sfree(f);
|
||||
x = NULL;
|
||||
v = NULL;
|
||||
f = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -96,9 +108,6 @@ void AtomKokkos::sort()
|
|||
{
|
||||
int i,m,n,ix,iy,iz,ibin,empty;
|
||||
|
||||
sync(Host,ALL_MASK);
|
||||
modified(Host,ALL_MASK);
|
||||
|
||||
// set next timestep for sorting to take place
|
||||
|
||||
nextsort = (update->ntimestep/sortfreq)*sortfreq + sortfreq;
|
||||
|
@ -122,6 +131,9 @@ void AtomKokkos::sort()
|
|||
|
||||
if (nlocal == nmax) avec->grow(0);
|
||||
|
||||
sync(Host,ALL_MASK);
|
||||
modified(Host,ALL_MASK);
|
||||
|
||||
// bin atoms in reverse order so linked list will be in forward order
|
||||
|
||||
for (i = 0; i < nbins; i++) binhead[i] = -1;
|
||||
|
@ -188,3 +200,43 @@ void AtomKokkos::sort()
|
|||
//MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
|
||||
//if (flagall) error->all(FLERR,"Atom sort did not operate correctly");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
reallocate memory to the pointer selected by the mask
|
||||
------------------------------------------------------------------------- */
|
||||
void AtomKokkos::grow(unsigned int mask){
|
||||
|
||||
if (mask && SPECIAL_MASK){
|
||||
memory->destroy_kokkos(k_special, special);
|
||||
sync(Device, mask);
|
||||
modified(Device, mask);
|
||||
memory->grow_kokkos(k_special,special,nmax,maxspecial,"atom:special");
|
||||
avec->grow_reset();
|
||||
sync(Host, mask);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void AtomKokkos::deallocate_topology()
|
||||
{
|
||||
memory->destroy_kokkos(k_bond_type, bond_type);
|
||||
memory->destroy_kokkos(k_bond_atom, bond_atom);
|
||||
|
||||
memory->destroy_kokkos(k_angle_type, angle_type);
|
||||
memory->destroy_kokkos(k_angle_atom1, angle_atom1);
|
||||
memory->destroy_kokkos(k_angle_atom2, angle_atom2);
|
||||
memory->destroy_kokkos(k_angle_atom3, angle_atom3);
|
||||
|
||||
memory->destroy_kokkos(k_dihedral_type, dihedral_type);
|
||||
memory->destroy_kokkos(k_dihedral_atom1, dihedral_atom1);
|
||||
memory->destroy_kokkos(k_dihedral_atom2, dihedral_atom2);
|
||||
memory->destroy_kokkos(k_dihedral_atom3, dihedral_atom3);
|
||||
memory->destroy_kokkos(k_dihedral_atom4, dihedral_atom4);
|
||||
|
||||
memory->destroy_kokkos(k_improper_type, improper_type);
|
||||
memory->destroy_kokkos(k_improper_atom1, improper_atom1);
|
||||
memory->destroy_kokkos(k_improper_atom2, improper_atom2);
|
||||
memory->destroy_kokkos(k_improper_atom3, improper_atom3);
|
||||
memory->destroy_kokkos(k_improper_atom4, improper_atom4);
|
||||
}
|
||||
|
||||
|
|
|
@ -21,15 +21,33 @@ namespace LAMMPS_NS {
|
|||
|
||||
class AtomKokkos : public Atom {
|
||||
public:
|
||||
DAT::tdual_tagint_1d k_tag, k_molecule;
|
||||
DAT::tdual_imageint_1d k_image;
|
||||
DAT::tdual_tagint_1d k_tag;
|
||||
DAT::tdual_int_1d k_type, k_mask;
|
||||
DAT::tdual_imageint_1d k_image;
|
||||
DAT::tdual_x_array k_x;
|
||||
DAT::tdual_v_array k_v;
|
||||
DAT::tdual_f_array k_f;
|
||||
|
||||
|
||||
DAT::tdual_float_1d k_mass;
|
||||
|
||||
DAT::tdual_float_1d k_q;
|
||||
DAT::tdual_tagint_1d k_molecule;
|
||||
DAT::tdual_int_2d k_nspecial;
|
||||
DAT::tdual_tagint_2d k_special;
|
||||
DAT::tdual_int_1d k_num_bond;
|
||||
DAT::tdual_int_2d k_bond_type;
|
||||
DAT::tdual_tagint_2d k_bond_atom;
|
||||
DAT::tdual_int_1d k_num_angle;
|
||||
DAT::tdual_int_2d k_angle_type;
|
||||
DAT::tdual_tagint_2d k_angle_atom1, k_angle_atom2, k_angle_atom3;
|
||||
DAT::tdual_int_1d k_num_dihedral;
|
||||
DAT::tdual_int_2d k_dihedral_type;
|
||||
DAT::tdual_tagint_2d k_dihedral_atom1, k_dihedral_atom2, k_dihedral_atom3, k_dihedral_atom4;
|
||||
DAT::tdual_int_1d k_num_improper;
|
||||
DAT::tdual_int_2d k_improper_type;
|
||||
DAT::tdual_tagint_2d k_improper_atom1, k_improper_atom2, k_improper_atom3, k_improper_atom4;
|
||||
|
||||
AtomKokkos(class LAMMPS *);
|
||||
~AtomKokkos();
|
||||
|
||||
|
@ -37,6 +55,8 @@ class AtomKokkos : public Atom {
|
|||
void sync(const ExecutionSpace space, unsigned int mask);
|
||||
void modified(const ExecutionSpace space, unsigned int mask);
|
||||
virtual void sort();
|
||||
virtual void grow(unsigned int mask);
|
||||
virtual void deallocate_topology();
|
||||
};
|
||||
|
||||
template<class ViewType, class IndexView>
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,153 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef ATOM_CLASS
|
||||
|
||||
AtomStyle(angle/kk,AtomVecAngleKokkos)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_ATOM_VEC_ANGLE_KOKKOS_H
|
||||
#define LMP_ATOM_VEC_ANGLE_KOKKOS_H
|
||||
|
||||
#include "atom_vec_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class AtomVecAngleKokkos : public AtomVecKokkos {
|
||||
public:
|
||||
AtomVecAngleKokkos(class LAMMPS *);
|
||||
virtual ~AtomVecAngleKokkos() {}
|
||||
void grow(int);
|
||||
void copy(int, int, int);
|
||||
int pack_comm(int, int *, double *, int, int *);
|
||||
int pack_comm_vel(int, int *, double *, int, int *);
|
||||
void unpack_comm(int, int, double *);
|
||||
void unpack_comm_vel(int, int, double *);
|
||||
int pack_reverse(int, int, double *);
|
||||
void unpack_reverse(int, int *, double *);
|
||||
int pack_border(int, int *, double *, int, int *);
|
||||
int pack_border_vel(int, int *, double *, int, int *);
|
||||
int pack_border_hybrid(int, int *, double *);
|
||||
void unpack_border(int, int, double *);
|
||||
void unpack_border_vel(int, int, double *);
|
||||
int unpack_border_hybrid(int, int, double *);
|
||||
int pack_exchange(int, double *);
|
||||
int unpack_exchange(double *);
|
||||
int size_restart();
|
||||
int pack_restart(int, double *);
|
||||
int unpack_restart(double *);
|
||||
void create_atom(int, double *);
|
||||
void data_atom(double *, tagint, char **);
|
||||
int data_atom_hybrid(int, char **);
|
||||
void pack_data(double **);
|
||||
int pack_data_hybrid(int, double *);
|
||||
void write_data(FILE *, int, double **);
|
||||
int write_data_hybrid(FILE *, double *);
|
||||
bigint memory_usage();
|
||||
|
||||
void grow_reset();
|
||||
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
|
||||
const int & iswap,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
void unpack_comm_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf);
|
||||
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
|
||||
const int & iswap, const int nfirst,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
DAT::tdual_xfloat_2d buf,int iswap,
|
||||
int pbc_flag, int *pbc, ExecutionSpace space);
|
||||
void unpack_border_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space);
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi);
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space);
|
||||
|
||||
void sync(ExecutionSpace space, unsigned int mask);
|
||||
void modified(ExecutionSpace space, unsigned int mask);
|
||||
|
||||
protected:
|
||||
|
||||
tagint *tag;
|
||||
int *type,*mask;
|
||||
imageint *image;
|
||||
double **x,**v,**f;
|
||||
|
||||
tagint *molecule;
|
||||
int **nspecial;
|
||||
tagint **special;
|
||||
int *num_bond;
|
||||
int **bond_type;
|
||||
tagint **bond_atom;
|
||||
|
||||
int *num_angle;
|
||||
int **angle_type;
|
||||
tagint **angle_atom1,**angle_atom2,**angle_atom3;
|
||||
|
||||
DAT::t_tagint_1d d_tag;
|
||||
DAT::t_int_1d d_type, d_mask;
|
||||
HAT::t_tagint_1d h_tag;
|
||||
HAT::t_int_1d h_type, h_mask;
|
||||
|
||||
DAT::t_imageint_1d d_image;
|
||||
HAT::t_imageint_1d h_image;
|
||||
|
||||
DAT::t_x_array d_x;
|
||||
DAT::t_v_array d_v;
|
||||
DAT::t_f_array d_f;
|
||||
HAT::t_x_array h_x;
|
||||
HAT::t_v_array h_v;
|
||||
HAT::t_f_array h_f;
|
||||
|
||||
DAT::t_tagint_1d d_molecule;
|
||||
DAT::t_int_2d d_nspecial;
|
||||
DAT::t_tagint_2d d_special;
|
||||
DAT::t_int_1d d_num_bond;
|
||||
DAT::t_int_2d d_bond_type;
|
||||
DAT::t_tagint_2d d_bond_atom;
|
||||
|
||||
HAT::t_tagint_1d h_molecule;
|
||||
HAT::t_int_2d h_nspecial;
|
||||
HAT::t_tagint_2d h_special;
|
||||
HAT::t_int_1d h_num_bond;
|
||||
HAT::t_int_2d h_bond_type;
|
||||
HAT::t_tagint_2d h_bond_atom;
|
||||
|
||||
DAT::t_int_1d d_num_angle;
|
||||
DAT::t_int_2d d_angle_type;
|
||||
DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3;
|
||||
|
||||
HAT::t_int_1d h_num_angle;
|
||||
HAT::t_int_2d h_angle_type;
|
||||
HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3;
|
||||
|
||||
DAT::tdual_int_1d k_count;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -553,14 +553,15 @@ void AtomVecAtomicKokkos::unpack_comm_vel(int n, int first, double *buf)
|
|||
|
||||
int AtomVecAtomicKokkos::pack_reverse(int n, int first, double *buf)
|
||||
{
|
||||
int i,m,last;
|
||||
if(n > 0)
|
||||
sync(Host,F_MASK);
|
||||
|
||||
m = 0;
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
buf[m++] = f[i][0];
|
||||
buf[m++] = f[i][1];
|
||||
buf[m++] = f[i][2];
|
||||
int m = 0;
|
||||
const int last = first + n;
|
||||
for (int i = first; i < last; i++) {
|
||||
buf[m++] = h_f(i,0);
|
||||
buf[m++] = h_f(i,1);
|
||||
buf[m++] = h_f(i,2);
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
@ -569,14 +570,17 @@ int AtomVecAtomicKokkos::pack_reverse(int n, int first, double *buf)
|
|||
|
||||
void AtomVecAtomicKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
{
|
||||
int i,j,m;
|
||||
if(n > 0) {
|
||||
sync(Host,F_MASK);
|
||||
modified(Host,F_MASK);
|
||||
}
|
||||
|
||||
m = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
j = list[i];
|
||||
f[j][0] += buf[m++];
|
||||
f[j][1] += buf[m++];
|
||||
f[j][2] += buf[m++];
|
||||
int m = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
const int j = list[i];
|
||||
h_f(j,0) += buf[m++];
|
||||
h_f(j,1) += buf[m++];
|
||||
h_f(j,2) += buf[m++];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -588,11 +592,11 @@ struct AtomVecAtomicKokkos_PackBorder {
|
|||
|
||||
typename ArrayTypes<DeviceType>::t_xfloat_2d _buf;
|
||||
const typename ArrayTypes<DeviceType>::t_int_2d_const _list;
|
||||
const int _iswap;
|
||||
const typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
|
||||
const typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d _type;
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d _mask;
|
||||
const int _iswap;
|
||||
X_FLOAT _dx,_dy,_dz;
|
||||
|
||||
AtomVecAtomicKokkos_PackBorder(
|
||||
|
@ -694,9 +698,9 @@ int AtomVecAtomicKokkos::pack_border(int n, int *list, double *buf,
|
|||
buf[m++] = h_x(j,0);
|
||||
buf[m++] = h_x(j,1);
|
||||
buf[m++] = h_x(j,2);
|
||||
buf[m++] = h_tag[j];
|
||||
buf[m++] = h_type[j];
|
||||
buf[m++] = h_mask[j];
|
||||
buf[m++] = ubuf(h_tag(j)).d;
|
||||
buf[m++] = ubuf(h_type(j)).d;
|
||||
buf[m++] = ubuf(h_mask(j)).d;
|
||||
}
|
||||
} else {
|
||||
if (domain->triclinic == 0) {
|
||||
|
@ -713,11 +717,16 @@ int AtomVecAtomicKokkos::pack_border(int n, int *list, double *buf,
|
|||
buf[m++] = h_x(j,0) + dx;
|
||||
buf[m++] = h_x(j,1) + dy;
|
||||
buf[m++] = h_x(j,2) + dz;
|
||||
buf[m++] = h_tag[j];
|
||||
buf[m++] = h_type[j];
|
||||
buf[m++] = h_mask[j];
|
||||
buf[m++] = ubuf(h_tag(j)).d;
|
||||
buf[m++] = ubuf(h_type(j)).d;
|
||||
buf[m++] = ubuf(h_mask(j)).d;
|
||||
}
|
||||
}
|
||||
|
||||
if (atom->nextra_border)
|
||||
for (int iextra = 0; iextra < atom->nextra_border; iextra++)
|
||||
m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
|
@ -736,9 +745,9 @@ int AtomVecAtomicKokkos::pack_border_vel(int n, int *list, double *buf,
|
|||
buf[m++] = h_x(j,0);
|
||||
buf[m++] = h_x(j,1);
|
||||
buf[m++] = h_x(j,2);
|
||||
buf[m++] = h_tag[j];
|
||||
buf[m++] = h_type[j];
|
||||
buf[m++] = h_mask[j];
|
||||
buf[m++] = ubuf(h_tag(j)).d;
|
||||
buf[m++] = ubuf(h_type(j)).d;
|
||||
buf[m++] = ubuf(h_mask(j)).d;
|
||||
buf[m++] = h_v(j,0);
|
||||
buf[m++] = h_v(j,1);
|
||||
buf[m++] = h_v(j,2);
|
||||
|
@ -759,9 +768,9 @@ int AtomVecAtomicKokkos::pack_border_vel(int n, int *list, double *buf,
|
|||
buf[m++] = h_x(j,0) + dx;
|
||||
buf[m++] = h_x(j,1) + dy;
|
||||
buf[m++] = h_x(j,2) + dz;
|
||||
buf[m++] = h_tag[j];
|
||||
buf[m++] = h_type[j];
|
||||
buf[m++] = h_mask[j];
|
||||
buf[m++] = ubuf(h_tag(j)).d;
|
||||
buf[m++] = ubuf(h_type(j)).d;
|
||||
buf[m++] = ubuf(h_mask(j)).d;
|
||||
buf[m++] = h_v(j,0);
|
||||
buf[m++] = h_v(j,1);
|
||||
buf[m++] = h_v(j,2);
|
||||
|
@ -775,9 +784,9 @@ int AtomVecAtomicKokkos::pack_border_vel(int n, int *list, double *buf,
|
|||
buf[m++] = h_x(j,0) + dx;
|
||||
buf[m++] = h_x(j,1) + dy;
|
||||
buf[m++] = h_x(j,2) + dz;
|
||||
buf[m++] = h_tag[j];
|
||||
buf[m++] = h_type[j];
|
||||
buf[m++] = h_mask[j];
|
||||
buf[m++] = ubuf(h_tag(j)).d;
|
||||
buf[m++] = ubuf(h_type(j)).d;
|
||||
buf[m++] = ubuf(h_mask(j)).d;
|
||||
if (mask[i] & deform_groupbit) {
|
||||
buf[m++] = h_v(j,0) + dvx;
|
||||
buf[m++] = h_v(j,1) + dvy;
|
||||
|
@ -790,6 +799,11 @@ int AtomVecAtomicKokkos::pack_border_vel(int n, int *list, double *buf,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (atom->nextra_border)
|
||||
for (int iextra = 0; iextra < atom->nextra_border; iextra++)
|
||||
m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
|
@ -861,10 +875,15 @@ void AtomVecAtomicKokkos::unpack_border(int n, int first, double *buf)
|
|||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
h_tag[i] = static_cast<int> (buf[m++]);
|
||||
h_type[i] = static_cast<int> (buf[m++]);
|
||||
h_mask[i] = static_cast<int> (buf[m++]);
|
||||
h_tag(i) = (tagint) ubuf(buf[m++]).i;
|
||||
h_type(i) = (int) ubuf(buf[m++]).i;
|
||||
h_mask(i) = (int) ubuf(buf[m++]).i;
|
||||
}
|
||||
|
||||
if (atom->nextra_border)
|
||||
for (int iextra = 0; iextra < atom->nextra_border; iextra++)
|
||||
m += modify->fix[atom->extra_border[iextra]]->
|
||||
unpack_border(n,first,&buf[m]);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -880,13 +899,18 @@ void AtomVecAtomicKokkos::unpack_border_vel(int n, int first, double *buf)
|
|||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
h_tag[i] = static_cast<int> (buf[m++]);
|
||||
h_type[i] = static_cast<int> (buf[m++]);
|
||||
h_mask[i] = static_cast<int> (buf[m++]);
|
||||
h_tag(i) = (tagint) ubuf(buf[m++]).i;
|
||||
h_type(i) = (int) ubuf(buf[m++]).i;
|
||||
h_mask(i) = (int) ubuf(buf[m++]).i;
|
||||
h_v(i,0) = buf[m++];
|
||||
h_v(i,1) = buf[m++];
|
||||
h_v(i,2) = buf[m++];
|
||||
}
|
||||
|
||||
if (atom->nextra_border)
|
||||
for (int iextra = 0; iextra < atom->nextra_border; iextra++)
|
||||
m += modify->fix[atom->extra_border[iextra]]->
|
||||
unpack_border(n,first,&buf[m]);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -895,7 +919,6 @@ template<class DeviceType>
|
|||
struct AtomVecAtomicKokkos_PackExchangeFunctor {
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
X_FLOAT _lo,_hi;
|
||||
typename AT::t_x_array_randomread _x;
|
||||
typename AT::t_v_array_randomread _v;
|
||||
typename AT::t_tagint_1d_randomread _tag;
|
||||
|
@ -910,9 +933,10 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
|
|||
typename AT::t_imageint_1d _imagew;
|
||||
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
int _nlocal,_dim;
|
||||
typename AT::t_int_1d_const _sendlist;
|
||||
typename AT::t_int_1d_const _copylist;
|
||||
int _nlocal,_dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
|
||||
AtomVecAtomicKokkos_PackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
|
@ -977,7 +1001,7 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
|
|||
|
||||
int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi )
|
||||
{
|
||||
if(nsend > (k_buf.view<LMPHostType>().dimension_0()*k_buf.view<LMPHostType>().dimension_1())/11) {
|
||||
if(nsend > (int) (k_buf.view<LMPHostType>().dimension_0()*k_buf.view<LMPHostType>().dimension_1())/11) {
|
||||
int newsize = nsend*11/k_buf.view<LMPHostType>().dimension_1()+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().dimension_1());
|
||||
}
|
||||
|
@ -1005,10 +1029,10 @@ int AtomVecAtomicKokkos::pack_exchange(int i, double *buf)
|
|||
buf[m++] = h_v(i,0);
|
||||
buf[m++] = h_v(i,1);
|
||||
buf[m++] = h_v(i,2);
|
||||
buf[m++] = h_tag[i];
|
||||
buf[m++] = h_type[i];
|
||||
buf[m++] = h_mask[i];
|
||||
*((tagint *) &buf[m++]) = h_image[i];
|
||||
buf[m++] = ubuf(h_tag(i)).d;
|
||||
buf[m++] = ubuf(h_type(i)).d;
|
||||
buf[m++] = ubuf(h_mask(i)).d;
|
||||
buf[m++] = ubuf(h_image(i)).d;
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
|
@ -1024,7 +1048,6 @@ template<class DeviceType>
|
|||
struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
X_FLOAT _lo,_hi;
|
||||
typename AT::t_x_array _x;
|
||||
typename AT::t_v_array _v;
|
||||
typename AT::t_tagint_1d _tag;
|
||||
|
@ -1033,8 +1056,9 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
|
|||
typename AT::t_imageint_1d _image;
|
||||
|
||||
typename AT::t_xfloat_2d_um _buf;
|
||||
int _dim;
|
||||
typename AT::t_int_1d _nlocal;
|
||||
int _dim;
|
||||
X_FLOAT _lo,_hi;
|
||||
|
||||
AtomVecAtomicKokkos_UnpackExchangeFunctor(
|
||||
const AtomKokkos* atom,
|
||||
|
@ -1113,10 +1137,10 @@ int AtomVecAtomicKokkos::unpack_exchange(double *buf)
|
|||
h_v(nlocal,0) = buf[m++];
|
||||
h_v(nlocal,1) = buf[m++];
|
||||
h_v(nlocal,2) = buf[m++];
|
||||
h_tag[nlocal] = static_cast<int> (buf[m++]);
|
||||
h_type[nlocal] = static_cast<int> (buf[m++]);
|
||||
h_mask[nlocal] = static_cast<int> (buf[m++]);
|
||||
h_image[nlocal] = static_cast<int> (buf[m++]);
|
||||
h_tag(nlocal) = (tagint) ubuf(buf[m++]).i;
|
||||
h_type(nlocal) = (int) ubuf(buf[m++]).i;
|
||||
h_mask(nlocal) = (int) ubuf(buf[m++]).i;
|
||||
h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
|
@ -1159,10 +1183,10 @@ int AtomVecAtomicKokkos::pack_restart(int i, double *buf)
|
|||
buf[m++] = h_x(i,0);
|
||||
buf[m++] = h_x(i,1);
|
||||
buf[m++] = h_x(i,2);
|
||||
buf[m++] = h_tag[i];
|
||||
buf[m++] = h_type[i];
|
||||
buf[m++] = h_mask[i];
|
||||
buf[m++] = h_image[i];
|
||||
buf[m++] = ubuf(h_tag(i)).d;
|
||||
buf[m++] = ubuf(h_type(i)).d;
|
||||
buf[m++] = ubuf(h_mask(i)).d;
|
||||
buf[m++] = ubuf(h_image(i)).d;
|
||||
buf[m++] = h_v(i,0);
|
||||
buf[m++] = h_v(i,1);
|
||||
buf[m++] = h_v(i,2);
|
||||
|
@ -1192,17 +1216,17 @@ int AtomVecAtomicKokkos::unpack_restart(double *buf)
|
|||
h_x(nlocal,0) = buf[m++];
|
||||
h_x(nlocal,1) = buf[m++];
|
||||
h_x(nlocal,2) = buf[m++];
|
||||
h_tag[nlocal] = static_cast<int> (buf[m++]);
|
||||
h_type[nlocal] = static_cast<int> (buf[m++]);
|
||||
h_mask[nlocal] = static_cast<int> (buf[m++]);
|
||||
h_image[nlocal] = *((tagint *) &buf[m++]);
|
||||
h_tag(nlocal) = (tagint) ubuf(buf[m++]).i;
|
||||
h_type(nlocal) = (int) ubuf(buf[m++]).i;
|
||||
h_mask(nlocal) = (int) ubuf(buf[m++]).i;
|
||||
h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
|
||||
h_v(nlocal,0) = buf[m++];
|
||||
h_v(nlocal,1) = buf[m++];
|
||||
h_v(nlocal,2) = buf[m++];
|
||||
|
||||
double **extra = atom->extra;
|
||||
if (atom->nextra_store) {
|
||||
int size = static_cast<int> (buf[0]) - m;
|
||||
int size = static_cast<int> (ubuf(buf[m++]).i) - m;
|
||||
for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,141 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef ATOM_CLASS
|
||||
|
||||
AtomStyle(bond/kk,AtomVecBondKokkos)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_ATOM_VEC_BOND_KOKKOS_H
|
||||
#define LMP_ATOM_VEC_BOND_KOKKOS_H
|
||||
|
||||
#include "atom_vec_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class AtomVecBondKokkos : public AtomVecKokkos {
|
||||
public:
|
||||
AtomVecBondKokkos(class LAMMPS *);
|
||||
virtual ~AtomVecBondKokkos() {}
|
||||
void grow(int);
|
||||
void copy(int, int, int);
|
||||
int pack_comm(int, int *, double *, int, int *);
|
||||
int pack_comm_vel(int, int *, double *, int, int *);
|
||||
void unpack_comm(int, int, double *);
|
||||
void unpack_comm_vel(int, int, double *);
|
||||
int pack_reverse(int, int, double *);
|
||||
void unpack_reverse(int, int *, double *);
|
||||
int pack_border(int, int *, double *, int, int *);
|
||||
int pack_border_vel(int, int *, double *, int, int *);
|
||||
int pack_border_hybrid(int, int *, double *);
|
||||
void unpack_border(int, int, double *);
|
||||
void unpack_border_vel(int, int, double *);
|
||||
int unpack_border_hybrid(int, int, double *);
|
||||
int pack_exchange(int, double *);
|
||||
int unpack_exchange(double *);
|
||||
int size_restart();
|
||||
int pack_restart(int, double *);
|
||||
int unpack_restart(double *);
|
||||
void create_atom(int, double *);
|
||||
void data_atom(double *, tagint, char **);
|
||||
int data_atom_hybrid(int, char **);
|
||||
void pack_data(double **);
|
||||
int pack_data_hybrid(int, double *);
|
||||
void write_data(FILE *, int, double **);
|
||||
int write_data_hybrid(FILE *, double *);
|
||||
bigint memory_usage();
|
||||
|
||||
void grow_reset();
|
||||
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
|
||||
const int & iswap,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
void unpack_comm_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf);
|
||||
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
|
||||
const int & iswap, const int nfirst,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
DAT::tdual_xfloat_2d buf,int iswap,
|
||||
int pbc_flag, int *pbc, ExecutionSpace space);
|
||||
void unpack_border_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space);
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi);
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space);
|
||||
|
||||
void sync(ExecutionSpace space, unsigned int mask);
|
||||
void modified(ExecutionSpace space, unsigned int mask);
|
||||
|
||||
protected:
|
||||
|
||||
tagint *tag;
|
||||
int *type,*mask;
|
||||
imageint *image;
|
||||
double **x,**v,**f;
|
||||
|
||||
tagint *molecule;
|
||||
int **nspecial;
|
||||
tagint **special;
|
||||
int *num_bond;
|
||||
int **bond_type;
|
||||
tagint **bond_atom;
|
||||
|
||||
DAT::t_tagint_1d d_tag;
|
||||
DAT::t_int_1d d_type, d_mask;
|
||||
HAT::t_tagint_1d h_tag;
|
||||
HAT::t_int_1d h_type, h_mask;
|
||||
|
||||
DAT::t_imageint_1d d_image;
|
||||
HAT::t_imageint_1d h_image;
|
||||
|
||||
DAT::t_x_array d_x;
|
||||
DAT::t_v_array d_v;
|
||||
DAT::t_f_array d_f;
|
||||
HAT::t_x_array h_x;
|
||||
HAT::t_v_array h_v;
|
||||
HAT::t_f_array h_f;
|
||||
|
||||
DAT::t_tagint_1d d_molecule;
|
||||
DAT::t_int_2d d_nspecial;
|
||||
DAT::t_tagint_2d d_special;
|
||||
DAT::t_int_1d d_num_bond;
|
||||
DAT::t_int_2d d_bond_type;
|
||||
DAT::t_tagint_2d d_bond_atom;
|
||||
|
||||
HAT::t_tagint_1d h_molecule;
|
||||
HAT::t_int_2d h_nspecial;
|
||||
HAT::t_tagint_2d h_special;
|
||||
HAT::t_int_1d h_num_bond;
|
||||
HAT::t_int_2d h_bond_type;
|
||||
HAT::t_tagint_2d h_bond_atom;
|
||||
|
||||
DAT::tdual_int_1d k_count;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,126 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef ATOM_CLASS
|
||||
|
||||
AtomStyle(charge/kk,AtomVecChargeKokkos)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_ATOM_VEC_CHARGE_KOKKOS_H
|
||||
#define LMP_ATOM_VEC_CHARGE_KOKKOS_H
|
||||
|
||||
#include "atom_vec_kokkos.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class AtomVecChargeKokkos : public AtomVecKokkos {
|
||||
public:
|
||||
AtomVecChargeKokkos(class LAMMPS *);
|
||||
virtual ~AtomVecChargeKokkos() {}
|
||||
void grow(int);
|
||||
void copy(int, int, int);
|
||||
int pack_comm(int, int *, double *, int, int *);
|
||||
int pack_comm_vel(int, int *, double *, int, int *);
|
||||
void unpack_comm(int, int, double *);
|
||||
void unpack_comm_vel(int, int, double *);
|
||||
int pack_reverse(int, int, double *);
|
||||
void unpack_reverse(int, int *, double *);
|
||||
int pack_border(int, int *, double *, int, int *);
|
||||
int pack_border_vel(int, int *, double *, int, int *);
|
||||
int pack_border_hybrid(int, int *, double *);
|
||||
void unpack_border(int, int, double *);
|
||||
void unpack_border_vel(int, int, double *);
|
||||
int unpack_border_hybrid(int, int, double *);
|
||||
int pack_exchange(int, double *);
|
||||
int unpack_exchange(double *);
|
||||
int size_restart();
|
||||
int pack_restart(int, double *);
|
||||
int unpack_restart(double *);
|
||||
void create_atom(int, double *);
|
||||
void data_atom(double *, tagint, char **);
|
||||
int data_atom_hybrid(int , char **);
|
||||
void pack_data(double **);
|
||||
int pack_data_hybrid(int, double *);
|
||||
void write_data(FILE *, int, double **);
|
||||
int write_data_hybrid(FILE *, double *);
|
||||
bigint memory_usage();
|
||||
|
||||
void grow_reset();
|
||||
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
|
||||
const int & iswap,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
void unpack_comm_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf);
|
||||
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
|
||||
const int & iswap, const int nfirst,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
DAT::tdual_xfloat_2d buf,int iswap,
|
||||
int pbc_flag, int *pbc, ExecutionSpace space);
|
||||
void unpack_border_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space);
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi);
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space);
|
||||
|
||||
void sync(ExecutionSpace space, unsigned int mask);
|
||||
void modified(ExecutionSpace space, unsigned int mask);
|
||||
|
||||
protected:
|
||||
tagint *tag;
|
||||
int *type,*mask;
|
||||
imageint *image;
|
||||
double **x,**v,**f;
|
||||
|
||||
double *q;
|
||||
|
||||
DAT::t_tagint_1d d_tag;
|
||||
HAT::t_tagint_1d h_tag;
|
||||
|
||||
DAT::t_int_1d d_type, d_mask;
|
||||
HAT::t_int_1d h_type, h_mask;
|
||||
|
||||
DAT::t_imageint_1d d_image;
|
||||
HAT::t_imageint_1d h_image;
|
||||
|
||||
DAT::t_x_array d_x;
|
||||
DAT::t_v_array d_v;
|
||||
DAT::t_f_array d_f;
|
||||
HAT::t_x_array h_x;
|
||||
HAT::t_v_array h_v;
|
||||
HAT::t_f_array h_f;
|
||||
|
||||
DAT::t_float_1d d_q;
|
||||
|
||||
HAT::t_float_1d h_q;
|
||||
|
||||
DAT::tdual_int_1d k_count;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,183 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef ATOM_CLASS
|
||||
|
||||
AtomStyle(full/kk,AtomVecFullKokkos)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_ATOM_VEC_FULL_KOKKOS_H
|
||||
#define LMP_ATOM_VEC_FULL_KOKKOS_H
|
||||
|
||||
#include "atom_vec_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class AtomVecFullKokkos : public AtomVecKokkos {
|
||||
public:
|
||||
AtomVecFullKokkos(class LAMMPS *);
|
||||
virtual ~AtomVecFullKokkos() {}
|
||||
void grow(int);
|
||||
void copy(int, int, int);
|
||||
int pack_comm(int, int *, double *, int, int *);
|
||||
int pack_comm_vel(int, int *, double *, int, int *);
|
||||
void unpack_comm(int, int, double *);
|
||||
void unpack_comm_vel(int, int, double *);
|
||||
int pack_reverse(int, int, double *);
|
||||
void unpack_reverse(int, int *, double *);
|
||||
int pack_border(int, int *, double *, int, int *);
|
||||
int pack_border_vel(int, int *, double *, int, int *);
|
||||
int pack_border_hybrid(int, int *, double *);
|
||||
void unpack_border(int, int, double *);
|
||||
void unpack_border_vel(int, int, double *);
|
||||
int unpack_border_hybrid(int, int, double *);
|
||||
int pack_exchange(int, double *);
|
||||
int unpack_exchange(double *);
|
||||
int size_restart();
|
||||
int pack_restart(int, double *);
|
||||
int unpack_restart(double *);
|
||||
void create_atom(int, double *);
|
||||
void data_atom(double *, tagint, char **);
|
||||
int data_atom_hybrid(int, char **);
|
||||
void pack_data(double **);
|
||||
int pack_data_hybrid(int, double *);
|
||||
void write_data(FILE *, int, double **);
|
||||
int write_data_hybrid(FILE *, double *);
|
||||
bigint memory_usage();
|
||||
|
||||
void grow_reset();
|
||||
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
|
||||
const int & iswap,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
void unpack_comm_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf);
|
||||
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
|
||||
const int & iswap, const int nfirst,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
DAT::tdual_xfloat_2d buf,int iswap,
|
||||
int pbc_flag, int *pbc, ExecutionSpace space);
|
||||
void unpack_border_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space);
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi);
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space);
|
||||
|
||||
void sync(ExecutionSpace space, unsigned int mask);
|
||||
void modified(ExecutionSpace space, unsigned int mask);
|
||||
|
||||
protected:
|
||||
|
||||
tagint *tag;
|
||||
int *type,*mask;
|
||||
imageint *image;
|
||||
double **x,**v,**f;
|
||||
|
||||
double *q;
|
||||
|
||||
tagint *molecule;
|
||||
int **nspecial;
|
||||
tagint **special;
|
||||
int *num_bond;
|
||||
int **bond_type;
|
||||
tagint **bond_atom;
|
||||
|
||||
int *num_angle;
|
||||
int **angle_type;
|
||||
tagint **angle_atom1,**angle_atom2,**angle_atom3;
|
||||
|
||||
int *num_dihedral;
|
||||
int **dihedral_type;
|
||||
tagint **dihedral_atom1,**dihedral_atom2,**dihedral_atom3,**dihedral_atom4;
|
||||
int *num_improper;
|
||||
int **improper_type;
|
||||
tagint **improper_atom1,**improper_atom2,**improper_atom3,**improper_atom4;
|
||||
|
||||
DAT::t_tagint_1d d_tag;
|
||||
DAT::t_int_1d d_type, d_mask;
|
||||
HAT::t_tagint_1d h_tag;
|
||||
HAT::t_int_1d h_type, h_mask;
|
||||
|
||||
DAT::t_imageint_1d d_image;
|
||||
HAT::t_imageint_1d h_image;
|
||||
|
||||
DAT::t_x_array d_x;
|
||||
DAT::t_v_array d_v;
|
||||
DAT::t_f_array d_f;
|
||||
HAT::t_x_array h_x;
|
||||
HAT::t_v_array h_v;
|
||||
HAT::t_f_array h_f;
|
||||
|
||||
DAT::t_float_1d d_q;
|
||||
HAT::t_float_1d h_q;
|
||||
|
||||
DAT::t_tagint_1d d_molecule;
|
||||
DAT::t_int_2d d_nspecial;
|
||||
DAT::t_tagint_2d d_special;
|
||||
DAT::t_int_1d d_num_bond;
|
||||
DAT::t_int_2d d_bond_type;
|
||||
DAT::t_tagint_2d d_bond_atom;
|
||||
|
||||
HAT::t_tagint_1d h_molecule;
|
||||
HAT::t_int_2d h_nspecial;
|
||||
HAT::t_tagint_2d h_special;
|
||||
HAT::t_int_1d h_num_bond;
|
||||
HAT::t_int_2d h_bond_type;
|
||||
HAT::t_tagint_2d h_bond_atom;
|
||||
|
||||
DAT::t_int_1d d_num_angle;
|
||||
DAT::t_int_2d d_angle_type;
|
||||
DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3;
|
||||
|
||||
HAT::t_int_1d h_num_angle;
|
||||
HAT::t_int_2d h_angle_type;
|
||||
HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3;
|
||||
|
||||
DAT::t_int_1d d_num_dihedral;
|
||||
DAT::t_int_2d d_dihedral_type;
|
||||
DAT::t_tagint_2d d_dihedral_atom1,d_dihedral_atom2,
|
||||
d_dihedral_atom3,d_dihedral_atom4;
|
||||
DAT::t_int_1d d_num_improper;
|
||||
DAT::t_int_2d d_improper_type;
|
||||
DAT::t_tagint_2d d_improper_atom1,d_improper_atom2,
|
||||
d_improper_atom3,d_improper_atom4;
|
||||
|
||||
HAT::t_int_1d h_num_dihedral;
|
||||
HAT::t_int_2d h_dihedral_type;
|
||||
HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2,
|
||||
h_dihedral_atom3,h_dihedral_atom4;
|
||||
HAT::t_int_1d h_num_improper;
|
||||
HAT::t_int_2d h_improper_type;
|
||||
HAT::t_tagint_2d h_improper_atom1,h_improper_atom2,
|
||||
h_improper_atom3,h_improper_atom4;
|
||||
|
||||
HAT::tdual_int_1d k_count;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -24,43 +24,43 @@ class AtomVecKokkos : public AtomVec {
|
|||
AtomVecKokkos(class LAMMPS *);
|
||||
virtual ~AtomVecKokkos() {}
|
||||
|
||||
virtual void sync(ExecutionSpace space, unsigned int mask) {};
|
||||
virtual void modified(ExecutionSpace space, unsigned int mask) {};
|
||||
virtual void sync(ExecutionSpace space, unsigned int mask) = 0;
|
||||
virtual void modified(ExecutionSpace space, unsigned int mask) = 0;
|
||||
|
||||
virtual int
|
||||
pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
|
||||
const int & iswap, const int nfirst,
|
||||
const int &pbc_flag, const int pbc[])
|
||||
{return 0;}
|
||||
const int &pbc_flag, const int pbc[]) = 0;
|
||||
//{return 0;}
|
||||
virtual int
|
||||
pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list,
|
||||
const int & iswap, const DAT::tdual_xfloat_2d &buf,
|
||||
const int &pbc_flag, const int pbc[])
|
||||
{return 0;}
|
||||
const int &pbc_flag, const int pbc[]) = 0;
|
||||
//{return 0;}
|
||||
virtual void
|
||||
unpack_comm_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf) {};
|
||||
const DAT::tdual_xfloat_2d &buf) = 0;
|
||||
virtual int
|
||||
pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
DAT::tdual_xfloat_2d buf,int iswap,
|
||||
int pbc_flag, int *pbc, ExecutionSpace space)
|
||||
{return 0;};
|
||||
int pbc_flag, int *pbc, ExecutionSpace space) = 0;
|
||||
//{return 0;};
|
||||
virtual void
|
||||
unpack_border_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space) {};
|
||||
ExecutionSpace space) = 0;
|
||||
|
||||
virtual int
|
||||
pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi)
|
||||
{return 0;};
|
||||
ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) = 0;
|
||||
//{return 0;};
|
||||
virtual int
|
||||
unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space)
|
||||
{return 0;};
|
||||
ExecutionSpace space) = 0;
|
||||
//{return 0;};
|
||||
|
||||
protected:
|
||||
class AtomKokkos *atomKK;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,178 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef ATOM_CLASS
|
||||
|
||||
AtomStyle(molecular/kk,AtomVecMolecularKokkos)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_ATOM_VEC_MOLECULAR_KOKKOS_H
|
||||
#define LMP_ATOM_VEC_MOLECULAR_KOKKOS_H
|
||||
|
||||
#include "atom_vec_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class AtomVecMolecularKokkos : public AtomVecKokkos {
|
||||
public:
|
||||
AtomVecMolecularKokkos(class LAMMPS *);
|
||||
virtual ~AtomVecMolecularKokkos() {}
|
||||
void grow(int);
|
||||
void copy(int, int, int);
|
||||
int pack_comm(int, int *, double *, int, int *);
|
||||
int pack_comm_vel(int, int *, double *, int, int *);
|
||||
void unpack_comm(int, int, double *);
|
||||
void unpack_comm_vel(int, int, double *);
|
||||
int pack_reverse(int, int, double *);
|
||||
void unpack_reverse(int, int *, double *);
|
||||
int pack_border(int, int *, double *, int, int *);
|
||||
int pack_border_vel(int, int *, double *, int, int *);
|
||||
int pack_border_hybrid(int, int *, double *);
|
||||
void unpack_border(int, int, double *);
|
||||
void unpack_border_vel(int, int, double *);
|
||||
int unpack_border_hybrid(int, int, double *);
|
||||
int pack_exchange(int, double *);
|
||||
int unpack_exchange(double *);
|
||||
int size_restart();
|
||||
int pack_restart(int, double *);
|
||||
int unpack_restart(double *);
|
||||
void create_atom(int, double *);
|
||||
void data_atom(double *, tagint, char **);
|
||||
int data_atom_hybrid(int, char **);
|
||||
void pack_data(double **);
|
||||
int pack_data_hybrid(int, double *);
|
||||
void write_data(FILE *, int, double **);
|
||||
int write_data_hybrid(FILE *, double *);
|
||||
bigint memory_usage();
|
||||
|
||||
void grow_reset();
|
||||
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
|
||||
const int & iswap,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
void unpack_comm_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf);
|
||||
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
|
||||
const int & iswap, const int nfirst,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
DAT::tdual_xfloat_2d buf,int iswap,
|
||||
int pbc_flag, int *pbc, ExecutionSpace space);
|
||||
void unpack_border_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space);
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi);
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space);
|
||||
|
||||
void sync(ExecutionSpace space, unsigned int mask);
|
||||
void modified(ExecutionSpace space, unsigned int mask);
|
||||
|
||||
protected:
|
||||
|
||||
tagint *tag;
|
||||
int *type,*mask;
|
||||
imageint *image;
|
||||
double **x,**v,**f;
|
||||
|
||||
tagint *molecule;
|
||||
int **nspecial;
|
||||
tagint **special;
|
||||
int *num_bond;
|
||||
int **bond_type;
|
||||
tagint **bond_atom;
|
||||
|
||||
int *num_angle;
|
||||
int **angle_type;
|
||||
tagint **angle_atom1,**angle_atom2,**angle_atom3;
|
||||
|
||||
int *num_dihedral;
|
||||
int **dihedral_type;
|
||||
tagint **dihedral_atom1,**dihedral_atom2,**dihedral_atom3,**dihedral_atom4;
|
||||
int *num_improper;
|
||||
int **improper_type;
|
||||
tagint **improper_atom1,**improper_atom2,**improper_atom3,**improper_atom4;
|
||||
|
||||
DAT::t_tagint_1d d_tag;
|
||||
DAT::t_int_1d d_type, d_mask;
|
||||
HAT::t_tagint_1d h_tag;
|
||||
HAT::t_int_1d h_type, h_mask;
|
||||
|
||||
DAT::t_imageint_1d d_image;
|
||||
HAT::t_imageint_1d h_image;
|
||||
|
||||
DAT::t_x_array d_x;
|
||||
DAT::t_v_array d_v;
|
||||
DAT::t_f_array d_f;
|
||||
HAT::t_x_array h_x;
|
||||
HAT::t_v_array h_v;
|
||||
HAT::t_f_array h_f;
|
||||
|
||||
DAT::t_tagint_1d d_molecule;
|
||||
DAT::t_int_2d d_nspecial;
|
||||
DAT::t_tagint_2d d_special;
|
||||
DAT::t_int_1d d_num_bond;
|
||||
DAT::t_int_2d d_bond_type;
|
||||
DAT::t_tagint_2d d_bond_atom;
|
||||
|
||||
HAT::t_tagint_1d h_molecule;
|
||||
HAT::t_int_2d h_nspecial;
|
||||
HAT::t_tagint_2d h_special;
|
||||
HAT::t_int_1d h_num_bond;
|
||||
HAT::t_int_2d h_bond_type;
|
||||
HAT::t_tagint_2d h_bond_atom;
|
||||
|
||||
DAT::t_int_1d d_num_angle;
|
||||
DAT::t_int_2d d_angle_type;
|
||||
DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3;
|
||||
|
||||
HAT::t_int_1d h_num_angle;
|
||||
HAT::t_int_2d h_angle_type;
|
||||
HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3;
|
||||
|
||||
DAT::t_int_1d d_num_dihedral;
|
||||
DAT::t_int_2d d_dihedral_type;
|
||||
DAT::t_tagint_2d d_dihedral_atom1,d_dihedral_atom2,
|
||||
d_dihedral_atom3,d_dihedral_atom4;
|
||||
DAT::t_int_1d d_num_improper;
|
||||
DAT::t_int_2d d_improper_type;
|
||||
DAT::t_tagint_2d d_improper_atom1,d_improper_atom2,
|
||||
d_improper_atom3,d_improper_atom4;
|
||||
|
||||
HAT::t_int_1d h_num_dihedral;
|
||||
HAT::t_int_2d h_dihedral_type;
|
||||
HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2,
|
||||
h_dihedral_atom3,h_dihedral_atom4;
|
||||
HAT::t_int_1d h_num_improper;
|
||||
HAT::t_int_2d h_improper_type;
|
||||
HAT::t_tagint_2d h_improper_atom1,h_improper_atom2,
|
||||
h_improper_atom3,h_improper_atom4;
|
||||
|
||||
HAT::tdual_int_1d k_count;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -21,6 +21,13 @@
|
|||
#include "atom_masks.h"
|
||||
#include "error.h"
|
||||
#include "memory.h"
|
||||
#include "force.h"
|
||||
#include "pair.h"
|
||||
#include "fix.h"
|
||||
#include "compute.h"
|
||||
#include "dump.h"
|
||||
#include "output.h"
|
||||
#include "modify.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
|
@ -43,15 +50,19 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
|
|||
|
||||
// initialize comm buffers & exchange memory
|
||||
|
||||
maxsend = BUFMIN;
|
||||
k_buf_send = ArrayTypes<LMPDeviceType>::
|
||||
tdual_xfloat_2d("comm:k_buf_send",(maxsend+BUFEXTRA+5)/6,6);
|
||||
buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
|
||||
// maxsend = BUFMIN;
|
||||
// k_buf_send = ArrayTypes<LMPDeviceType>::
|
||||
// tdual_xfloat_2d("comm:k_buf_send",(maxsend+BUFEXTRA+5)/6,6);
|
||||
// buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
|
||||
maxsend = 0;
|
||||
buf_send = NULL;
|
||||
|
||||
maxrecv = BUFMIN;
|
||||
k_buf_recv = ArrayTypes<LMPDeviceType>::
|
||||
tdual_xfloat_2d("comm:k_buf_recv",(maxrecv+5)/6,6);
|
||||
buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device();
|
||||
// maxrecv = BUFMIN;
|
||||
// k_buf_recv = ArrayTypes<LMPDeviceType>::
|
||||
// tdual_xfloat_2d("comm:k_buf_recv",(maxrecv+5)/6,6);
|
||||
// buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device();
|
||||
maxrecv = 0;
|
||||
buf_recv = NULL;
|
||||
|
||||
k_exchange_sendlist = ArrayTypes<LMPDeviceType>::
|
||||
tdual_int_1d("comm:k_exchange_sendlist",100);
|
||||
|
@ -89,6 +100,34 @@ void CommKokkos::init()
|
|||
forward_comm_on_host = lmp->kokkos->forward_comm_on_host;
|
||||
|
||||
CommBrick::init();
|
||||
|
||||
int check_forward = 0;
|
||||
int check_reverse = 0;
|
||||
if (force->pair)
|
||||
check_forward += force->pair->comm_forward;
|
||||
if (force->pair)
|
||||
check_reverse += force->pair->comm_reverse;
|
||||
|
||||
for (int i = 0; i < modify->nfix; i++) {
|
||||
check_forward += modify->fix[i]->comm_forward;
|
||||
check_reverse += modify->fix[i]->comm_reverse;
|
||||
}
|
||||
|
||||
for (int i = 0; i < modify->ncompute; i++) {
|
||||
check_forward += modify->compute[i]->comm_forward;
|
||||
check_reverse += modify->compute[i]->comm_reverse;
|
||||
}
|
||||
|
||||
for (int i = 0; i < output->ndump; i++) {
|
||||
check_forward += output->dump[i]->comm_forward;
|
||||
check_reverse += output->dump[i]->comm_reverse;
|
||||
}
|
||||
|
||||
if (force->newton == 0) check_reverse = 0;
|
||||
if (force->pair) check_reverse += force->pair->comm_reverse_off;
|
||||
|
||||
if(check_reverse || check_forward)
|
||||
forward_comm_classic = true;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -98,8 +137,7 @@ void CommKokkos::init()
|
|||
|
||||
void CommKokkos::forward_comm(int dummy)
|
||||
{
|
||||
|
||||
if (!forward_comm_classic) {
|
||||
if (!forward_comm_classic) {
|
||||
if (forward_comm_on_host) forward_comm_device<LMPHostType>(dummy);
|
||||
else forward_comm_device<LMPDeviceType>(dummy);
|
||||
return;
|
||||
|
@ -205,6 +243,68 @@ void CommKokkos::forward_comm_device(int dummy)
|
|||
}
|
||||
}
|
||||
}
|
||||
void CommKokkos::reverse_comm()
|
||||
{
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
if (comm_f_only)
|
||||
atomKK->sync(Host,F_MASK);
|
||||
else
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
CommBrick::reverse_comm();
|
||||
if (comm_f_only)
|
||||
atomKK->modified(Host,F_MASK);
|
||||
else
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
}
|
||||
|
||||
void CommKokkos::forward_comm_fix(Fix *fix)
|
||||
{
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::forward_comm_fix(fix);
|
||||
}
|
||||
|
||||
void CommKokkos::reverse_comm_fix(Fix *fix)
|
||||
{
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::reverse_comm_fix(fix);
|
||||
}
|
||||
|
||||
void CommKokkos::forward_comm_compute(Compute *compute)
|
||||
{
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::forward_comm_compute(compute);
|
||||
}
|
||||
|
||||
void CommKokkos::reverse_comm_compute(Compute *compute)
|
||||
{
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::reverse_comm_compute(compute);
|
||||
}
|
||||
|
||||
void CommKokkos::forward_comm_pair(Pair *pair)
|
||||
{
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::forward_comm_pair(pair);
|
||||
}
|
||||
|
||||
void CommKokkos::reverse_comm_pair(Pair *pair)
|
||||
{
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::reverse_comm_pair(pair);
|
||||
}
|
||||
|
||||
void CommKokkos::forward_comm_dump(Dump *dump)
|
||||
{
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::forward_comm_dump(dump);
|
||||
}
|
||||
|
||||
void CommKokkos::reverse_comm_dump(Dump *dump)
|
||||
{
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::reverse_comm_dump(dump);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
exchange: move atoms to correct processors
|
||||
|
@ -219,6 +319,16 @@ void CommKokkos::forward_comm_device(int dummy)
|
|||
|
||||
void CommKokkos::exchange()
|
||||
{
|
||||
if(atom->nextra_grow + atom->nextra_border) {
|
||||
if(!exchange_comm_classic) {
|
||||
static int print = 1;
|
||||
if(print) {
|
||||
error->warning(FLERR,"Kokkos communication does not currently support fixes sending data. Switching to classic communication.");
|
||||
print = 0;
|
||||
}
|
||||
exchange_comm_classic = true;
|
||||
}
|
||||
}
|
||||
if (!exchange_comm_classic) {
|
||||
if (exchange_comm_on_host) exchange_device<LMPHostType>();
|
||||
else exchange_device<LMPDeviceType>();
|
||||
|
@ -463,10 +573,12 @@ void CommKokkos::borders()
|
|||
}
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
k_sendlist.modify<LMPHostType>();
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
CommBrick::borders();
|
||||
k_sendlist.modify<LMPHostType>();
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -496,7 +608,7 @@ struct BuildBorderListFunctor {
|
|||
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (DeviceType dev) const {
|
||||
void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const {
|
||||
const int chunk = ((nlast - nfirst + dev.league_size() - 1 ) /
|
||||
dev.league_size());
|
||||
const int teamstart = chunk*dev.league_rank() + nfirst;
|
||||
|
@ -517,7 +629,7 @@ struct BuildBorderListFunctor {
|
|||
}
|
||||
}
|
||||
|
||||
size_t shmem_size() const { return 1000u;}
|
||||
size_t shmem_size(const int team_size) const { (void) team_size; return 1000u;}
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -591,16 +703,19 @@ void CommKokkos::borders_device() {
|
|||
total_send.template modify<DeviceType>();
|
||||
total_send.template sync<LMPDeviceType>();
|
||||
}
|
||||
|
||||
BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
|
||||
total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
|
||||
Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
|
||||
Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
|
||||
Kokkos::parallel_for(config,f);
|
||||
DeviceType::fence();
|
||||
|
||||
total_send.template modify<DeviceType>();
|
||||
total_send.template sync<LMPHostType>();
|
||||
|
||||
if(total_send.h_view(0) >= maxsendlist[iswap]) {
|
||||
grow_list(iswap,total_send.h_view(0));
|
||||
k_sendlist.modify<DeviceType>();
|
||||
total_send.h_view(0) = 0;
|
||||
if(exec_space == Device) {
|
||||
total_send.template modify<LMPHostType>();
|
||||
|
@ -608,7 +723,7 @@ void CommKokkos::borders_device() {
|
|||
}
|
||||
BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
|
||||
total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
|
||||
Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
|
||||
Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
|
||||
Kokkos::parallel_for(config,f);
|
||||
DeviceType::fence();
|
||||
total_send.template modify<DeviceType>();
|
||||
|
@ -742,6 +857,25 @@ void CommKokkos::borders_device() {
|
|||
atomKK->modified(exec_space,ALL_MASK);
|
||||
DeviceType::fence();
|
||||
}
|
||||
/* ----------------------------------------------------------------------
|
||||
realloc the size of the send buffer as needed with BUFFACTOR and bufextra
|
||||
if flag = 1, realloc
|
||||
if flag = 0, don't need to realloc with copy, just free/malloc
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::grow_send(int n, int flag)
|
||||
{
|
||||
grow_send_kokkos(n,flag,Host);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
free/malloc the size of the recv buffer as needed with BUFFACTOR
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::grow_recv(int n)
|
||||
{
|
||||
grow_recv_kokkos(n,Host);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA
|
||||
|
|
|
@ -33,9 +33,19 @@ class CommKokkos : public CommBrick {
|
|||
void init();
|
||||
|
||||
void forward_comm(int dummy = 0); // forward comm of atom coords
|
||||
void reverse_comm(); // reverse comm of atom coords
|
||||
void exchange(); // move atoms to new procs
|
||||
void borders(); // setup list of atoms to comm
|
||||
|
||||
void forward_comm_pair(class Pair *); // forward comm from a Pair
|
||||
void reverse_comm_pair(class Pair *); // reverse comm from a Pair
|
||||
void forward_comm_fix(class Fix *); // forward comm from a Fix
|
||||
void reverse_comm_fix(class Fix *); // reverse comm from a Fix
|
||||
void forward_comm_compute(class Compute *); // forward from a Compute
|
||||
void reverse_comm_compute(class Compute *); // reverse from a Compute
|
||||
void forward_comm_dump(class Dump *); // forward comm from a Dump
|
||||
void reverse_comm_dump(class Dump *); // reverse comm from a Dump
|
||||
|
||||
template<class DeviceType> void forward_comm_device(int dummy);
|
||||
template<class DeviceType> void exchange_device();
|
||||
template<class DeviceType> void borders_device();
|
||||
|
@ -48,6 +58,8 @@ class CommKokkos : public CommBrick {
|
|||
//double *buf_send; // send buffer for all comm
|
||||
//double *buf_recv; // recv buffer for all comm
|
||||
|
||||
void grow_send(int, int);
|
||||
void grow_recv(int);
|
||||
void grow_send_kokkos(int, int, ExecutionSpace space = Host);
|
||||
void grow_recv_kokkos(int, ExecutionSpace space = Host);
|
||||
void grow_list(int, int);
|
||||
|
|
|
@ -0,0 +1,810 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "math.h"
|
||||
#include "stdio.h"
|
||||
#include "string.h"
|
||||
#include "fix_langevin_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "force.h"
|
||||
#include "update.h"
|
||||
#include "respa.h"
|
||||
#include "error.h"
|
||||
#include "memory.h"
|
||||
#include "group.h"
|
||||
#include "random_mars.h"
|
||||
#include "compute.h"
|
||||
#include "comm.h"
|
||||
#include "modify.h"
|
||||
#include "input.h"
|
||||
#include "variable.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace FixConst;
|
||||
|
||||
enum{NOBIAS,BIAS};
|
||||
enum{CONSTANT,EQUAL,ATOM};
|
||||
#define SINERTIA 0.4 // moment of inertia prefactor for sphere
|
||||
#define EINERTIA 0.2 // moment of inertia prefactor for ellipsoid
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
FixLangevin(lmp, narg, arg),rand_pool(seed + comm->me)
|
||||
{
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
int ntypes = atomKK->ntypes;
|
||||
|
||||
// allocate per-type arrays for force prefactors
|
||||
memory->create_kokkos(k_gfactor1,gfactor1,ntypes+1,"langevin:gfactor1");
|
||||
memory->create_kokkos(k_gfactor2,gfactor2,ntypes+1,"langevin:gfactor2");
|
||||
memory->create_kokkos(k_ratio,ratio,ntypes+1,"langevin:ratio");
|
||||
d_gfactor1 = k_gfactor1.template view<DeviceType>();
|
||||
h_gfactor1 = k_gfactor1.template view<LMPHostType>();
|
||||
d_gfactor2 = k_gfactor2.template view<DeviceType>();
|
||||
h_gfactor2 = k_gfactor2.template view<LMPHostType>();
|
||||
d_ratio = k_ratio.template view<DeviceType>();
|
||||
h_ratio = k_ratio.template view<LMPHostType>();
|
||||
|
||||
// optional args
|
||||
for (int i = 1; i <= ntypes; i++) ratio[i] = 1.0;
|
||||
k_ratio.template modify<LMPHostType>();
|
||||
|
||||
if(gjfflag){
|
||||
nvalues = 3;
|
||||
grow_arrays(atomKK->nmax);
|
||||
atom->add_callback(0);
|
||||
// initialize franprev to zero
|
||||
for (int i = 0; i < atomKK->nlocal; i++) {
|
||||
franprev[i][0] = 0.0;
|
||||
franprev[i][1] = 0.0;
|
||||
franprev[i][2] = 0.0;
|
||||
}
|
||||
k_franprev.template modify<LMPHostType>();
|
||||
}
|
||||
if(zeroflag){
|
||||
k_fsumall = tdual_double_1d_3n("langevin:fsumall");
|
||||
h_fsumall = k_fsumall.template view<LMPHostType>();
|
||||
d_fsumall = k_fsumall.template view<DeviceType>();
|
||||
}
|
||||
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = V_MASK | F_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK;
|
||||
datamask_modify = F_MASK;
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
FixLangevinKokkos<DeviceType>::~FixLangevinKokkos()
|
||||
{
|
||||
memory->destroy_kokkos(k_gfactor1,gfactor1);
|
||||
memory->destroy_kokkos(k_gfactor2,gfactor2);
|
||||
memory->destroy_kokkos(k_ratio,ratio);
|
||||
memory->destroy_kokkos(k_flangevin,flangevin);
|
||||
if(gjfflag) memory->destroy_kokkos(k_franprev,franprev);
|
||||
memory->destroy_kokkos(k_tforce,tforce);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixLangevinKokkos<DeviceType>::init()
|
||||
{
|
||||
FixLangevin::init();
|
||||
if(oflag)
|
||||
error->all(FLERR,"Fix langevin omega is not yet implemented with kokkos");
|
||||
if(ascale)
|
||||
error->all(FLERR,"Fix langevin angmom is not yet implemented with kokkos");
|
||||
|
||||
// prefactors are modified in the init
|
||||
k_gfactor1.template modify<LMPHostType>();
|
||||
k_gfactor2.template modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixLangevinKokkos<DeviceType>::grow_arrays(int nmax)
|
||||
{
|
||||
memory->grow_kokkos(k_franprev,franprev,nmax,3,"langevin:franprev");
|
||||
d_franprev = k_franprev.template view<DeviceType>();
|
||||
h_franprev = k_franprev.template view<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixLangevinKokkos<DeviceType>::post_force(int vflag)
|
||||
{
|
||||
// sync the device views which might have been modified on host
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
rmass = atomKK->rmass;
|
||||
f = atomKK->k_f.template view<DeviceType>();
|
||||
v = atomKK->k_v.template view<DeviceType>();
|
||||
type = atomKK->k_type.template view<DeviceType>();
|
||||
mask = atomKK->k_mask.template view<DeviceType>();
|
||||
|
||||
k_gfactor1.template sync<DeviceType>();
|
||||
k_gfactor2.template sync<DeviceType>();
|
||||
k_ratio.template sync<DeviceType>();
|
||||
if(gjfflag) k_franprev.template sync<DeviceType>();
|
||||
|
||||
boltz = force->boltz;
|
||||
dt = update->dt;
|
||||
mvv2e = force->mvv2e;
|
||||
ftm2v = force->ftm2v;
|
||||
fran_prop_const = sqrt(24.0*boltz/t_period/dt/mvv2e);
|
||||
|
||||
compute_target(); // modifies tforce vector, hence sync here
|
||||
k_tforce.template sync<DeviceType>();
|
||||
|
||||
double fsum[3],fsumall[3];
|
||||
bigint count;
|
||||
int nlocal = atomKK->nlocal;
|
||||
|
||||
if (zeroflag) {
|
||||
fsum[0] = fsum[1] = fsum[2] = 0.0;
|
||||
count = group->count(igroup);
|
||||
if (count == 0)
|
||||
error->all(FLERR,"Cannot zero Langevin force of 0 atoms");
|
||||
}
|
||||
|
||||
// reallocate flangevin if necessary
|
||||
if (tallyflag) {
|
||||
if (nlocal > maxatom1) {
|
||||
memory->destroy_kokkos(k_flangevin,flangevin);
|
||||
maxatom1 = atomKK->nmax;
|
||||
memory->create_kokkos(k_flangevin,flangevin,maxatom1,3,"langevin:flangevin");
|
||||
d_flangevin = k_flangevin.template view<DeviceType>();
|
||||
h_flangevin = k_flangevin.template view<LMPHostType>();
|
||||
}
|
||||
}
|
||||
|
||||
// account for bias velocity
|
||||
if(tbiasflag == BIAS){
|
||||
temperature->compute_scalar();
|
||||
temperature->remove_bias_all(); // modifies velocities
|
||||
// if temeprature compute is kokkosized host-devcie comm won't be needed
|
||||
atomKK->modified(Host,V_MASK);
|
||||
atomKK->sync(execution_space,V_MASK);
|
||||
}
|
||||
|
||||
// compute langevin force in parallel on the device
|
||||
FSUM s_fsum;
|
||||
if (tstyle == ATOM)
|
||||
if (gjfflag)
|
||||
if (tallyflag)
|
||||
if (tbiasflag == BIAS)
|
||||
if (rmass)
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,1,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else{
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,1,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,0,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,0,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (rmass)
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,1,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,1,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,0,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else{
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,0,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (tbiasflag == BIAS)
|
||||
if (rmass)
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,1,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,1,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,0,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,0,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (rmass)
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,1,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,1,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,0,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,0,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (tallyflag)
|
||||
if (tbiasflag == BIAS)
|
||||
if (rmass)
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,1,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,1,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,0,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,0,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (rmass)
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,1,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,1,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,0,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,0,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (tbiasflag == BIAS)
|
||||
if (rmass)
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,1,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,1,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,0,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,0,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (rmass)
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,1,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,1,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,0,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,0,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (gjfflag)
|
||||
if (tallyflag)
|
||||
if (tbiasflag == BIAS)
|
||||
if (rmass)
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,1,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,1,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,0,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,0,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (rmass)
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,1,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,1,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,0,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,0,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (tbiasflag == BIAS)
|
||||
if (rmass)
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,1,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,1,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,0,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,0,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (rmass)
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,1,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,1,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,0,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,0,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (tallyflag)
|
||||
if (tbiasflag == BIAS)
|
||||
if (rmass)
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,1,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,1,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,0,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,0,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (rmass)
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,1,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,1,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,0,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,0,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (tbiasflag == BIAS)
|
||||
if (rmass)
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,1,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,1,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,0,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,0,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (rmass)
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,1,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,1,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
else
|
||||
if (zeroflag) {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,0,1> post_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
|
||||
}
|
||||
else {
|
||||
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,0,0> post_functor(this);
|
||||
Kokkos::parallel_for(nlocal,post_functor);
|
||||
}
|
||||
|
||||
DeviceType::fence();
|
||||
|
||||
if(tbiasflag == BIAS){
|
||||
temperature->restore_bias_all(); // modifies velocities
|
||||
atomKK->modified(Host,V_MASK);
|
||||
}
|
||||
|
||||
// set modify flags for the views modified in post_force functor
|
||||
if (gjfflag) k_franprev.template modify<DeviceType>();
|
||||
if (tallyflag) k_flangevin.template modify<DeviceType>();
|
||||
|
||||
// set total force to zero
|
||||
if (zeroflag) {
|
||||
fsum[0] = s_fsum.fx; fsum[1] = s_fsum.fy; fsum[2] = s_fsum.fz;
|
||||
MPI_Allreduce(fsum,fsumall,3,MPI_DOUBLE,MPI_SUM,world);
|
||||
h_fsumall(0) = fsumall[0]/count;
|
||||
h_fsumall(1) = fsumall[1]/count;
|
||||
h_fsumall(2) = fsumall[2]/count;
|
||||
k_fsumall.template modify<LMPHostType>();
|
||||
k_fsumall.template sync<DeviceType>();
|
||||
// set total force zero in parallel on the device
|
||||
FixLangevinKokkosZeroForceFunctor<DeviceType> zero_functor(this);
|
||||
Kokkos::parallel_for(nlocal,zero_functor);
|
||||
DeviceType::fence();
|
||||
}
|
||||
// f is modified by both post_force and zero_force functors
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
|
||||
// thermostat omega and angmom
|
||||
// if (oflag) omega_thermostat();
|
||||
// if (ascale) angmom_thermostat();
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY,
|
||||
int Tp_BIAS, int Tp_RMASS, int Tp_ZERO>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
FSUM FixLangevinKokkos<DeviceType>::post_force_item(int i) const
|
||||
{
|
||||
FSUM fsum;
|
||||
double fdrag[3],fran[3];
|
||||
double gamma1,gamma2;
|
||||
double fswap;
|
||||
double tsqrt_t = tsqrt;
|
||||
|
||||
if (mask[i] & groupbit) {
|
||||
rand_type rand_gen = rand_pool.get_state();
|
||||
if(Tp_TSTYLEATOM) tsqrt_t = sqrt(d_tforce[i]);
|
||||
if(Tp_RMASS){
|
||||
gamma1 = -rmass[i] / t_period / ftm2v;
|
||||
gamma2 = sqrt(rmass[i]) * fran_prop_const / ftm2v;
|
||||
gamma1 *= 1.0/d_ratio[type[i]];
|
||||
gamma2 *= 1.0/sqrt(d_ratio[type[i]]) * tsqrt_t;
|
||||
} else {
|
||||
gamma1 = d_gfactor1[type[i]];
|
||||
gamma2 = d_gfactor2[type[i]] * tsqrt_t;
|
||||
}
|
||||
|
||||
fran[0] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
|
||||
fran[1] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
|
||||
fran[2] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
|
||||
|
||||
if(Tp_BIAS){
|
||||
fdrag[0] = gamma1*v(i,0);
|
||||
fdrag[1] = gamma1*v(i,1);
|
||||
fdrag[2] = gamma1*v(i,2);
|
||||
if (v(i,0) == 0.0) fran[0] = 0.0;
|
||||
if (v(i,1) == 0.0) fran[1] = 0.0;
|
||||
if (v(i,2) == 0.0) fran[2] = 0.0;
|
||||
}else{
|
||||
fdrag[0] = gamma1*v(i,0);
|
||||
fdrag[1] = gamma1*v(i,1);
|
||||
fdrag[2] = gamma1*v(i,2);
|
||||
}
|
||||
|
||||
if (Tp_GJF) {
|
||||
fswap = 0.5*(fran[0]+d_franprev(i,0));
|
||||
d_franprev(i,0) = fran[0];
|
||||
fran[0] = fswap;
|
||||
fswap = 0.5*(fran[1]+d_franprev(i,1));
|
||||
d_franprev(i,1) = fran[1];
|
||||
fran[1] = fswap;
|
||||
fswap = 0.5*(fran[2]+d_franprev(i,2));
|
||||
d_franprev(i,2) = fran[2];
|
||||
fran[2] = fswap;
|
||||
|
||||
fdrag[0] *= gjffac;
|
||||
fdrag[1] *= gjffac;
|
||||
fdrag[2] *= gjffac;
|
||||
fran[0] *= gjffac;
|
||||
fran[1] *= gjffac;
|
||||
fran[2] *= gjffac;
|
||||
f(i,0) *= gjffac;
|
||||
f(i,1) *= gjffac;
|
||||
f(i,2) *= gjffac;
|
||||
}
|
||||
|
||||
f(i,0) += fdrag[0] + fran[0];
|
||||
f(i,1) += fdrag[1] + fran[1];
|
||||
f(i,2) += fdrag[2] + fran[2];
|
||||
|
||||
if (Tp_TALLY) {
|
||||
d_flangevin(i,0) = fdrag[0] + fran[0];
|
||||
d_flangevin(i,1) = fdrag[1] + fran[1];
|
||||
d_flangevin(i,2) = fdrag[2] + fran[2];
|
||||
}
|
||||
|
||||
if (Tp_ZERO) {
|
||||
fsum.fx = fran[0];
|
||||
fsum.fy = fran[1];
|
||||
fsum.fz = fran[2];
|
||||
}
|
||||
rand_pool.free_state(rand_gen);
|
||||
}
|
||||
|
||||
return fsum;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixLangevinKokkos<DeviceType>::zero_force_item(int i) const
|
||||
{
|
||||
if (mask[i] & groupbit) {
|
||||
f(i,0) -= d_fsumall[0];
|
||||
f(i,1) -= d_fsumall[1];
|
||||
f(i,2) -= d_fsumall[2];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set current t_target and t_sqrt
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixLangevinKokkos<DeviceType>::compute_target()
|
||||
{
|
||||
atomKK->sync(Host, MASK_MASK);
|
||||
mask = atomKK->k_mask.template view<DeviceType>();
|
||||
int nlocal = atomKK->nlocal;
|
||||
|
||||
double delta = update->ntimestep - update->beginstep;
|
||||
if (delta != 0.0) delta /= update->endstep - update->beginstep;
|
||||
|
||||
// if variable temp, evaluate variable, wrap with clear/add
|
||||
// reallocate tforce array if necessary
|
||||
|
||||
if (tstyle == CONSTANT) {
|
||||
t_target = t_start + delta * (t_stop-t_start);
|
||||
tsqrt = sqrt(t_target);
|
||||
} else {
|
||||
modify->clearstep_compute();
|
||||
if (tstyle == EQUAL) {
|
||||
t_target = input->variable->compute_equal(tvar);
|
||||
if (t_target < 0.0)
|
||||
error->one(FLERR,"Fix langevin variable returned negative temperature");
|
||||
tsqrt = sqrt(t_target);
|
||||
} else {
|
||||
if (nlocal > maxatom2) {
|
||||
maxatom2 = atom->nmax;
|
||||
memory->destroy_kokkos(k_tforce,tforce);
|
||||
memory->create_kokkos(k_tforce,tforce,maxatom2,"langevin:tforce");
|
||||
d_tforce = k_tforce.template view<DeviceType>();
|
||||
h_tforce = k_tforce.template view<LMPHostType>();
|
||||
}
|
||||
input->variable->compute_atom(tvar,igroup,tforce,1,0); // tforce is modified on host
|
||||
k_tforce.template modify<LMPHostType>();
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit)
|
||||
if (h_tforce[i] < 0.0)
|
||||
error->one(FLERR,
|
||||
"Fix langevin variable returned negative temperature");
|
||||
}
|
||||
modify->addstep_compute(update->ntimestep + 1);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixLangevinKokkos<DeviceType>::reset_dt()
|
||||
{
|
||||
if (atomKK->mass) {
|
||||
for (int i = 1; i <= atomKK->ntypes; i++) {
|
||||
h_gfactor2[i] = sqrt(atomKK->mass[i]) *
|
||||
sqrt(24.0*force->boltz/t_period/update->dt/force->mvv2e) /
|
||||
force->ftm2v;
|
||||
h_gfactor2[i] *= 1.0/sqrt(h_ratio[i]);
|
||||
}
|
||||
k_gfactor2.template modify<LMPHostType>();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double FixLangevinKokkos<DeviceType>::compute_scalar()
|
||||
{
|
||||
if (!tallyflag || flangevin == NULL) return 0.0;
|
||||
|
||||
v = atomKK->k_v.template view<DeviceType>();
|
||||
mask = atomKK->k_mask.template view<DeviceType>();
|
||||
|
||||
// capture the very first energy transfer to thermal reservoir
|
||||
|
||||
if (update->ntimestep == update->beginstep) {
|
||||
energy_onestep = 0.0;
|
||||
atomKK->sync(execution_space,V_MASK | MASK_MASK);
|
||||
int nlocal = atomKK->nlocal;
|
||||
k_flangevin.template sync<DeviceType>();
|
||||
FixLangevinKokkosTallyEnergyFunctor<DeviceType> scalar_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,scalar_functor,energy_onestep);
|
||||
DeviceType::fence();
|
||||
energy = 0.5*energy_onestep*update->dt;
|
||||
}
|
||||
|
||||
// convert midstep energy back to previous fullstep energy
|
||||
double energy_me = energy - 0.5*energy_onestep*update->dt;
|
||||
double energy_all;
|
||||
MPI_Allreduce(&energy_me,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
return -energy_all;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double FixLangevinKokkos<DeviceType>::compute_energy_item(int i) const
|
||||
{
|
||||
double energy;
|
||||
if (mask[i] & groupbit)
|
||||
energy = d_flangevin(i,0)*v(i,0) + d_flangevin(i,1)*v(i,1) +
|
||||
d_flangevin(i,2)*v(i,2);
|
||||
return energy;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
tally energy transfer to thermal reservoir
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixLangevinKokkos<DeviceType>::end_of_step()
|
||||
{
|
||||
if (!tallyflag) return;
|
||||
|
||||
v = atomKK->k_v.template view<DeviceType>();
|
||||
mask = atomKK->k_mask.template view<DeviceType>();
|
||||
|
||||
atomKK->sync(execution_space,V_MASK | MASK_MASK);
|
||||
int nlocal = atomKK->nlocal;
|
||||
|
||||
energy_onestep = 0.0;
|
||||
|
||||
k_flangevin.template sync<DeviceType>();
|
||||
FixLangevinKokkosTallyEnergyFunctor<DeviceType> tally_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep);
|
||||
DeviceType::fence();
|
||||
|
||||
energy += energy_onestep*update->dt;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy values within local atom-based array
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixLangevinKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
|
||||
{
|
||||
for (int m = 0; m < nvalues; m++)
|
||||
h_franprev(j,m) = h_franprev(i,m);
|
||||
|
||||
k_franprev.template modify<LMPHostType>();
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixLangevinKokkos<DeviceType>::cleanup_copy()
|
||||
{
|
||||
random = NULL;
|
||||
tstr = NULL;
|
||||
gfactor1 = NULL;
|
||||
gfactor2 = NULL;
|
||||
ratio = NULL;
|
||||
id_temp = NULL;
|
||||
flangevin = NULL;
|
||||
tforce = NULL;
|
||||
gjfflag = 0;
|
||||
franprev = NULL;
|
||||
id = style = NULL;
|
||||
vatom = NULL;
|
||||
}
|
||||
|
||||
template class FixLangevinKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class FixLangevinKokkos<LMPHostType>;
|
||||
#endif
|
|
@ -0,0 +1,266 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(langevin/kk,FixLangevinKokkos<LMPDeviceType>)
|
||||
FixStyle(langevin/kk/device,FixLangevinKokkos<LMPDeviceType>)
|
||||
FixStyle(langevin/kk/host,FixLangevinKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_LANGEVIN_KOKKOS_H
|
||||
#define LMP_FIX_LANGEVIN_KOKKOS_H
|
||||
|
||||
#include "fix_langevin.h"
|
||||
#include "kokkos_type.h"
|
||||
#include "Kokkos_Random.hpp"
|
||||
#include "comm_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
struct s_FSUM {
|
||||
double fx, fy, fz;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
s_FSUM() {
|
||||
fx = fy = fz = 0.0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
s_FSUM& operator+=(const s_FSUM &rhs){
|
||||
fx += rhs.fx;
|
||||
fy += rhs.fy;
|
||||
fz += rhs.fz;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
typedef s_FSUM FSUM;
|
||||
|
||||
template<class DeviceType>
|
||||
class FixLangevinKokkos;
|
||||
|
||||
template<class DeviceType,int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY,
|
||||
int Tp_BIAS, int Tp_RMASS, int Tp_ZERO>
|
||||
class FixLangevinKokkosPostForceFunctor;
|
||||
|
||||
template<class DeviceType> class FixLangevinKokkosZeroForceFunctor;
|
||||
|
||||
template<class DeviceType> class FixLangevinKokkosTallyEnergyFunctor;
|
||||
|
||||
template<class DeviceType>
|
||||
class FixLangevinKokkos : public FixLangevin {
|
||||
public:
|
||||
FixLangevinKokkos(class LAMMPS *, int, char **);
|
||||
~FixLangevinKokkos();
|
||||
|
||||
void cleanup_copy();
|
||||
void init();
|
||||
void post_force(int);
|
||||
void reset_dt();
|
||||
void grow_arrays(int);
|
||||
void copy_arrays(int i, int j, int delflag);
|
||||
double compute_scalar();
|
||||
void end_of_step();
|
||||
|
||||
template<int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY,
|
||||
int Tp_BIAS, int Tp_RMASS, int Tp_ZERO>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
FSUM post_force_item(int) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void zero_force_item(int) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double compute_energy_item(int) const;
|
||||
|
||||
private:
|
||||
class CommKokkos *commKK;
|
||||
class AtomKokkos *atomKK;
|
||||
double *rmass;
|
||||
typename ArrayTypes<DeviceType>::tdual_double_2d k_franprev;
|
||||
typename ArrayTypes<DeviceType>::t_double_2d d_franprev;
|
||||
HAT::t_double_2d h_franprev;
|
||||
|
||||
typename ArrayTypes<DeviceType>::tdual_double_2d k_flangevin;
|
||||
typename ArrayTypes<DeviceType>::t_double_2d d_flangevin;
|
||||
HAT::t_double_2d h_flangevin;
|
||||
|
||||
typename ArrayTypes<DeviceType>::tdual_double_1d k_tforce;
|
||||
typename ArrayTypes<DeviceType>::t_double_1d d_tforce;
|
||||
HAT::t_double_1d h_tforce;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_v_array v;
|
||||
typename ArrayTypes<DeviceType>::t_f_array f;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d type;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d mask;
|
||||
|
||||
typename ArrayTypes<DeviceType>::tdual_double_1d k_gfactor1, k_gfactor2, k_ratio;
|
||||
typename ArrayTypes<DeviceType>::t_double_1d d_gfactor1, d_gfactor2, d_ratio;
|
||||
HAT::t_double_1d h_gfactor1, h_gfactor2, h_ratio;
|
||||
|
||||
typedef Kokkos::DualView<double[3], DeviceType>
|
||||
tdual_double_1d_3n;
|
||||
tdual_double_1d_3n k_fsumall;
|
||||
typename tdual_double_1d_3n::t_dev d_fsumall;
|
||||
typename tdual_double_1d_3n::t_host h_fsumall;
|
||||
|
||||
double boltz,dt,mvv2e,ftm2v,fran_prop_const;
|
||||
|
||||
void compute_target();
|
||||
|
||||
Kokkos::Random_XorShift64_Pool<DeviceType> rand_pool;
|
||||
typedef typename Kokkos::Random_XorShift64_Pool<DeviceType>::generator_type rand_type;
|
||||
|
||||
};
|
||||
|
||||
template <class DeviceType,int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY,
|
||||
int Tp_BIAS, int Tp_RMASS, int Tp_ZERO>
|
||||
struct FixLangevinKokkosPostForceFunctor {
|
||||
|
||||
typedef DeviceType device_type;
|
||||
typedef FSUM value_type;
|
||||
FixLangevinKokkos<DeviceType> c;
|
||||
|
||||
FixLangevinKokkosPostForceFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
|
||||
c(*c_ptr) {}
|
||||
~FixLangevinKokkosPostForceFunctor(){c.cleanup_copy();}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
c.template post_force_item<Tp_TSTYLEATOM,Tp_GJF, Tp_TALLY,
|
||||
Tp_BIAS,Tp_RMASS,Tp_ZERO>(i);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i, value_type &fsum) const {
|
||||
|
||||
fsum += c.template post_force_item<Tp_TSTYLEATOM,Tp_GJF, Tp_TALLY,
|
||||
Tp_BIAS,Tp_RMASS,Tp_ZERO>(i);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(volatile value_type &update) {
|
||||
update.fx = 0.0;
|
||||
update.fy = 0.0;
|
||||
update.fz = 0.0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(volatile value_type &update,
|
||||
const volatile value_type &source) {
|
||||
update.fx += source.fx;
|
||||
update.fy += source.fy;
|
||||
update.fz += source.fz;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template <class DeviceType>
|
||||
struct FixLangevinKokkosZeroForceFunctor {
|
||||
typedef DeviceType device_type ;
|
||||
FixLangevinKokkos<DeviceType> c;
|
||||
|
||||
FixLangevinKokkosZeroForceFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
|
||||
c(*c_ptr) {c.cleanup_copy();}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
c.zero_force_item(i);
|
||||
}
|
||||
};
|
||||
|
||||
template<class DeviceType>
|
||||
struct FixLangevinKokkosTallyEnergyFunctor {
|
||||
typedef DeviceType device_type ;
|
||||
FixLangevinKokkos<DeviceType> c;
|
||||
typedef double value_type;
|
||||
FixLangevinKokkosTallyEnergyFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
|
||||
c(*c_ptr) {c.cleanup_copy();}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i, value_type &energy) const {
|
||||
energy += c.compute_energy_item(i);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(volatile value_type &update) {
|
||||
update = 0.0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(volatile value_type &update,
|
||||
const volatile value_type &source) {
|
||||
update += source;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Illegal ... command
|
||||
|
||||
Self-explanatory. Check the input script syntax and compare to the
|
||||
documentation for the command. You can use -echo screen as a
|
||||
command-line option when running LAMMPS to see the offending line.
|
||||
|
||||
E: Fix langevin period must be > 0.0
|
||||
|
||||
The time window for temperature relaxation must be > 0
|
||||
|
||||
E: Fix langevin omega requires atom style sphere
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Fix langevin angmom requires atom style ellipsoid
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Variable name for fix langevin does not exist
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Variable for fix langevin is invalid style
|
||||
|
||||
It must be an equal-style variable.
|
||||
|
||||
E: Fix langevin omega requires extended particles
|
||||
|
||||
One of the particles has radius 0.0.
|
||||
|
||||
E: Fix langevin angmom requires extended particles
|
||||
|
||||
This fix option cannot be used with point paritlces.
|
||||
|
||||
E: Cannot zero Langevin force of 0 atoms
|
||||
|
||||
The group has zero atoms, so you cannot request its force
|
||||
be zeroed.
|
||||
|
||||
E: Fix langevin variable returned negative temperature
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Could not find fix_modify temperature ID
|
||||
|
||||
The compute ID for computing temperature does not exist.
|
||||
|
||||
E: Fix_modify temperature ID does not compute temperature
|
||||
|
||||
The compute ID assigned to the fix must compute temperature.
|
||||
|
||||
W: Group for fix_modify temp != fix group
|
||||
|
||||
The fix_modify command is specifying a temperature computation that
|
||||
computes a temperature on a different group of atoms than the fix
|
||||
itself operates on. This is probably not what you want to do.
|
||||
|
||||
*/
|
|
@ -172,6 +172,6 @@ void FixNVEKokkos<DeviceType>::cleanup_copy()
|
|||
}
|
||||
|
||||
template class FixNVEKokkos<LMPDeviceType>;
|
||||
#if DEVICE==2
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class FixNVEKokkos<LMPHostType>;
|
||||
#endif
|
||||
|
|
|
@ -23,8 +23,6 @@
|
|||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
enum{FULL,HALFTHREAD,HALF};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
|
@ -49,13 +47,13 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
|||
strcmp(arg[iarg],"gpus") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
|
||||
int ngpu = atoi(arg[iarg+1]);
|
||||
iarg += 2;
|
||||
|
||||
int skip_gpu = 9999;
|
||||
if (iarg+2 < narg && isdigit(arg[iarg+2][0])) {
|
||||
skip_gpu = atoi(arg[iarg+2]);
|
||||
iarg++;
|
||||
}
|
||||
iarg += 2;
|
||||
|
||||
char *str;
|
||||
if (str = getenv("SLURM_LOCALID")) {
|
||||
|
@ -89,7 +87,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
|||
|
||||
// initialize Kokkos
|
||||
|
||||
#if DEVICE==2
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
Kokkos::Cuda::host_mirror_device_type::initialize(num_threads,numa);
|
||||
Kokkos::Cuda::SelectDevice select_device(device);
|
||||
Kokkos::Cuda::initialize(select_device);
|
||||
|
@ -112,7 +110,7 @@ KokkosLMP::~KokkosLMP()
|
|||
{
|
||||
// finalize Kokkos
|
||||
|
||||
#if DEVICE==2
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
Kokkos::Cuda::finalize();
|
||||
Kokkos::Cuda::host_mirror_device_type::finalize();
|
||||
#else
|
||||
|
|
|
@ -14,71 +14,43 @@
|
|||
#ifndef LMP_LMPTYPE_KOKKOS_H
|
||||
#define LMP_LMPTYPE_KOKKOS_H
|
||||
|
||||
#include <Kokkos_View.hpp>
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <Kokkos_Atomic.hpp>
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_DualView.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
#include <Kokkos_Vectorization.hpp>
|
||||
|
||||
#define MAX_TYPES_STACKPARAMS 12
|
||||
#define NeighClusterSize 8
|
||||
// set LMPHostype and LMPDeviceType
|
||||
|
||||
#ifndef DEVICE
|
||||
#define DEVICE 1
|
||||
#ifndef __CUDACC__
|
||||
struct double2 {
|
||||
double x, y;
|
||||
};
|
||||
struct float2 {
|
||||
float x, y;
|
||||
};
|
||||
struct double4 {
|
||||
double x, y, z, w;
|
||||
};
|
||||
struct float4 {
|
||||
float x, y, z, w;
|
||||
};
|
||||
#endif
|
||||
|
||||
#if DEVICE==1
|
||||
#ifdef KOKKOS_HAVE_OPENMP
|
||||
#include "Kokkos_OpenMP.hpp"
|
||||
typedef Kokkos::OpenMP LMPDeviceType;
|
||||
typedef Kokkos::OpenMP LMPHostType;
|
||||
#else
|
||||
#include "Kokkos_Threads.hpp"
|
||||
typedef Kokkos::Threads LMPDeviceType;
|
||||
typedef Kokkos::Threads LMPHostType;
|
||||
#endif
|
||||
#ifndef __CUDACC__
|
||||
struct double2 {
|
||||
double x, y;
|
||||
};
|
||||
struct float2 {
|
||||
float x, y;
|
||||
};
|
||||
struct double4 {
|
||||
double x, y, z, w;
|
||||
};
|
||||
struct float4 {
|
||||
float x, y, z, w;
|
||||
};
|
||||
#endif
|
||||
#else
|
||||
#include "cuda.h"
|
||||
#include "cuda_runtime.h"
|
||||
#include "Kokkos_Cuda.hpp"
|
||||
#include "Kokkos_Threads.hpp"
|
||||
typedef Kokkos::Cuda LMPDeviceType;
|
||||
typedef Kokkos::Cuda::host_mirror_device_type LMPHostType;
|
||||
#endif
|
||||
// set LMPHostype and LMPDeviceType from Kokkos Default Types
|
||||
typedef Kokkos::DefaultExecutionSpace LMPDeviceType;
|
||||
typedef Kokkos::DefaultExecutionSpace::host_mirror_device_type LMPHostType;
|
||||
|
||||
// set ExecutionSpace stuct with variable "space"
|
||||
|
||||
template<class Device>
|
||||
struct ExecutionSpaceFromDevice;
|
||||
|
||||
#ifdef KOKKOS_HAVE_OPENMP
|
||||
template<>
|
||||
struct ExecutionSpaceFromDevice<Kokkos::OpenMP> {
|
||||
struct ExecutionSpaceFromDevice<LMPHostType> {
|
||||
static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Host;
|
||||
};
|
||||
#else
|
||||
template<>
|
||||
struct ExecutionSpaceFromDevice<Kokkos::Threads> {
|
||||
static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Host;
|
||||
};
|
||||
#endif
|
||||
#if DEVICE==2
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template<>
|
||||
struct ExecutionSpaceFromDevice<Kokkos::Cuda> {
|
||||
static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Device;
|
||||
|
@ -142,16 +114,27 @@ struct s_EV_FLOAT {
|
|||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
s_EV_FLOAT& operator+=(const s_EV_FLOAT &rhs) {
|
||||
evdwl += rhs.evdwl;
|
||||
ecoul += rhs.ecoul;
|
||||
v[0] += rhs.v[0];
|
||||
v[1] += rhs.v[1];
|
||||
v[2] += rhs.v[2];
|
||||
v[3] += rhs.v[3];
|
||||
v[4] += rhs.v[4];
|
||||
v[5] += rhs.v[5];
|
||||
return *this;
|
||||
void operator+=(const s_EV_FLOAT &rhs) {
|
||||
evdwl += rhs.evdwl;
|
||||
ecoul += rhs.ecoul;
|
||||
v[0] += rhs.v[0];
|
||||
v[1] += rhs.v[1];
|
||||
v[2] += rhs.v[2];
|
||||
v[3] += rhs.v[3];
|
||||
v[4] += rhs.v[4];
|
||||
v[5] += rhs.v[5];
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator+=(const volatile s_EV_FLOAT &rhs) volatile {
|
||||
evdwl += rhs.evdwl;
|
||||
ecoul += rhs.ecoul;
|
||||
v[0] += rhs.v[0];
|
||||
v[1] += rhs.v[1];
|
||||
v[2] += rhs.v[2];
|
||||
v[3] += rhs.v[3];
|
||||
v[4] += rhs.v[4];
|
||||
v[5] += rhs.v[5];
|
||||
}
|
||||
};
|
||||
typedef struct s_EV_FLOAT EV_FLOAT;
|
||||
|
@ -240,7 +223,7 @@ typedef tdual_int_2d::t_dev_const_um t_int_2d_const_um;
|
|||
typedef tdual_int_2d::t_dev_const_randomread t_int_2d_randomread;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, LMPDeviceType>
|
||||
DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, LMPDeviceType>
|
||||
tdual_tagint_1d;
|
||||
typedef tdual_tagint_1d::t_dev t_tagint_1d;
|
||||
typedef tdual_tagint_1d::t_dev_const t_tagint_1d_const;
|
||||
|
@ -249,7 +232,16 @@ typedef tdual_tagint_1d::t_dev_const_um t_tagint_1d_const_um;
|
|||
typedef tdual_tagint_1d::t_dev_const_randomread t_tagint_1d_randomread;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType>
|
||||
DualView<LAMMPS_NS::tagint**, LMPDeviceType::array_layout, LMPDeviceType>
|
||||
tdual_tagint_2d;
|
||||
typedef tdual_tagint_2d::t_dev t_tagint_2d;
|
||||
typedef tdual_tagint_2d::t_dev_const t_tagint_2d_const;
|
||||
typedef tdual_tagint_2d::t_dev_um t_tagint_2d_um;
|
||||
typedef tdual_tagint_2d::t_dev_const_um t_tagint_2d_const_um;
|
||||
typedef tdual_tagint_2d::t_dev_const_randomread t_tagint_2d_randomread;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType>
|
||||
tdual_imageint_1d;
|
||||
typedef tdual_imageint_1d::t_dev t_imageint_1d;
|
||||
typedef tdual_imageint_1d::t_dev_const t_imageint_1d_const;
|
||||
|
@ -257,6 +249,22 @@ typedef tdual_imageint_1d::t_dev_um t_imageint_1d_um;
|
|||
typedef tdual_imageint_1d::t_dev_const_um t_imageint_1d_const_um;
|
||||
typedef tdual_imageint_1d::t_dev_const_randomread t_imageint_1d_randomread;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<double*, Kokkos::LayoutRight, LMPDeviceType> tdual_double_1d;
|
||||
typedef tdual_double_1d::t_dev t_double_1d;
|
||||
typedef tdual_double_1d::t_dev_const t_double_1d_const;
|
||||
typedef tdual_double_1d::t_dev_um t_double_1d_um;
|
||||
typedef tdual_double_1d::t_dev_const_um t_double_1d_const_um;
|
||||
typedef tdual_double_1d::t_dev_const_randomread t_double_1d_randomread;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<double**, Kokkos::LayoutRight, LMPDeviceType> tdual_double_2d;
|
||||
typedef tdual_double_2d::t_dev t_double_2d;
|
||||
typedef tdual_double_2d::t_dev_const t_double_2d_const;
|
||||
typedef tdual_double_2d::t_dev_um t_double_2d_um;
|
||||
typedef tdual_double_2d::t_dev_const_um t_double_2d_const_um;
|
||||
typedef tdual_double_2d::t_dev_const_randomread t_double_2d_randomread;
|
||||
|
||||
// 1d float array n
|
||||
|
||||
typedef Kokkos::DualView<LMP_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_1d;
|
||||
|
@ -406,7 +414,7 @@ typedef tdual_neighbors_2d::t_dev_const_randomread t_neighbors_2d_randomread;
|
|||
|
||||
};
|
||||
|
||||
#if DEVICE==2
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template <>
|
||||
struct ArrayTypes<LMPHostType> {
|
||||
|
||||
|
@ -446,13 +454,40 @@ typedef tdual_tagint_1d::t_host_um t_tagint_1d_um;
|
|||
typedef tdual_tagint_1d::t_host_const_um t_tagint_1d_const_um;
|
||||
typedef tdual_tagint_1d::t_host_const_randomread t_tagint_1d_randomread;
|
||||
|
||||
typedef Kokkos::DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType> tdual_imageint_1d;
|
||||
typedef Kokkos::
|
||||
DualView<LAMMPS_NS::tagint**, LMPDeviceType::array_layout, LMPDeviceType>
|
||||
tdual_tagint_2d;
|
||||
typedef tdual_tagint_2d::t_host t_tagint_2d;
|
||||
typedef tdual_tagint_2d::t_host_const t_tagint_2d_const;
|
||||
typedef tdual_tagint_2d::t_host_um t_tagint_2d_um;
|
||||
typedef tdual_tagint_2d::t_host_const_um t_tagint_2d_const_um;
|
||||
typedef tdual_tagint_2d::t_host_const_randomread t_tagint_2d_randomread;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType>
|
||||
tdual_imageint_1d;
|
||||
typedef tdual_imageint_1d::t_host t_imageint_1d;
|
||||
typedef tdual_imageint_1d::t_host_const t_imageint_1d_const;
|
||||
typedef tdual_imageint_1d::t_host_um t_imageint_1d_um;
|
||||
typedef tdual_imageint_1d::t_host_const_um t_imageint_1d_const_um;
|
||||
typedef tdual_imageint_1d::t_host_const_randomread t_imageint_1d_randomread;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<double*, Kokkos::LayoutRight, LMPDeviceType> tdual_double_1d;
|
||||
typedef tdual_double_1d::t_host t_double_1d;
|
||||
typedef tdual_double_1d::t_host_const t_double_1d_const;
|
||||
typedef tdual_double_1d::t_host_um t_double_1d_um;
|
||||
typedef tdual_double_1d::t_host_const_um t_double_1d_const_um;
|
||||
typedef tdual_double_1d::t_host_const_randomread t_double_1d_randomread;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<double**, Kokkos::LayoutRight, LMPDeviceType> tdual_double_2d;
|
||||
typedef tdual_double_2d::t_host t_double_2d;
|
||||
typedef tdual_double_2d::t_host_const t_double_2d_const;
|
||||
typedef tdual_double_2d::t_host_um t_double_2d_um;
|
||||
typedef tdual_double_2d::t_host_const_um t_double_2d_const_um;
|
||||
typedef tdual_double_2d::t_host_const_randomread t_double_2d_randomread;
|
||||
|
||||
//1d float array n
|
||||
typedef Kokkos::DualView<LMP_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_1d;
|
||||
typedef tdual_float_1d::t_host t_float_1d;
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "domain_kokkos.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
|
@ -24,7 +25,7 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
|
|||
const int nall = includegroup?atom->nfirst:atom->nlocal;
|
||||
list->grow(nall);
|
||||
|
||||
NeighborKokkosExecute<DeviceType>
|
||||
NeighborKokkosExecute<DeviceType>
|
||||
data(*list,
|
||||
k_cutneighsq.view<DeviceType>(),
|
||||
k_bincount.view<DeviceType>(),
|
||||
|
@ -33,14 +34,46 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
|
|||
atomKK->k_type.view<DeviceType>(),
|
||||
atomKK->k_mask.view<DeviceType>(),
|
||||
atomKK->k_molecule.view<DeviceType>(),
|
||||
atomKK->k_tag.view<DeviceType>(),
|
||||
atomKK->k_special.view<DeviceType>(),
|
||||
atomKK->k_nspecial.view<DeviceType>(),
|
||||
atomKK->molecular,
|
||||
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
|
||||
bininvx,bininvy,bininvz,
|
||||
bboxhi,bboxlo);
|
||||
exclude, nex_type,maxex_type,
|
||||
k_ex1_type.view<DeviceType>(),
|
||||
k_ex2_type.view<DeviceType>(),
|
||||
k_ex_type.view<DeviceType>(),
|
||||
nex_group,maxex_group,
|
||||
k_ex1_group.view<DeviceType>(),
|
||||
k_ex2_group.view<DeviceType>(),
|
||||
k_ex1_bit.view<DeviceType>(),
|
||||
k_ex2_bit.view<DeviceType>(),
|
||||
nex_mol, maxex_mol,
|
||||
k_ex_mol_group.view<DeviceType>(),
|
||||
k_ex_mol_bit.view<DeviceType>(),
|
||||
bboxhi,bboxlo,
|
||||
domain->xperiodic,domain->yperiodic,domain->zperiodic,
|
||||
domain->xprd_half,domain->yprd_half,domain->zprd_half);
|
||||
|
||||
k_cutneighsq.sync<DeviceType>();
|
||||
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK);
|
||||
k_ex1_type.sync<DeviceType>();
|
||||
k_ex2_type.sync<DeviceType>();
|
||||
k_ex_type.sync<DeviceType>();
|
||||
k_ex1_group.sync<DeviceType>();
|
||||
k_ex2_group.sync<DeviceType>();
|
||||
k_ex1_bit.sync<DeviceType>();
|
||||
k_ex2_bit.sync<DeviceType>();
|
||||
k_ex_mol_group.sync<DeviceType>();
|
||||
k_ex_mol_bit.sync<DeviceType>();
|
||||
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
|
||||
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
|
||||
|
||||
data.special_flag[0] = special_flag[0];
|
||||
data.special_flag[1] = special_flag[1];
|
||||
data.special_flag[2] = special_flag[2];
|
||||
data.special_flag[3] = special_flag[3];
|
||||
|
||||
while(data.h_resize() > 0) {
|
||||
data.h_resize() = 0;
|
||||
deep_copy(data.resize, data.h_resize);
|
||||
|
@ -78,24 +111,24 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
|
|||
|
||||
Kokkos::deep_copy(data.resize, data.h_resize);
|
||||
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
|
||||
#if DEVICE==2
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#define BINS_PER_BLOCK 2
|
||||
const int factor = atoms_per_bin<64?2:1;
|
||||
Kokkos::ParallelWorkRequest config((mbins+factor-1)/factor,atoms_per_bin*factor);
|
||||
Kokkos::TeamPolicy<DeviceType> config((mbins+factor-1)/factor,atoms_per_bin*factor);
|
||||
#else
|
||||
const int factor = 1;
|
||||
#endif
|
||||
|
||||
if(newton_pair) {
|
||||
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
#if DEVICE==2
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
Kokkos::parallel_for(config, f);
|
||||
#else
|
||||
Kokkos::parallel_for(nall, f);
|
||||
#endif
|
||||
} else {
|
||||
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
#if DEVICE==2
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
Kokkos::parallel_for(config, f);
|
||||
#else
|
||||
Kokkos::parallel_for(nall, f);
|
||||
|
@ -134,6 +167,62 @@ void NeighborKokkosExecute<Device>::binatomsItem(const int &i) const
|
|||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
template<class Device>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int NeighborKokkosExecute<Device>::find_special(const int &i, const int &j) const
|
||||
{
|
||||
const int n1 = nspecial(i,0);
|
||||
const int n2 = nspecial(i,1);
|
||||
const int n3 = nspecial(i,2);
|
||||
|
||||
for (int k = 0; k < n3; k++) {
|
||||
if (special(i,k) == tag(j)) {
|
||||
if (k < n1) {
|
||||
if (special_flag[1] == 0) return -1;
|
||||
else if (special_flag[1] == 1) return 0;
|
||||
else return 1;
|
||||
} else if (k < n2) {
|
||||
if (special_flag[2] == 0) return -1;
|
||||
else if (special_flag[2] == 1) return 0;
|
||||
else return 2;
|
||||
} else {
|
||||
if (special_flag[3] == 0) return -1;
|
||||
else if (special_flag[3] == 1) return 0;
|
||||
else return 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int NeighborKokkosExecute<Device>::exclusion(const int &i,const int &j,
|
||||
const int &itype,const int &jtype) const
|
||||
{
|
||||
int m;
|
||||
|
||||
if (nex_type && ex_type(itype,jtype)) return 1;
|
||||
|
||||
if (nex_group) {
|
||||
for (m = 0; m < nex_group; m++) {
|
||||
if (mask(i) & ex1_bit(m) && mask(j) & ex2_bit(m)) return 1;
|
||||
if (mask(i) & ex2_bit(m) && mask(j) & ex1_bit(m)) return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (nex_mol) {
|
||||
for (m = 0; m < nex_mol; m++)
|
||||
if (mask(i) & ex_mol_bit(m) && mask(j) & ex_mol_bit(m) &&
|
||||
molecule(i) == molecule(j)) return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device> template<int HalfNeigh,int GhostNewton>
|
||||
|
@ -142,7 +231,10 @@ void NeighborKokkosExecute<Device>::
|
|||
{
|
||||
/* if necessary, goto next page and add pages */
|
||||
int n = 0;
|
||||
|
||||
int which = 0;
|
||||
int moltemplate;
|
||||
if (molecular == 2) moltemplate = 1;
|
||||
else moltemplate = 0;
|
||||
// get subview of neighbors of i
|
||||
|
||||
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
|
||||
|
@ -161,52 +253,81 @@ void NeighborKokkosExecute<Device>::
|
|||
if(HalfNeigh)
|
||||
for(int m = 0; m < c_bincount(ibin); m++) {
|
||||
const int j = c_bins(ibin,m);
|
||||
// printf("%i %i %i\n",i,ibin,m,c_bincount(ibin),j);
|
||||
const int jtype = type(j);
|
||||
|
||||
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
|
||||
if((j == i) || (HalfNeigh && !GhostNewton && (j < i)) ||
|
||||
(HalfNeigh && GhostNewton && ((j < i) || ((j >= nlocal) &&
|
||||
((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
|
||||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
|
||||
) continue;
|
||||
//if(Exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j, 0);
|
||||
const X_FLOAT dely = ytmp - x(j, 1);
|
||||
const X_FLOAT delz = ztmp - x(j, 2);
|
||||
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n) = j;
|
||||
n++;
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(i,j);
|
||||
/* else if (imol >= 0) */
|
||||
/* which = find_special(onemols[imol]->special[iatom], */
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
/* tag[j]-tagprev); */
|
||||
/* else which = 0; */
|
||||
if (which == 0){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
}else if (minimum_image_check(delx,dely,delz)){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
}
|
||||
else if (which > 0) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
}
|
||||
} else {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
const int jbin = ibin + stencil[k];
|
||||
// get subview of jbin
|
||||
if(!GhostNewton&&HalfNeigh&&(ibin==jbin)) continue;
|
||||
if(HalfNeigh&&(ibin==jbin)) continue;
|
||||
//const ArrayTypes<Device>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
|
||||
for(int m = 0; m < c_bincount(jbin); m++) {
|
||||
const int j = c_bins(jbin,m);
|
||||
//if(i==0)
|
||||
//printf("%i %i %i %i %i %i %i\n",i,jbin,m,c_bincount(jbin),j,k,stencil[k]);
|
||||
const int jtype = type(j);
|
||||
|
||||
if(HalfNeigh && !GhostNewton && (j < i)) continue;
|
||||
if(!HalfNeigh && j==i) continue;
|
||||
//if(Exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j, 0);
|
||||
const X_FLOAT dely = ytmp - x(j, 1);
|
||||
const X_FLOAT delz = ztmp - x(j, 2);
|
||||
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
//if(i==0)
|
||||
//printf("%i %i %lf %lf NEIGHS\n",i,j,rsq,cutneighsq(itype,jtype));
|
||||
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n) = j;
|
||||
n++;
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(i,j);
|
||||
/* else if (imol >= 0) */
|
||||
/* which = find_special(onemols[imol]->special[iatom], */
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
/* tag[j]-tagprev); */
|
||||
/* else which = 0; */
|
||||
if (which == 0){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
}else if (minimum_image_check(delx,dely,delz)){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
}
|
||||
else if (which > 0) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
}
|
||||
} else {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -222,23 +343,23 @@ void NeighborKokkosExecute<Device>::
|
|||
neigh_list.d_ilist(i) = i;
|
||||
}
|
||||
|
||||
#if DEVICE==2
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
extern __shared__ X_FLOAT sharedmem[];
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType> template<int HalfNeigh>
|
||||
template<class DeviceType> template<int HalfNeigh,int GhostNewton>
|
||||
__device__ inline
|
||||
void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
|
||||
void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const
|
||||
{
|
||||
/* loop over atoms in i's bin,
|
||||
*/
|
||||
const int atoms_per_bin = c_bins.dimension_1();
|
||||
const int BINS_PER_TEAM = blockDim.x/atoms_per_bin;
|
||||
const int MY_BIN = threadIdx.x/atoms_per_bin;
|
||||
const int MY_II = threadIdx.x%atoms_per_bin;
|
||||
const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin<1?1:dev.team_size()/atoms_per_bin;
|
||||
const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size();
|
||||
const int MY_BIN = dev.team_rank()/atoms_per_bin;
|
||||
|
||||
const int ibin = (blockIdx.x)*BINS_PER_TEAM+MY_BIN;
|
||||
const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN;
|
||||
|
||||
if(ibin >=c_bincount.dimension_0()) return;
|
||||
X_FLOAT* other_x = sharedmem;
|
||||
|
@ -248,6 +369,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
|
|||
|
||||
int bincount_current = c_bincount[ibin];
|
||||
|
||||
for(int kk = 0; kk < TEAMS_PER_BIN; kk++) {
|
||||
const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
|
||||
const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
|
||||
/* if necessary, goto next page and add pages */
|
||||
|
||||
|
@ -278,17 +401,45 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
|
|||
#pragma unroll 4
|
||||
for(int m = 0; m < bincount_current; m++) {
|
||||
int j = other_id[m];
|
||||
const int jtype = other_x[m + 3 * atoms_per_bin];
|
||||
|
||||
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
|
||||
//if(j==i) continue;
|
||||
if((j == i) || (HalfNeigh && (j < i))) continue;
|
||||
|
||||
if((j == i) ||
|
||||
(HalfNeigh && !GhostNewton && (j < i)) ||
|
||||
(HalfNeigh && GhostNewton &&
|
||||
((j < i) ||
|
||||
((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
|
||||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
|
||||
) continue;
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
const X_FLOAT delx = xtmp - other_x[m];
|
||||
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
|
||||
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
|
||||
const int jtype = other_x[m + 3 * atoms_per_bin];
|
||||
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
if((rsq <= cutneighsq(itype,jtype)) && (n < neigh_list.maxneighs)) neighbors_i(n++) = j;
|
||||
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if (molecular) {
|
||||
int which = 0;
|
||||
if (!moltemplate)
|
||||
which = find_special(i,j);
|
||||
/* else if (imol >= 0) */
|
||||
/* which = find_special(onemols[imol]->special[iatom], */
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
/* tag[j]-tagprev); */
|
||||
/* else which = 0; */
|
||||
if (which == 0){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
}else if (minimum_image_check(delx,dely,delz)){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
}
|
||||
else if (which > 0) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
}
|
||||
} else {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
__syncthreads();
|
||||
|
@ -319,15 +470,41 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
|
|||
#pragma unroll 8
|
||||
for(int m = 0; m < bincount_current; m++) {
|
||||
const int j = other_id[m];
|
||||
const int jtype = other_x[m + 3 * atoms_per_bin];
|
||||
|
||||
if(HalfNeigh && (j < i)) continue;
|
||||
//if(HalfNeigh && (j < i)) continue;
|
||||
if(HalfNeigh && !GhostNewton && (j < i)) continue;
|
||||
if(!HalfNeigh && j==i) continue;
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - other_x[m];
|
||||
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
|
||||
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
|
||||
const int jtype = other_x[m + 3 * atoms_per_bin];
|
||||
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
if((rsq <= cutneighsq(itype,jtype)) && (n < neigh_list.maxneighs)) neighbors_i(n++) = j;
|
||||
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if (molecular) {
|
||||
int which = 0;
|
||||
if (!moltemplate)
|
||||
which = find_special(i,j);
|
||||
/* else if (imol >= 0) */
|
||||
/* which = find_special(onemols[imol]->special[iatom], */
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
/* tag[j]-tagprev); */
|
||||
/* else which = 0; */
|
||||
if (which == 0){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
}else if (minimum_image_check(delx,dely,delz)){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
}
|
||||
else if (which > 0) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
}
|
||||
} else {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
__syncthreads();
|
||||
|
@ -343,6 +520,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
|
|||
|
||||
if(n >= new_maxneighs()) new_maxneighs() = n;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -361,12 +539,45 @@ void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list)
|
|||
atomKK->k_type.view<DeviceType>(),
|
||||
atomKK->k_mask.view<DeviceType>(),
|
||||
atomKK->k_molecule.view<DeviceType>(),
|
||||
atomKK->k_tag.view<DeviceType>(),
|
||||
atomKK->k_special.view<DeviceType>(),
|
||||
atomKK->k_nspecial.view<DeviceType>(),
|
||||
atomKK->molecular,
|
||||
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
|
||||
bininvx,bininvy,bininvz,
|
||||
bboxhi,bboxlo);
|
||||
exclude, nex_type,maxex_type,
|
||||
k_ex1_type.view<DeviceType>(),
|
||||
k_ex2_type.view<DeviceType>(),
|
||||
k_ex_type.view<DeviceType>(),
|
||||
nex_group,maxex_group,
|
||||
k_ex1_group.view<DeviceType>(),
|
||||
k_ex2_group.view<DeviceType>(),
|
||||
k_ex1_bit.view<DeviceType>(),
|
||||
k_ex2_bit.view<DeviceType>(),
|
||||
nex_mol, maxex_mol,
|
||||
k_ex_mol_group.view<DeviceType>(),
|
||||
k_ex_mol_bit.view<DeviceType>(),
|
||||
bboxhi,bboxlo,
|
||||
domain->xperiodic,domain->yperiodic,domain->zperiodic,
|
||||
domain->xprd_half,domain->yprd_half,domain->zprd_half);
|
||||
|
||||
k_cutneighsq.sync<DeviceType>();
|
||||
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK);
|
||||
k_ex1_type.sync<DeviceType>();
|
||||
k_ex2_type.sync<DeviceType>();
|
||||
k_ex_type.sync<DeviceType>();
|
||||
k_ex1_group.sync<DeviceType>();
|
||||
k_ex2_group.sync<DeviceType>();
|
||||
k_ex1_bit.sync<DeviceType>();
|
||||
k_ex2_bit.sync<DeviceType>();
|
||||
k_ex_mol_group.sync<DeviceType>();
|
||||
k_ex_mol_bit.sync<DeviceType>();
|
||||
|
||||
data.special_flag[0] = special_flag[0];
|
||||
data.special_flag[1] = special_flag[1];
|
||||
data.special_flag[2] = special_flag[2];
|
||||
data.special_flag[3] = special_flag[3];
|
||||
|
||||
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
|
||||
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
|
||||
DeviceType::fence();
|
||||
|
||||
|
@ -407,24 +618,24 @@ void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list)
|
|||
|
||||
Kokkos::deep_copy(data.resize, data.h_resize);
|
||||
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
|
||||
#if DEVICE==2
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#define BINS_PER_BLOCK 2
|
||||
const int factor = atoms_per_bin<64?2:1;
|
||||
Kokkos::ParallelWorkRequest config((mbins+factor-1)/factor,atoms_per_bin*factor);
|
||||
Kokkos::TeamPolicy<DeviceType> config((mbins+factor-1)/factor,atoms_per_bin*factor);
|
||||
#else
|
||||
const int factor = 1;
|
||||
#endif
|
||||
|
||||
if(newton_pair) {
|
||||
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
//#if DEVICE==2
|
||||
//#ifdef KOKKOS_HAVE_CUDA
|
||||
// Kokkos::parallel_for(config, f);
|
||||
//#else
|
||||
Kokkos::parallel_for(nall, f);
|
||||
//#endif
|
||||
} else {
|
||||
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
//#if DEVICE==2
|
||||
//#ifdef KOKKOS_HAVE_CUDA
|
||||
// Kokkos::parallel_for(config, f);
|
||||
//#else
|
||||
Kokkos::parallel_for(nall, f);
|
||||
|
|
|
@ -113,6 +113,6 @@ void NeighListKokkos<Device>::stencil_allocate(int smax, int style)
|
|||
}
|
||||
|
||||
template class NeighListKokkos<LMPDeviceType>;
|
||||
#if DEVICE==2
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class NeighListKokkos<LMPHostType>;
|
||||
#endif
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
enum{FULL,HALFTHREAD,HALF,N2,FULLCLUSTER};
|
||||
enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u,FULLCLUSTER=16u};
|
||||
|
||||
class AtomNeighbors
|
||||
{
|
||||
|
|
|
@ -51,6 +51,16 @@ NeighborKokkos::~NeighborKokkos()
|
|||
|
||||
delete [] pair_build_device;
|
||||
delete [] pair_build_host;
|
||||
|
||||
memory->destroy_kokkos(k_ex1_type,ex1_type);
|
||||
memory->destroy_kokkos(k_ex2_type,ex2_type);
|
||||
memory->destroy_kokkos(k_ex1_group,ex1_group);
|
||||
memory->destroy_kokkos(k_ex2_group,ex2_group);
|
||||
memory->destroy_kokkos(k_ex_mol_group,ex_mol_group);
|
||||
memory->destroy_kokkos(k_ex1_bit,ex1_bit);
|
||||
memory->destroy_kokkos(k_ex2_bit,ex2_bit);
|
||||
memory->destroy_kokkos(k_ex_mol_bit,ex_mol_bit);
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -72,7 +82,7 @@ void NeighborKokkos::init_cutneighsq_kokkos(int n)
|
|||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int NeighborKokkos::init_lists_kokkos()
|
||||
{
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nlist_host; i++) delete lists_host[i];
|
||||
|
@ -211,6 +221,32 @@ void NeighborKokkos::init_list_grow_kokkos(int i)
|
|||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::init_ex_type_kokkos(int n)
|
||||
{
|
||||
memory->create_kokkos(k_ex_type,ex_type,n+1,n+1,"neigh:ex_type");
|
||||
k_ex_type.modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::init_ex_bit_kokkos()
|
||||
{
|
||||
memory->create_kokkos(k_ex1_bit, ex1_bit, nex_group, "neigh:ex1_bit");
|
||||
k_ex1_bit.modify<LMPHostType>();
|
||||
memory->create_kokkos(k_ex2_bit, ex2_bit, nex_group, "neigh:ex2_bit");
|
||||
k_ex2_bit.modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::init_ex_mol_bit_kokkos()
|
||||
{
|
||||
memory->create_kokkos(k_ex_mol_bit, ex_mol_bit, nex_mol, "neigh:ex_mol_bit");
|
||||
k_ex_mol_bit.modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::choose_build(int index, NeighRequest *rq)
|
||||
{
|
||||
if (rq->kokkos_host != 0) {
|
||||
|
@ -264,6 +300,29 @@ void NeighborKokkos::setup_bins_kokkos(int i)
|
|||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::modify_ex_type_grow_kokkos(){
|
||||
memory->grow_kokkos(k_ex1_type,ex1_type,maxex_type,"neigh:ex1_type");
|
||||
k_ex1_type.modify<LMPHostType>();
|
||||
memory->grow_kokkos(k_ex2_type,ex2_type,maxex_type,"neigh:ex2_type");
|
||||
k_ex2_type.modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
void NeighborKokkos::modify_ex_group_grow_kokkos(){
|
||||
memory->grow_kokkos(k_ex1_group,ex1_group,maxex_group,"neigh:ex1_group");
|
||||
k_ex1_group.modify<LMPHostType>();
|
||||
memory->grow_kokkos(k_ex2_group,ex2_group,maxex_group,"neigh:ex2_group");
|
||||
k_ex2_group.modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
void NeighborKokkos::modify_mol_group_grow_kokkos(){
|
||||
memory->grow_kokkos(k_ex_mol_group,ex_mol_group,maxex_mol,"neigh:ex_mol_group");
|
||||
k_ex_mol_group.modify<LMPHostType>();
|
||||
}
|
||||
|
||||
// include to trigger instantiation of templated functions
|
||||
|
||||
#include "neigh_full_kokkos.h"
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "neighbor.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
#include "kokkos_type.h"
|
||||
#include <math.h>
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
|
@ -33,8 +34,15 @@ class NeighborKokkosExecute
|
|||
typename AT::t_int_2d bins;
|
||||
typename AT::t_int_2d_const c_bins;
|
||||
const typename AT::t_x_array_randomread x;
|
||||
const typename AT::t_int_1d_const type,mask;
|
||||
const typename AT::t_tagint_1d_const molecule;
|
||||
const typename AT::t_int_1d_const type,mask,molecule;
|
||||
|
||||
const typename AT::t_tagint_1d_const tag;
|
||||
const typename AT::t_tagint_2d_const special;
|
||||
const typename AT::t_int_2d_const nspecial;
|
||||
const int molecular;
|
||||
int moltemplate;
|
||||
|
||||
int special_flag[4];
|
||||
|
||||
const int nbinx,nbiny,nbinz;
|
||||
const int mbinx,mbiny,mbinz;
|
||||
|
@ -44,38 +52,88 @@ class NeighborKokkosExecute
|
|||
|
||||
const int nlocal;
|
||||
|
||||
const int exclude;
|
||||
|
||||
const int nex_type;
|
||||
const int maxex_type;
|
||||
const typename AT::t_int_1d_const ex1_type,ex2_type;
|
||||
const typename AT::t_int_2d_const ex_type;
|
||||
|
||||
const int nex_group;
|
||||
const int maxex_group;
|
||||
const typename AT::t_int_1d_const ex1_group,ex2_group;
|
||||
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
|
||||
|
||||
const int nex_mol;
|
||||
const int maxex_mol;
|
||||
const typename AT::t_int_1d_const ex_mol_group;
|
||||
const typename AT::t_int_1d_const ex_mol_bit;
|
||||
|
||||
typename AT::t_int_scalar resize;
|
||||
typename AT::t_int_scalar new_maxneighs;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
|
||||
|
||||
const int xperiodic, yperiodic, zperiodic;
|
||||
const int xprd_half, yprd_half, zprd_half;
|
||||
|
||||
NeighborKokkosExecute(
|
||||
const NeighListKokkos<Device> &_neigh_list,
|
||||
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
|
||||
const typename AT::t_int_1d &_bincount,
|
||||
const typename AT::t_int_2d &_bins,
|
||||
const int _nlocal,
|
||||
const typename AT::t_x_array_randomread &_x,
|
||||
const typename AT::t_int_1d_const &_type,
|
||||
const typename AT::t_int_1d_const &_mask,
|
||||
const typename AT::t_tagint_1d_const &_molecule,
|
||||
const int & _nbinx,const int & _nbiny,const int & _nbinz,
|
||||
const int & _mbinx,const int & _mbiny,const int & _mbinz,
|
||||
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
|
||||
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
|
||||
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo):
|
||||
const NeighListKokkos<Device> &_neigh_list,
|
||||
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
|
||||
const typename AT::t_int_1d &_bincount,
|
||||
const typename AT::t_int_2d &_bins,
|
||||
const int _nlocal,
|
||||
const typename AT::t_x_array_randomread &_x,
|
||||
const typename AT::t_int_1d_const &_type,
|
||||
const typename AT::t_int_1d_const &_mask,
|
||||
const typename AT::t_int_1d_const &_molecule,
|
||||
const typename AT::t_tagint_1d_const &_tag,
|
||||
const typename AT::t_tagint_2d_const &_special,
|
||||
const typename AT::t_int_2d_const &_nspecial,
|
||||
const int &_molecular,
|
||||
const int & _nbinx,const int & _nbiny,const int & _nbinz,
|
||||
const int & _mbinx,const int & _mbiny,const int & _mbinz,
|
||||
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
|
||||
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
|
||||
const int & _exclude,const int & _nex_type,const int & _maxex_type,
|
||||
const typename AT::t_int_1d_const & _ex1_type,
|
||||
const typename AT::t_int_1d_const & _ex2_type,
|
||||
const typename AT::t_int_2d_const & _ex_type,
|
||||
const int & _nex_group,const int & _maxex_group,
|
||||
const typename AT::t_int_1d_const & _ex1_group,
|
||||
const typename AT::t_int_1d_const & _ex2_group,
|
||||
const typename AT::t_int_1d_const & _ex1_bit,
|
||||
const typename AT::t_int_1d_const & _ex2_bit,
|
||||
const int & _nex_mol,const int & _maxex_mol,
|
||||
const typename AT::t_int_1d_const & _ex_mol_group,
|
||||
const typename AT::t_int_1d_const & _ex_mol_bit,
|
||||
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
|
||||
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
|
||||
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
|
||||
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
|
||||
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
|
||||
nlocal(_nlocal),
|
||||
x(_x),type(_type),mask(_mask),molecule(_molecule),
|
||||
tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
|
||||
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
|
||||
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
|
||||
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
|
||||
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz) {
|
||||
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
|
||||
exclude(_exclude),nex_type(_nex_type),maxex_type(_maxex_type),
|
||||
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
|
||||
nex_group(_nex_group),maxex_group(_maxex_group),
|
||||
ex1_group(_ex1_group),ex2_group(_ex2_group),
|
||||
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),maxex_mol(_maxex_mol),
|
||||
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
|
||||
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
|
||||
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half){
|
||||
|
||||
if (molecular == 2) moltemplate = 1;
|
||||
else moltemplate = 0;
|
||||
|
||||
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
|
||||
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
|
||||
|
||||
|
||||
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
|
||||
#ifndef KOKKOS_USE_UVM
|
||||
h_resize = Kokkos::create_mirror_view(resize);
|
||||
|
@ -103,10 +161,10 @@ class NeighborKokkosExecute
|
|||
KOKKOS_FUNCTION
|
||||
void build_cluster_Item(const int &i) const;
|
||||
|
||||
#if DEVICE==2
|
||||
template<int HalfNeigh>
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template<int HalfNeigh, int GhostNewton>
|
||||
__device__ inline
|
||||
void build_ItemCuda(Device dev) const;
|
||||
void build_ItemCuda(typename Kokkos::TeamPolicy<Device>::member_type dev) const;
|
||||
#endif
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
@ -143,6 +201,21 @@ class NeighborKokkosExecute
|
|||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int find_special(const int &i, const int &j) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int minimum_image_check(double dx, double dy, double dz) const {
|
||||
if (xperiodic && fabs(dx) > xprd_half) return 1;
|
||||
if (yperiodic && fabs(dy) > yprd_half) return 1;
|
||||
if (zperiodic && fabs(dz) > zprd_half) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<class Device>
|
||||
|
@ -175,12 +248,12 @@ struct NeighborKokkosBuildFunctor {
|
|||
void operator() (const int & i) const {
|
||||
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
|
||||
}
|
||||
#if DEVICE==2
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (Device dev) const {
|
||||
c.template build_ItemCuda<HALF_NEIGH>(dev);
|
||||
void operator() (typename Kokkos::TeamPolicy<Device>::member_type dev) const {
|
||||
c.template build_ItemCuda<HALF_NEIGH,GHOST_NEWTON>(dev);
|
||||
}
|
||||
size_t shmem_size() const { return sharedsize; }
|
||||
size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; }
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -220,15 +293,28 @@ class NeighborKokkos : public Neighbor {
|
|||
DAT::tdual_int_1d k_bincount;
|
||||
DAT::tdual_int_2d k_bins;
|
||||
|
||||
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
|
||||
DAT::tdual_int_2d k_ex_type;
|
||||
DAT::tdual_int_1d k_ex1_group,k_ex2_group;
|
||||
DAT::tdual_int_1d k_ex1_bit,k_ex2_bit;
|
||||
DAT::tdual_int_1d k_ex_mol_group;
|
||||
DAT::tdual_int_1d k_ex_mol_bit;
|
||||
|
||||
void init_cutneighsq_kokkos(int);
|
||||
int init_lists_kokkos();
|
||||
void init_list_flags1_kokkos(int);
|
||||
void init_list_flags2_kokkos(int);
|
||||
void init_list_grow_kokkos(int);
|
||||
void init_ex_type_kokkos(int);
|
||||
void init_ex_bit_kokkos();
|
||||
void init_ex_mol_bit_kokkos();
|
||||
void choose_build(int, NeighRequest *);
|
||||
void build_kokkos(int);
|
||||
void setup_bins_kokkos(int);
|
||||
|
||||
void modify_ex_type_grow_kokkos();
|
||||
void modify_ex_group_grow_kokkos();
|
||||
void modify_mol_group_grow_kokkos();
|
||||
|
||||
typedef void (NeighborKokkos::*PairPtrHost)
|
||||
(class NeighListKokkos<LMPHostType> *);
|
||||
PairPtrHost *pair_build_host;
|
||||
|
|
|
@ -0,0 +1,266 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "math.h"
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "string.h"
|
||||
#include "pair_coul_cut_kokkos.h"
|
||||
#include "kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "math_const.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "atom_masks.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathConst;
|
||||
|
||||
#define KOKKOS_CUDA_MAX_THREADS 256
|
||||
#define KOKKOS_CUDA_MIN_BLOCKS 8
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
PairCoulCutKokkos<DeviceType>::PairCoulCutKokkos(LAMMPS *lmp) : PairCoulCut(lmp)
|
||||
{
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
|
||||
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
|
||||
cutsq = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
PairCoulCutKokkos<DeviceType>::~PairCoulCutKokkos()
|
||||
{
|
||||
if (allocated)
|
||||
memory->destroy_kokkos(k_cutsq, cutsq);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairCoulCutKokkos<DeviceType>::cleanup_copy() {
|
||||
// WHY needed: this prevents parent copy from deallocating any arrays
|
||||
allocated = 0;
|
||||
cutsq = NULL;
|
||||
eatom = NULL;
|
||||
vatom = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
{
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
|
||||
double ecoul = 0.0;
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
k_cutsq.template sync<DeviceType>();
|
||||
k_cut_ljsq.template sync<DeviceType>();
|
||||
k_cut_coulsq.template sync<DeviceType>();
|
||||
k_params.template sync<DeviceType>();
|
||||
if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
|
||||
else atomKK->modified(execution_space,F_MASK);
|
||||
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
c_x = atomKK->k_x.view<DeviceType>();
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
q = atomKK->k_q.view<DeviceType>();
|
||||
type = atomKK->k_type.view<DeviceType>();
|
||||
nlocal = atom->nlocal;
|
||||
nall = atom->nlocal + atom->nghost;
|
||||
newton_pair = force->newton_pair;
|
||||
special_lj[0] = force->special_lj[0];
|
||||
special_lj[1] = force->special_lj[1];
|
||||
special_lj[2] = force->special_lj[2];
|
||||
special_lj[3] = force->special_lj[3];
|
||||
special_coul[0] = force->special_coul[0];
|
||||
special_coul[1] = force->special_coul[1];
|
||||
special_coul[2] = force->special_coul[2];
|
||||
special_coul[3] = force->special_coul[3];
|
||||
qqrd2e = force->qqrd2e;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
EV_FLOAT ev = pair_compute<PairCoulCutKokkos<DeviceType>,void >
|
||||
(this,(NeighListKokkos<DeviceType>*)list);
|
||||
|
||||
DeviceType::fence();
|
||||
|
||||
if (eflag) eng_coul += ev.ecoul;
|
||||
if (vflag_global) {
|
||||
virial[0] += ev.v[0];
|
||||
virial[1] += ev.v[1];
|
||||
virial[2] += ev.v[2];
|
||||
virial[3] += ev.v[3];
|
||||
virial[4] += ev.v[4];
|
||||
virial[5] += ev.v[5];
|
||||
}
|
||||
|
||||
if (vflag_fdotr) pair_virial_fdotr_compute(this);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairCoulCutKokkos<DeviceType>::
|
||||
compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype,
|
||||
const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
|
||||
const F_FLOAT r2inv = 1.0/rsq;
|
||||
const F_FLOAT rinv = sqrt(r2inv);
|
||||
F_FLOAT forcecoul;
|
||||
|
||||
forcecoul = qqrd2e*(STACKPARAMS?m_params[itype][jtype].scale:params(itype,jtype).scale)*
|
||||
qtmp *q(j) *rinv;
|
||||
|
||||
return factor_coul*forcecoul*r2inv;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairCoulCutKokkos<DeviceType>::
|
||||
compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype,
|
||||
const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
|
||||
const F_FLOAT r2inv = 1.0/rsq;
|
||||
const F_FLOAT rinv = sqrt(r2inv);
|
||||
|
||||
return factor_coul*qqrd2e * (STACKPARAMS?m_params[itype][jtype].scale:params(itype,jtype).scale)
|
||||
* qtmp *q(j)*rinv;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allocate all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairCoulCutKokkos<DeviceType>::allocate()
|
||||
{
|
||||
PairCoulCut::allocate();
|
||||
|
||||
int n = atom->ntypes;
|
||||
memory->destroy(cutsq);
|
||||
memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
|
||||
d_cutsq = k_cutsq.template view<DeviceType>();
|
||||
|
||||
k_cut_ljsq = typename ArrayTypes<DeviceType>::tdual_ffloat_2d("pair:cut_ljsq",n+1,n+1);
|
||||
d_cut_ljsq = k_cut_ljsq.template view<DeviceType>();
|
||||
k_cut_coulsq = typename ArrayTypes<DeviceType>::tdual_ffloat_2d("pair:cut_coulsq",n+1,n+1);
|
||||
d_cut_coulsq = k_cut_coulsq.template view<DeviceType>();
|
||||
|
||||
k_params = Kokkos::DualView<params_coul**,Kokkos::LayoutRight,DeviceType>("PairCoulCut::params",n+1,n+1);
|
||||
params = k_params.d_view;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
global settings
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairCoulCutKokkos<DeviceType>::settings(int narg, char **arg)
|
||||
{
|
||||
// \todo check what should be the limit on narg
|
||||
if (narg > 2) error->all(FLERR,"Illegal pair_style command");
|
||||
|
||||
PairCoulCut::settings(1,arg);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairCoulCutKokkos<DeviceType>::init_style()
|
||||
{
|
||||
PairCoulCut::init_style();
|
||||
|
||||
// irequest = neigh request made by parent class
|
||||
|
||||
neighflag = lmp->kokkos->neighflag;
|
||||
int irequest = neighbor->nrequest - 1;
|
||||
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
|
||||
!Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with coul/cut/kk");
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init for one type pair i,j and corresponding j,i
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double PairCoulCutKokkos<DeviceType>::init_one(int i, int j)
|
||||
{
|
||||
double cutone = PairCoulCut::init_one(i,j);
|
||||
|
||||
k_params.h_view(i,j).scale = scale[i][j];
|
||||
k_params.h_view(i,j).cutsq = cutone*cutone;
|
||||
k_params.h_view(j,i) = k_params.h_view(i,j);
|
||||
|
||||
if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
|
||||
m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
|
||||
m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
|
||||
m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cutone*cutone;
|
||||
m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cutone*cutone;
|
||||
}
|
||||
k_cutsq.h_view(i,j) = cutone*cutone;
|
||||
k_cutsq.template modify<LMPHostType>();
|
||||
k_cut_ljsq.h_view(i,j) = cutone*cutone;
|
||||
k_cut_ljsq.template modify<LMPHostType>();
|
||||
k_cut_coulsq.h_view(i,j) = cutone*cutone;
|
||||
k_cut_coulsq.template modify<LMPHostType>();
|
||||
k_params.template modify<LMPHostType>();
|
||||
|
||||
return cutone;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template class PairCoulCutKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class PairCoulCutKokkos<LMPHostType>;
|
||||
#endif
|
|
@ -0,0 +1,137 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(coul/cut/kk,PairCoulCutKokkos<LMPDeviceType>)
|
||||
PairStyle(coul/cut/kk/device,PairCoulCutKokkos<LMPDeviceType>)
|
||||
PairStyle(coul/cut/kk/host,PairCoulCutKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_COUL_CUT_KOKKOS_H
|
||||
#define LMP_PAIR_COUL_CUT_KOKKOS_H
|
||||
|
||||
#include "pair_kokkos.h"
|
||||
#include "pair_coul_cut.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
class PairCoulCutKokkos : public PairCoulCut {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL&HALFTHREAD&HALF};
|
||||
enum {COUL_FLAG=1};
|
||||
typedef DeviceType device_type;
|
||||
PairCoulCutKokkos(class LAMMPS *);
|
||||
~PairCoulCutKokkos();
|
||||
|
||||
void compute(int, int);
|
||||
|
||||
void settings(int, char **);
|
||||
void init_style();
|
||||
double init_one(int, int);
|
||||
|
||||
struct params_coul{
|
||||
params_coul(){cutsq=0,scale=0;};
|
||||
params_coul(int i){cutsq=0,scale=0;};
|
||||
F_FLOAT cutsq, scale;
|
||||
};
|
||||
|
||||
protected:
|
||||
void cleanup_copy();
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype) const {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype) const{
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
|
||||
|
||||
Kokkos::DualView<params_coul**,Kokkos::LayoutRight,DeviceType> k_params;
|
||||
typename Kokkos::DualView<params_coul**,
|
||||
Kokkos::LayoutRight,DeviceType>::t_dev_const params;
|
||||
// hardwired to space for 15 atom types
|
||||
params_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
|
||||
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
typename ArrayTypes<DeviceType>::t_x_array_randomread x;
|
||||
typename ArrayTypes<DeviceType>::t_x_array c_x;
|
||||
typename ArrayTypes<DeviceType>::t_f_array f;
|
||||
typename ArrayTypes<DeviceType>::t_float_1d_randomread q;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
|
||||
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
|
||||
|
||||
int newton_pair;
|
||||
|
||||
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
|
||||
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_ljsq;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_ljsq;
|
||||
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_coulsq;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_coulsq;
|
||||
|
||||
class AtomKokkos *atomKK;
|
||||
int neighflag;
|
||||
int nlocal,nall,eflag,vflag;
|
||||
|
||||
double special_coul[4];
|
||||
double special_lj[4];
|
||||
double qqrd2e;
|
||||
|
||||
void allocate();
|
||||
friend class PairComputeFunctor<PairCoulCutKokkos,FULL,true>;
|
||||
friend class PairComputeFunctor<PairCoulCutKokkos,HALF,true>;
|
||||
friend class PairComputeFunctor<PairCoulCutKokkos,HALFTHREAD,true>;
|
||||
friend class PairComputeFunctor<PairCoulCutKokkos,N2,true>;
|
||||
friend class PairComputeFunctor<PairCoulCutKokkos,FULLCLUSTER,true >;
|
||||
friend class PairComputeFunctor<PairCoulCutKokkos,FULL,false>;
|
||||
friend class PairComputeFunctor<PairCoulCutKokkos,HALF,false>;
|
||||
friend class PairComputeFunctor<PairCoulCutKokkos,HALFTHREAD,false>;
|
||||
friend class PairComputeFunctor<PairCoulCutKokkos,N2,false>;
|
||||
friend class PairComputeFunctor<PairCoulCutKokkos,FULLCLUSTER,false >;
|
||||
friend EV_FLOAT pair_compute<PairCoulCutKokkos,void>(PairCoulCutKokkos*,
|
||||
NeighListKokkos<DeviceType>*);
|
||||
friend void pair_virial_fdotr_compute<PairCoulCutKokkos>(PairCoulCutKokkos*);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -25,27 +25,72 @@
|
|||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<int Table>
|
||||
struct CoulLongTable {
|
||||
enum {DoTable = Table};
|
||||
};
|
||||
|
||||
// Tags for doing coulomb calculations or not
|
||||
// They facilitate function overloading, since
|
||||
// partial template specialization of member functions is not allowed
|
||||
struct CoulTag {};
|
||||
struct NoCoulTag {};
|
||||
|
||||
template<int FLAG>
|
||||
struct DoCoul {
|
||||
typedef NoCoulTag type;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct DoCoul<1> {
|
||||
typedef CoulTag type;
|
||||
};
|
||||
|
||||
// Determine memory traits for force array
|
||||
// Do atomic trait when running HALFTHREAD neighbor list style
|
||||
template<int NEIGHFLAG>
|
||||
struct AtomicF {
|
||||
enum {value = Kokkos::Unmanaged};
|
||||
};
|
||||
|
||||
template<>
|
||||
struct AtomicF<HALFTHREAD> {
|
||||
enum {value = Kokkos::Atomic|Kokkos::Unmanaged};
|
||||
};
|
||||
|
||||
//Specialisation for Neighborlist types Half, HalfThread, Full
|
||||
template <class PairStyle, int NEIGHFLAG, bool STACKPARAMS, class Specialisation = void>
|
||||
struct PairComputeFunctor {
|
||||
typedef typename PairStyle::device_type device_type ;
|
||||
|
||||
// Reduction type, contains evdwl, ecoul and virial[6]
|
||||
typedef EV_FLOAT value_type;
|
||||
|
||||
// The copy of the pair style
|
||||
PairStyle c;
|
||||
|
||||
// The force array is atomic for Half/Thread neighbor style
|
||||
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,
|
||||
device_type,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > f;
|
||||
NeighListKokkos<device_type> list;
|
||||
|
||||
PairComputeFunctor(PairStyle* c_ptr,
|
||||
NeighListKokkos<device_type>* list_ptr):
|
||||
c(*c_ptr),list(*list_ptr) {};
|
||||
c(*c_ptr),f(c.f),list(*list_ptr) {};
|
||||
|
||||
// Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle
|
||||
~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
|
||||
return j >> SBBITS & 3;
|
||||
}
|
||||
|
||||
// Loop over neighbors of one atom without coulomb interaction
|
||||
// This function is called in parallel
|
||||
template<int EVFLAG, int NEWTON_PAIR>
|
||||
KOKKOS_FUNCTION
|
||||
EV_FLOAT compute_item(const int& ii,
|
||||
const NeighListKokkos<device_type> &list) const {
|
||||
const NeighListKokkos<device_type> &list, const NoCoulTag&) const {
|
||||
EV_FLOAT ev;
|
||||
const int i = list.d_ilist[ii];
|
||||
const X_FLOAT xtmp = c.x(i,0);
|
||||
|
@ -77,25 +122,17 @@ struct PairComputeFunctor {
|
|||
fxtmp += delx*fpair;
|
||||
fytmp += dely*fpair;
|
||||
fztmp += delz*fpair;
|
||||
if ((NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
|
||||
Kokkos::atomic_fetch_add(&c.f(j,0),-delx*fpair);
|
||||
Kokkos::atomic_fetch_add(&c.f(j,1),-dely*fpair);
|
||||
Kokkos::atomic_fetch_add(&c.f(j,2),-delz*fpair);
|
||||
}
|
||||
|
||||
if ((NEIGHFLAG==HALF) && (NEWTON_PAIR || j < c.nlocal)) {
|
||||
c.f(j,0) -= delx*fpair;
|
||||
c.f(j,1) -= dely*fpair;
|
||||
c.f(j,2) -= delz*fpair;
|
||||
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
|
||||
f(j,0) -= delx*fpair;
|
||||
f(j,1) -= dely*fpair;
|
||||
f(j,2) -= delz*fpair;
|
||||
}
|
||||
|
||||
if (EVFLAG) {
|
||||
if (c.eflag) {
|
||||
ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
|
||||
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
if (c.COUL_FLAG)
|
||||
ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
|
||||
factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
}
|
||||
|
||||
if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
|
||||
|
@ -103,16 +140,84 @@ struct PairComputeFunctor {
|
|||
}
|
||||
|
||||
}
|
||||
if (NEIGHFLAG == HALFTHREAD) {
|
||||
Kokkos::atomic_fetch_add(&c.f(i,0),fxtmp);
|
||||
Kokkos::atomic_fetch_add(&c.f(i,1),fytmp);
|
||||
Kokkos::atomic_fetch_add(&c.f(i,2),fztmp);
|
||||
} else {
|
||||
c.f(i,0) += fxtmp;
|
||||
c.f(i,1) += fytmp;
|
||||
c.f(i,2) += fztmp;
|
||||
|
||||
f(i,0) += fxtmp;
|
||||
f(i,1) += fytmp;
|
||||
f(i,2) += fztmp;
|
||||
|
||||
return ev;
|
||||
}
|
||||
|
||||
// Loop over neighbors of one atom with coulomb interaction
|
||||
// This function is called in parallel
|
||||
template<int EVFLAG, int NEWTON_PAIR>
|
||||
KOKKOS_FUNCTION
|
||||
EV_FLOAT compute_item(const int& ii,
|
||||
const NeighListKokkos<device_type> &list, const CoulTag& ) const {
|
||||
EV_FLOAT ev;
|
||||
const int i = list.d_ilist[ii];
|
||||
const X_FLOAT xtmp = c.x(i,0);
|
||||
const X_FLOAT ytmp = c.x(i,1);
|
||||
const X_FLOAT ztmp = c.x(i,2);
|
||||
const int itype = c.type(i);
|
||||
const F_FLOAT qtmp = c.q(i);
|
||||
|
||||
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
|
||||
const int jnum = list.d_numneigh[i];
|
||||
|
||||
F_FLOAT fxtmp = 0.0;
|
||||
F_FLOAT fytmp = 0.0;
|
||||
F_FLOAT fztmp = 0.0;
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
int j = neighbors_i(jj);
|
||||
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
|
||||
const F_FLOAT factor_coul = c.special_coul[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
const X_FLOAT delx = xtmp - c.x(j,0);
|
||||
const X_FLOAT dely = ytmp - c.x(j,1);
|
||||
const X_FLOAT delz = ztmp - c.x(j,2);
|
||||
const int jtype = c.type(j);
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
||||
|
||||
F_FLOAT fpair = F_FLOAT();
|
||||
|
||||
if(rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype)))
|
||||
fpair+=factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
if(rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype)))
|
||||
fpair+=c.template compute_fcoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
|
||||
|
||||
fxtmp += delx*fpair;
|
||||
fytmp += dely*fpair;
|
||||
fztmp += delz*fpair;
|
||||
|
||||
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
|
||||
f(j,0) -= delx*fpair;
|
||||
f(j,1) -= dely*fpair;
|
||||
f(j,2) -= delz*fpair;
|
||||
}
|
||||
|
||||
if (EVFLAG) {
|
||||
if (c.eflag) {
|
||||
if(rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype)))
|
||||
ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
|
||||
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
if(rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype)))
|
||||
ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
|
||||
c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
|
||||
}
|
||||
|
||||
if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
f(i,0) += fxtmp;
|
||||
f(i,1) += fytmp;
|
||||
f(i,2) += fztmp;
|
||||
|
||||
return ev;
|
||||
}
|
||||
|
||||
|
@ -142,7 +247,7 @@ struct PairComputeFunctor {
|
|||
const E_FLOAT v5 = dely*delz*fpair;
|
||||
|
||||
if (c.vflag_global) {
|
||||
if (NEIGHFLAG) {
|
||||
if (NEIGHFLAG!=FULL) {
|
||||
if (NEWTON_PAIR) {
|
||||
ev.v[0] += v0;
|
||||
ev.v[1] += v1;
|
||||
|
@ -202,43 +307,17 @@ struct PairComputeFunctor {
|
|||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
if (c.newton_pair) compute_item<0,1>(i,list);
|
||||
else compute_item<0,0>(i,list);
|
||||
if (c.newton_pair) compute_item<0,1>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
else compute_item<0,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i, value_type &energy_virial) const {
|
||||
if (c.newton_pair)
|
||||
energy_virial += compute_item<1,1>(i,list);
|
||||
energy_virial += compute_item<1,1>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
else
|
||||
energy_virial += compute_item<1,0>(i,list);
|
||||
energy_virial += compute_item<1,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(volatile value_type &update) {
|
||||
update.evdwl = 0;
|
||||
update.ecoul = 0;
|
||||
update.v[0] = 0;
|
||||
update.v[1] = 0;
|
||||
update.v[2] = 0;
|
||||
update.v[3] = 0;
|
||||
update.v[4] = 0;
|
||||
update.v[5] = 0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(volatile value_type &update,
|
||||
const volatile value_type &source) {
|
||||
update.evdwl += source.evdwl;
|
||||
update.ecoul += source.ecoul;
|
||||
update.v[0] += source.v[0];
|
||||
update.v[1] += source.v[1];
|
||||
update.v[2] += source.v[2];
|
||||
update.v[3] += source.v[3];
|
||||
update.v[4] += source.v[4];
|
||||
update.v[5] += source.v[5];
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
template <class PairStyle, bool STACKPARAMS, class Specialisation>
|
||||
|
@ -261,8 +340,8 @@ struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
|
|||
|
||||
template<int EVFLAG, int NEWTON_PAIR>
|
||||
KOKKOS_FUNCTION
|
||||
EV_FLOAT compute_item(const device_type& dev,
|
||||
const NeighListKokkos<device_type> &list) const {
|
||||
EV_FLOAT compute_item(const typename Kokkos::TeamPolicy<device_type>::member_type& dev,
|
||||
const NeighListKokkos<device_type> &list, const NoCoulTag& ) const {
|
||||
EV_FLOAT ev;
|
||||
const int i = vectorization::global_thread_rank(dev);
|
||||
|
||||
|
@ -302,9 +381,6 @@ struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
|
|||
if (c.eflag) {
|
||||
ev.evdwl += 0.5*
|
||||
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
if (c.COUL_FLAG)
|
||||
ev.ecoul += 0.5*
|
||||
factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
}
|
||||
|
||||
if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
|
||||
|
@ -373,44 +449,18 @@ struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
|
|||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const device_type& dev) const {
|
||||
if (c.newton_pair) compute_item<0,1>(dev,list);
|
||||
else compute_item<0,0>(dev,list);
|
||||
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev) const {
|
||||
if (c.newton_pair) compute_item<0,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
else compute_item<0,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const device_type& dev, value_type &energy_virial) const {
|
||||
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev, value_type &energy_virial) const {
|
||||
if (c.newton_pair)
|
||||
energy_virial += compute_item<1,1>(dev,list);
|
||||
energy_virial += compute_item<1,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
else
|
||||
energy_virial += compute_item<1,0>(dev,list);
|
||||
energy_virial += compute_item<1,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(volatile value_type &update) {
|
||||
update.evdwl = 0;
|
||||
update.ecoul = 0;
|
||||
update.v[0] = 0;
|
||||
update.v[1] = 0;
|
||||
update.v[2] = 0;
|
||||
update.v[3] = 0;
|
||||
update.v[4] = 0;
|
||||
update.v[5] = 0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(volatile value_type &update,
|
||||
const volatile value_type &source) {
|
||||
update.evdwl += source.evdwl;
|
||||
update.ecoul += source.ecoul;
|
||||
update.v[0] += source.v[0];
|
||||
update.v[1] += source.v[1];
|
||||
update.v[2] += source.v[2];
|
||||
update.v[3] += source.v[3];
|
||||
update.v[4] += source.v[4];
|
||||
update.v[5] += source.v[5];
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
template <class PairStyle, bool STACKPARAMS, class Specialisation>
|
||||
|
@ -433,7 +483,8 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
|
|||
template<int EVFLAG, int NEWTON_PAIR>
|
||||
KOKKOS_FUNCTION
|
||||
EV_FLOAT compute_item(const int& ii,
|
||||
const NeighListKokkos<device_type> &list) const {
|
||||
const NeighListKokkos<device_type> &list, const NoCoulTag&) const {
|
||||
(void) list;
|
||||
EV_FLOAT ev;
|
||||
const int i = ii;//list.d_ilist[ii];
|
||||
const X_FLOAT xtmp = c.x(i,0);
|
||||
|
@ -470,9 +521,6 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
|
|||
if (c.eflag) {
|
||||
ev.evdwl += 0.5*
|
||||
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
if (c.COUL_FLAG)
|
||||
ev.ecoul += 0.5*
|
||||
factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
}
|
||||
|
||||
if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
|
||||
|
@ -535,116 +583,156 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
|
|||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
compute_item<0,0>(i,list);
|
||||
compute_item<0,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i, value_type &energy_virial) const {
|
||||
energy_virial += compute_item<1,0>(i,list);
|
||||
energy_virial += compute_item<1,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(volatile value_type &update) {
|
||||
update.evdwl = 0;
|
||||
update.ecoul = 0;
|
||||
update.v[0] = 0;
|
||||
update.v[1] = 0;
|
||||
update.v[2] = 0;
|
||||
update.v[3] = 0;
|
||||
update.v[4] = 0;
|
||||
update.v[5] = 0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(volatile value_type &update,
|
||||
const volatile value_type &source) {
|
||||
update.evdwl += source.evdwl;
|
||||
update.ecoul += source.ecoul;
|
||||
update.v[0] += source.v[0];
|
||||
update.v[1] += source.v[1];
|
||||
update.v[2] += source.v[2];
|
||||
update.v[3] += source.v[3];
|
||||
update.v[4] += source.v[4];
|
||||
update.v[5] += source.v[5];
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
// Filter out Neighflags which are not supported for PairStyle
|
||||
// The enable_if clause will invalidate the last parameter of the function, so that
|
||||
// a match is only achieved, if PairStyle supports the specific neighborlist variant.
|
||||
// This uses the fact that failure to match template parameters is not an error.
|
||||
// By having the enable_if with a ! and without it, exactly one of the two versions of the functions
|
||||
// pair_compute_neighlist and pair_compute_fullcluster will match - either the dummy version
|
||||
// or the real one further below.
|
||||
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
|
||||
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<!(NEIGHFLAG&PairStyle::EnabledNeighFlags), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
|
||||
EV_FLOAT ev;
|
||||
(void) fpair;
|
||||
(void) list;
|
||||
printf("ERROR: calling pair_compute with invalid neighbor list style: requested %i available %i",NEIGHFLAG,PairStyle::EnabledNeighFlags);
|
||||
return ev;
|
||||
}
|
||||
|
||||
template<class PairStyle, class Specialisation>
|
||||
EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<!(FULLCLUSTER&PairStyle::EnabledNeighFlags), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
|
||||
EV_FLOAT ev;
|
||||
(void) fpair;
|
||||
(void) list;
|
||||
printf("ERROR: calling pair_compute with invalid neighbor list style: requested %i available %i",FULLCLUSTER,PairStyle::EnabledNeighFlags);
|
||||
return ev;
|
||||
}
|
||||
|
||||
// Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL,N2
|
||||
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
|
||||
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<NEIGHFLAG&PairStyle::EnabledNeighFlags, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
|
||||
EV_FLOAT ev;
|
||||
if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
||||
PairComputeFunctor<PairStyle,NEIGHFLAG,false,Specialisation > ff(fpair,list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
} else {
|
||||
PairComputeFunctor<PairStyle,NEIGHFLAG,true,Specialisation > ff(fpair,list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
}
|
||||
return ev;
|
||||
}
|
||||
|
||||
// Submit ParallelFor for NEIGHFLAG=FULLCLUSTER
|
||||
template<class PairStyle, class Specialisation>
|
||||
EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<FULLCLUSTER&PairStyle::EnabledNeighFlags, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
|
||||
EV_FLOAT ev;
|
||||
if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
||||
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,false,Specialisation >
|
||||
f_type;
|
||||
f_type ff(fpair, list);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
|
||||
#else
|
||||
const int teamsize = 1;
|
||||
#endif
|
||||
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
|
||||
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
|
||||
else Kokkos::parallel_for(config,ff);
|
||||
} else {
|
||||
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,true,Specialisation >
|
||||
f_type;
|
||||
f_type ff(fpair, list);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
|
||||
#else
|
||||
const int teamsize = 1;
|
||||
#endif
|
||||
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
|
||||
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
|
||||
else Kokkos::parallel_for(config,ff);
|
||||
}
|
||||
return ev;
|
||||
}
|
||||
|
||||
|
||||
template<class PairStyle, class Specialisation>
|
||||
EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
|
||||
EV_FLOAT ev;
|
||||
if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
||||
if (fpair->neighflag == FULL) {
|
||||
PairComputeFunctor<PairStyle,FULL,false,Specialisation >
|
||||
ff(fpair, list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
} else if (fpair->neighflag == HALFTHREAD) {
|
||||
PairComputeFunctor<PairStyle,HALFTHREAD,false,Specialisation >
|
||||
ff(fpair, list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
} else if (fpair->neighflag == HALF) {
|
||||
PairComputeFunctor<PairStyle,HALF,false,Specialisation >
|
||||
ff(fpair, list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
} else if (fpair->neighflag == N2) {
|
||||
PairComputeFunctor<PairStyle,N2,false,Specialisation >
|
||||
ff(fpair, list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(fpair->nlocal,ff,ev);
|
||||
else Kokkos::parallel_for(fpair->nlocal,ff);
|
||||
} else if (fpair->neighflag == FULLCLUSTER) {
|
||||
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,false,Specialisation >
|
||||
f_type;
|
||||
f_type ff(fpair, list);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
|
||||
#else
|
||||
const int teamsize = 1;
|
||||
#endif
|
||||
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),ff,ev);
|
||||
else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),ff);
|
||||
}
|
||||
} else {
|
||||
if (fpair->neighflag == FULL) {
|
||||
PairComputeFunctor<PairStyle,FULL,true,Specialisation >
|
||||
ff(fpair, list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
} else if (fpair->neighflag == HALFTHREAD) {
|
||||
PairComputeFunctor<PairStyle,HALFTHREAD,true,Specialisation >
|
||||
ff(fpair, list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
} else if (fpair->neighflag == HALF) {
|
||||
PairComputeFunctor<PairStyle,HALF,true,Specialisation >
|
||||
ff(fpair, list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
} else if (fpair->neighflag == N2) {
|
||||
PairComputeFunctor<PairStyle,N2,true,Specialisation >
|
||||
ff(fpair, list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(fpair->nlocal,ff,ev);
|
||||
else Kokkos::parallel_for(fpair->nlocal,ff);
|
||||
} else if (fpair->neighflag == FULLCLUSTER) {
|
||||
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,true,Specialisation >
|
||||
f_type;
|
||||
f_type ff(fpair, list);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
|
||||
#else
|
||||
const int teamsize = 1;
|
||||
#endif
|
||||
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),ff,ev);
|
||||
else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),ff);
|
||||
}
|
||||
if (fpair->neighflag == FULL) {
|
||||
ev = pair_compute_neighlist<PairStyle,FULL,Specialisation> (fpair,list);
|
||||
} else if (fpair->neighflag == HALFTHREAD) {
|
||||
ev = pair_compute_neighlist<PairStyle,HALFTHREAD,Specialisation> (fpair,list);
|
||||
} else if (fpair->neighflag == HALF) {
|
||||
ev = pair_compute_neighlist<PairStyle,HALF,Specialisation> (fpair,list);
|
||||
} else if (fpair->neighflag == N2) {
|
||||
ev = pair_compute_neighlist<PairStyle,N2,Specialisation> (fpair,list);
|
||||
} else if (fpair->neighflag == FULLCLUSTER) {
|
||||
ev = pair_compute_fullcluster<PairStyle,Specialisation> (fpair,list);
|
||||
}
|
||||
return ev;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
struct PairVirialFDotRCompute {
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
typedef EV_FLOAT value_type;
|
||||
typename AT::t_x_array_const x;
|
||||
typename AT::t_f_array_const f;
|
||||
const int offset;
|
||||
|
||||
PairVirialFDotRCompute( typename AT::t_x_array_const x_,
|
||||
typename AT::t_f_array_const f_,
|
||||
const int offset_):x(x_),f(f_),offset(offset_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int j, value_type &energy_virial) const {
|
||||
const int i = j + offset;
|
||||
energy_virial.v[0] += f(i,0)*x(i,0);
|
||||
energy_virial.v[1] += f(i,1)*x(i,1);
|
||||
energy_virial.v[2] += f(i,2)*x(i,2);
|
||||
energy_virial.v[3] += f(i,1)*x(i,0);
|
||||
energy_virial.v[4] += f(i,2)*x(i,0);
|
||||
energy_virial.v[5] += f(i,2)*x(i,1);
|
||||
}
|
||||
};
|
||||
|
||||
template<class PairStyle>
|
||||
void pair_virial_fdotr_compute(PairStyle* fpair) {
|
||||
EV_FLOAT virial;
|
||||
if (fpair->neighbor->includegroup == 0) {
|
||||
int nall = fpair->atom->nlocal + fpair->atom->nghost;
|
||||
Kokkos::parallel_reduce(nall,PairVirialFDotRCompute<typename PairStyle::device_type>(fpair->x,fpair->f,0),virial);
|
||||
} else {
|
||||
Kokkos::parallel_reduce(fpair->atom->nfirst,PairVirialFDotRCompute<typename PairStyle::device_type>(fpair->x,fpair->f,0),virial);
|
||||
EV_FLOAT virial_ghost;
|
||||
Kokkos::parallel_reduce(fpair->atom->nghost,PairVirialFDotRCompute<typename PairStyle::device_type>(fpair->x,fpair->f,fpair->atom->nlocal),virial_ghost);
|
||||
virial+=virial_ghost;
|
||||
}
|
||||
fpair->vflag_fdotr = 0;
|
||||
fpair->virial[0] = virial.v[0];
|
||||
fpair->virial[1] = virial.v[1];
|
||||
fpair->virial[2] = virial.v[2];
|
||||
fpair->virial[3] = virial.v[3];
|
||||
fpair->virial[4] = virial.v[4];
|
||||
fpair->virial[5] = virial.v[5];
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,347 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "math.h"
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "string.h"
|
||||
#include "pair_lj_cut_coul_cut_kokkos.h"
|
||||
#include "kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "math_const.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "atom_masks.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathConst;
|
||||
|
||||
#define KOKKOS_CUDA_MAX_THREADS 256
|
||||
#define KOKKOS_CUDA_MIN_BLOCKS 8
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
PairLJCutCoulCutKokkos<DeviceType>::PairLJCutCoulCutKokkos(LAMMPS *lmp):PairLJCutCoulCut(lmp)
|
||||
{
|
||||
respa_enable = 0;
|
||||
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
|
||||
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
|
||||
cutsq = NULL;
|
||||
cut_ljsq = NULL;
|
||||
cut_coulsq = NULL;
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
PairLJCutCoulCutKokkos<DeviceType>::~PairLJCutCoulCutKokkos()
|
||||
{
|
||||
if (allocated){
|
||||
memory->destroy_kokkos(k_cutsq, cutsq);
|
||||
memory->destroy_kokkos(k_cut_ljsq, cut_ljsq);
|
||||
memory->destroy_kokkos(k_cut_coulsq, cut_coulsq);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairLJCutCoulCutKokkos<DeviceType>::cleanup_copy() {
|
||||
// WHY needed: this prevents parent copy from deallocating any arrays
|
||||
allocated = 0;
|
||||
cutsq = NULL;
|
||||
cut_ljsq = NULL;
|
||||
cut_coulsq = NULL;
|
||||
eatom = NULL;
|
||||
vatom = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairLJCutCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
{
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
|
||||
double evdwl = 0.0;
|
||||
double ecoul = 0.0;
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
k_cutsq.template sync<DeviceType>();
|
||||
k_cut_ljsq.template sync<DeviceType>();
|
||||
k_cut_coulsq.template sync<DeviceType>();
|
||||
k_params.template sync<DeviceType>();
|
||||
if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
|
||||
else atomKK->modified(execution_space,F_MASK);
|
||||
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
c_x = atomKK->k_x.view<DeviceType>();
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
q = atomKK->k_q.view<DeviceType>();
|
||||
type = atomKK->k_type.view<DeviceType>();
|
||||
nlocal = atom->nlocal;
|
||||
nall = atom->nlocal + atom->nghost;
|
||||
special_lj[0] = force->special_lj[0];
|
||||
special_lj[1] = force->special_lj[1];
|
||||
special_lj[2] = force->special_lj[2];
|
||||
special_lj[3] = force->special_lj[3];
|
||||
special_coul[0] = force->special_coul[0];
|
||||
special_coul[1] = force->special_coul[1];
|
||||
special_coul[2] = force->special_coul[2];
|
||||
special_coul[3] = force->special_coul[3];
|
||||
qqrd2e = force->qqrd2e;
|
||||
newton_pair = force->newton_pair;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
EV_FLOAT ev = pair_compute<PairLJCutCoulCutKokkos<DeviceType>,void >
|
||||
(this,(NeighListKokkos<DeviceType>*)list);
|
||||
|
||||
DeviceType::fence();
|
||||
|
||||
if (eflag) {
|
||||
eng_vdwl += ev.evdwl;
|
||||
eng_coul += ev.ecoul;
|
||||
}
|
||||
if (vflag_global) {
|
||||
virial[0] += ev.v[0];
|
||||
virial[1] += ev.v[1];
|
||||
virial[2] += ev.v[2];
|
||||
virial[3] += ev.v[3];
|
||||
virial[4] += ev.v[4];
|
||||
virial[5] += ev.v[5];
|
||||
}
|
||||
|
||||
if (vflag_fdotr) virial_fdotr_compute();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute LJ 12-6 pair force between atoms i and j
|
||||
---------------------------------------------------------------------- */
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairLJCutCoulCutKokkos<DeviceType>::
|
||||
compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype) const {
|
||||
const F_FLOAT r2inv = 1.0/rsq;
|
||||
const F_FLOAT r6inv = r2inv*r2inv*r2inv;
|
||||
F_FLOAT forcelj;
|
||||
|
||||
forcelj = r6inv *
|
||||
((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv -
|
||||
(STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2));
|
||||
|
||||
return forcelj*r2inv;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute coulomb pair force between atoms i and j
|
||||
---------------------------------------------------------------------- */
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairLJCutCoulCutKokkos<DeviceType>::
|
||||
compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
|
||||
const F_FLOAT r2inv = 1.0/rsq;
|
||||
const F_FLOAT rinv = sqrt(r2inv);
|
||||
F_FLOAT forcecoul;
|
||||
|
||||
forcecoul = qqrd2e*qtmp*q(j) *rinv;
|
||||
|
||||
return factor_coul*forcecoul*r2inv;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute LJ 12-6 pair potential energy between atoms i and j
|
||||
---------------------------------------------------------------------- */
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairLJCutCoulCutKokkos<DeviceType>::
|
||||
compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype) const {
|
||||
const F_FLOAT r2inv = 1.0/rsq;
|
||||
const F_FLOAT r6inv = r2inv*r2inv*r2inv;
|
||||
|
||||
return r6inv*
|
||||
((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv
|
||||
- (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4))
|
||||
- (STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset);
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute coulomb pair potential energy between atoms i and j
|
||||
---------------------------------------------------------------------- */
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairLJCutCoulCutKokkos<DeviceType>::
|
||||
compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
|
||||
const F_FLOAT r2inv = 1.0/rsq;
|
||||
const F_FLOAT rinv = sqrt(r2inv);
|
||||
|
||||
return factor_coul*qqrd2e*qtmp*q(j)*rinv;
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allocate all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairLJCutCoulCutKokkos<DeviceType>::allocate()
|
||||
{
|
||||
PairLJCutCoulCut::allocate();
|
||||
|
||||
int n = atom->ntypes;
|
||||
memory->destroy(cutsq);
|
||||
memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
|
||||
d_cutsq = k_cutsq.template view<DeviceType>();
|
||||
memory->destroy(cut_ljsq);
|
||||
memory->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq");
|
||||
d_cut_ljsq = k_cut_ljsq.template view<DeviceType>();
|
||||
memory->destroy(cut_coulsq);
|
||||
memory->create_kokkos(k_cut_coulsq,cut_coulsq,n+1,n+1,"pair:cut_coulsq");
|
||||
d_cut_coulsq = k_cut_coulsq.template view<DeviceType>();
|
||||
k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCutCoulCut::params",n+1,n+1);
|
||||
params = k_params.d_view;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
global settings
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairLJCutCoulCutKokkos<DeviceType>::settings(int narg, char **arg)
|
||||
{
|
||||
if (narg > 2) error->all(FLERR,"Illegal pair_style command");
|
||||
|
||||
PairLJCutCoulCut::settings(1,arg);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairLJCutCoulCutKokkos<DeviceType>::init_style()
|
||||
{
|
||||
PairLJCutCoulCut::init_style();
|
||||
|
||||
// error if rRESPA with inner levels
|
||||
|
||||
if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
|
||||
int respa = 0;
|
||||
if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
|
||||
if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
|
||||
if (respa)
|
||||
error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
|
||||
}
|
||||
|
||||
// irequest = neigh request made by parent class
|
||||
|
||||
neighflag = lmp->kokkos->neighflag;
|
||||
int irequest = neighbor->nrequest - 1;
|
||||
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
|
||||
!Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk");
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init for one type pair i,j and corresponding j,i
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double PairLJCutCoulCutKokkos<DeviceType>::init_one(int i, int j)
|
||||
{
|
||||
double cutone = PairLJCutCoulCut::init_one(i,j);
|
||||
double cut_ljsqm = cut_ljsq[i][j];
|
||||
double cut_coulsqm = cut_coulsq[i][j];
|
||||
|
||||
k_params.h_view(i,j).lj1 = lj1[i][j];
|
||||
k_params.h_view(i,j).lj2 = lj2[i][j];
|
||||
k_params.h_view(i,j).lj3 = lj3[i][j];
|
||||
k_params.h_view(i,j).lj4 = lj4[i][j];
|
||||
k_params.h_view(i,j).offset = offset[i][j];
|
||||
k_params.h_view(i,j).cut_ljsq = cut_ljsqm;
|
||||
k_params.h_view(i,j).cut_coulsq = cut_coulsqm;
|
||||
|
||||
k_params.h_view(j,i) = k_params.h_view(i,j);
|
||||
if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
|
||||
m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
|
||||
m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
|
||||
m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cut_ljsqm;
|
||||
m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cut_coulsqm;
|
||||
}
|
||||
k_cutsq.h_view(i,j) = cutone*cutone;
|
||||
k_cutsq.template modify<LMPHostType>();
|
||||
k_cut_ljsq.h_view(i,j) = cut_ljsqm;
|
||||
k_cut_ljsq.template modify<LMPHostType>();
|
||||
k_cut_coulsq.h_view(i,j) = cut_coulsqm;
|
||||
k_cut_coulsq.template modify<LMPHostType>();
|
||||
k_params.template modify<LMPHostType>();
|
||||
|
||||
return cutone;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template class PairLJCutCoulCutKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class PairLJCutCoulCutKokkos<LMPHostType>;
|
||||
#endif
|
|
@ -0,0 +1,131 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(lj/cut/coul/cut/kk,PairLJCutCoulCutKokkos<LMPDeviceType>)
|
||||
PairStyle(lj/cut/coul/cut/kk/device,PairLJCutCoulCutKokkos<LMPDeviceType>)
|
||||
PairStyle(lj/cut/coul/cut/kk/host,PairLJCutCoulCutKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_LJ_CUT_COUL_CUT_KOKKOS_H
|
||||
#define LMP_PAIR_LJ_CUT_COUL_CUT_KOKKOS_H
|
||||
|
||||
#include "pair_kokkos.h"
|
||||
#include "pair_lj_cut_coul_cut.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
class PairLJCutCoulCutKokkos : public PairLJCutCoulCut {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};
|
||||
enum {COUL_FLAG=1};
|
||||
typedef DeviceType device_type;
|
||||
PairLJCutCoulCutKokkos(class LAMMPS *);
|
||||
~PairLJCutCoulCutKokkos();
|
||||
|
||||
void compute(int, int);
|
||||
|
||||
void settings(int, char **);
|
||||
void init_style();
|
||||
double init_one(int, int);
|
||||
|
||||
struct params_lj_coul{
|
||||
params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
|
||||
params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
|
||||
F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
|
||||
};
|
||||
|
||||
protected:
|
||||
void cleanup_copy();
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype) const;
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype) const;
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
|
||||
|
||||
Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType> k_params;
|
||||
typename Kokkos::DualView<params_lj_coul**,
|
||||
Kokkos::LayoutRight,DeviceType>::t_dev_const params;
|
||||
// hardwired to space for 15 atom types
|
||||
params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
|
||||
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
typename ArrayTypes<DeviceType>::t_x_array_randomread x;
|
||||
typename ArrayTypes<DeviceType>::t_x_array c_x;
|
||||
typename ArrayTypes<DeviceType>::t_f_array f;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
|
||||
typename ArrayTypes<DeviceType>::t_float_1d_randomread q;
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
|
||||
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
|
||||
|
||||
int newton_pair;
|
||||
|
||||
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
|
||||
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_ljsq;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_ljsq;
|
||||
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_coulsq;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_coulsq;
|
||||
|
||||
class AtomKokkos *atomKK;
|
||||
int neighflag;
|
||||
int nlocal,nall,eflag,vflag;
|
||||
|
||||
double special_coul[4];
|
||||
double special_lj[4];
|
||||
double qqrd2e;
|
||||
|
||||
void allocate();
|
||||
friend class PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,true>;
|
||||
friend class PairComputeFunctor<PairLJCutCoulCutKokkos,HALF,true>;
|
||||
friend class PairComputeFunctor<PairLJCutCoulCutKokkos,HALFTHREAD,true>;
|
||||
friend class PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,false>;
|
||||
friend class PairComputeFunctor<PairLJCutCoulCutKokkos,HALF,false>;
|
||||
friend class PairComputeFunctor<PairLJCutCoulCutKokkos,HALFTHREAD,false>;
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,FULL,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,HALF,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,HALFTHREAD,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute<PairLJCutCoulCutKokkos,void>(PairLJCutCoulCutKokkos*,
|
||||
NeighListKokkos<DeviceType>*);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -0,0 +1,496 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "math.h"
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "string.h"
|
||||
#include "pair_lj_cut_coul_long_kokkos.h"
|
||||
#include "kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "math_const.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "atom_masks.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathConst;
|
||||
|
||||
#define KOKKOS_CUDA_MAX_THREADS 256
|
||||
#define KOKKOS_CUDA_MIN_BLOCKS 8
|
||||
|
||||
|
||||
#define EWALD_F 1.12837917
|
||||
#define EWALD_P 0.3275911
|
||||
#define A1 0.254829592
|
||||
#define A2 -0.284496736
|
||||
#define A3 1.421413741
|
||||
#define A4 -1.453152027
|
||||
#define A5 1.061405429
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
PairLJCutCoulLongKokkos<DeviceType>::PairLJCutCoulLongKokkos(LAMMPS *lmp):PairLJCutCoulLong(lmp)
|
||||
{
|
||||
respa_enable = 0;
|
||||
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
|
||||
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
|
||||
cutsq = NULL;
|
||||
cut_ljsq = NULL;
|
||||
cut_coulsq = NULL;
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
PairLJCutCoulLongKokkos<DeviceType>::~PairLJCutCoulLongKokkos()
|
||||
{
|
||||
if (allocated){
|
||||
memory->destroy_kokkos(k_cutsq, cutsq);
|
||||
memory->destroy_kokkos(k_cut_ljsq, cut_ljsq);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairLJCutCoulLongKokkos<DeviceType>::cleanup_copy() {
|
||||
// WHY needed: this prevents parent copy from deallocating any arrays
|
||||
allocated = 0;
|
||||
cutsq = NULL;
|
||||
cut_ljsq = NULL;
|
||||
eatom = NULL;
|
||||
vatom = NULL;
|
||||
ftable = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
{
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
|
||||
double evdwl = 0.0;
|
||||
double ecoul = 0.0;
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
k_cutsq.template sync<DeviceType>();
|
||||
k_cut_ljsq.template sync<DeviceType>();
|
||||
k_cut_coulsq.template sync<DeviceType>();
|
||||
k_params.template sync<DeviceType>();
|
||||
if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
|
||||
else atomKK->modified(execution_space,F_MASK);
|
||||
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
c_x = atomKK->k_x.view<DeviceType>();
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
q = atomKK->k_q.view<DeviceType>();
|
||||
type = atomKK->k_type.view<DeviceType>();
|
||||
nlocal = atom->nlocal;
|
||||
nall = atom->nlocal + atom->nghost;
|
||||
special_lj[0] = force->special_lj[0];
|
||||
special_lj[1] = force->special_lj[1];
|
||||
special_lj[2] = force->special_lj[2];
|
||||
special_lj[3] = force->special_lj[3];
|
||||
special_coul[0] = force->special_coul[0];
|
||||
special_coul[1] = force->special_coul[1];
|
||||
special_coul[2] = force->special_coul[2];
|
||||
special_coul[3] = force->special_coul[3];
|
||||
qqrd2e = force->qqrd2e;
|
||||
newton_pair = force->newton_pair;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
EV_FLOAT ev;
|
||||
if(ncoultablebits)
|
||||
ev = pair_compute<PairLJCutCoulLongKokkos<DeviceType>,CoulLongTable<1> >
|
||||
(this,(NeighListKokkos<DeviceType>*)list);
|
||||
else
|
||||
ev = pair_compute<PairLJCutCoulLongKokkos<DeviceType>,CoulLongTable<0> >
|
||||
(this,(NeighListKokkos<DeviceType>*)list);
|
||||
|
||||
|
||||
DeviceType::fence();
|
||||
|
||||
if (eflag) {
|
||||
eng_vdwl += ev.evdwl;
|
||||
eng_coul += ev.ecoul;
|
||||
}
|
||||
if (vflag_global) {
|
||||
virial[0] += ev.v[0];
|
||||
virial[1] += ev.v[1];
|
||||
virial[2] += ev.v[2];
|
||||
virial[3] += ev.v[3];
|
||||
virial[4] += ev.v[4];
|
||||
virial[5] += ev.v[5];
|
||||
}
|
||||
|
||||
if (vflag_fdotr) pair_virial_fdotr_compute(this);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute LJ 12-6 pair force between atoms i and j
|
||||
---------------------------------------------------------------------- */
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairLJCutCoulLongKokkos<DeviceType>::
|
||||
compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype) const {
|
||||
const F_FLOAT r2inv = 1.0/rsq;
|
||||
const F_FLOAT r6inv = r2inv*r2inv*r2inv;
|
||||
F_FLOAT forcelj;
|
||||
|
||||
forcelj = r6inv *
|
||||
((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv -
|
||||
(STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2));
|
||||
|
||||
return forcelj*r2inv;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute coulomb pair force between atoms i and j
|
||||
---------------------------------------------------------------------- */
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairLJCutCoulLongKokkos<DeviceType>::
|
||||
compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
|
||||
if(Specialisation::DoTable && rsq > tabinnersq) {
|
||||
union_int_float_t rsq_lookup;
|
||||
rsq_lookup.f = rsq;
|
||||
const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits;
|
||||
const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable];
|
||||
const F_FLOAT table = d_ftable[itable] + fraction*d_dftable[itable];
|
||||
F_FLOAT forcecoul = qtmp*q[j] * table;
|
||||
if (factor_coul < 1.0) {
|
||||
const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable];
|
||||
const F_FLOAT prefactor = qtmp*q[j] * table;
|
||||
forcecoul -= (1.0-factor_coul)*prefactor;
|
||||
}
|
||||
return forcecoul/rsq;
|
||||
} else {
|
||||
const F_FLOAT r = sqrt(rsq);
|
||||
const F_FLOAT grij = g_ewald * r;
|
||||
const F_FLOAT expm2 = exp(-grij*grij);
|
||||
const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij);
|
||||
const F_FLOAT rinv = 1.0/r;
|
||||
const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
const F_FLOAT prefactor = qqrd2e * qtmp*q[j]*rinv;
|
||||
F_FLOAT forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
|
||||
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
|
||||
|
||||
return forcecoul*rinv*rinv;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute LJ 12-6 pair potential energy between atoms i and j
|
||||
---------------------------------------------------------------------- */
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairLJCutCoulLongKokkos<DeviceType>::
|
||||
compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype) const {
|
||||
const F_FLOAT r2inv = 1.0/rsq;
|
||||
const F_FLOAT r6inv = r2inv*r2inv*r2inv;
|
||||
|
||||
return r6inv*
|
||||
((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv
|
||||
- (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4))
|
||||
- (STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset);
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute coulomb pair potential energy between atoms i and j
|
||||
---------------------------------------------------------------------- */
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairLJCutCoulLongKokkos<DeviceType>::
|
||||
compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
|
||||
if(Specialisation::DoTable) {
|
||||
union_int_float_t rsq_lookup;
|
||||
rsq_lookup.f = rsq;
|
||||
const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits;
|
||||
const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable];
|
||||
const F_FLOAT table = d_etable[itable] + fraction*d_detable[itable];
|
||||
F_FLOAT ecoul = qtmp*q[j] * table;
|
||||
if (factor_coul < 1.0) {
|
||||
const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable];
|
||||
const F_FLOAT prefactor = qtmp*q[j] * table;
|
||||
ecoul -= (1.0-factor_coul)*prefactor;
|
||||
}
|
||||
return ecoul;
|
||||
} else {
|
||||
const F_FLOAT r = sqrt(rsq);
|
||||
const F_FLOAT grij = g_ewald * r;
|
||||
const F_FLOAT expm2 = exp(-grij*grij);
|
||||
const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij);
|
||||
const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
const F_FLOAT prefactor = qqrd2e * qtmp*q[j]/r;
|
||||
F_FLOAT ecoul = prefactor * erfc;
|
||||
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
|
||||
return ecoul;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allocate all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairLJCutCoulLongKokkos<DeviceType>::allocate()
|
||||
{
|
||||
PairLJCutCoulLong::allocate();
|
||||
|
||||
int n = atom->ntypes;
|
||||
memory->destroy(cutsq);
|
||||
memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
|
||||
d_cutsq = k_cutsq.template view<DeviceType>();
|
||||
memory->destroy(cut_ljsq);
|
||||
memory->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq");
|
||||
d_cut_ljsq = k_cut_ljsq.template view<DeviceType>();
|
||||
|
||||
memory->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq");
|
||||
d_cut_coulsq = k_cut_coulsq.template view<DeviceType>();
|
||||
k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCutCoulLong::params",n+1,n+1);
|
||||
params = k_params.d_view;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
void PairLJCutCoulLongKokkos<DeviceType>::init_tables(double cut_coul, double *cut_respa)
|
||||
{
|
||||
Pair::init_tables(cut_coul,cut_respa);
|
||||
|
||||
typedef typename ArrayTypes<DeviceType>::t_ffloat_1d table_type;
|
||||
typedef typename ArrayTypes<LMPHostType>::t_ffloat_1d host_table_type;
|
||||
|
||||
int ntable = 1;
|
||||
for (int i = 0; i < ncoultablebits; i++) ntable *= 2;
|
||||
|
||||
|
||||
// Copy rtable and drtable
|
||||
{
|
||||
host_table_type h_table("HostTable",ntable);
|
||||
table_type d_table("DeviceTable",ntable);
|
||||
for(int i = 0; i < ntable; i++) {
|
||||
h_table(i) = rtable[i];
|
||||
}
|
||||
Kokkos::deep_copy(d_table,h_table);
|
||||
d_rtable = d_table;
|
||||
}
|
||||
|
||||
{
|
||||
host_table_type h_table("HostTable",ntable);
|
||||
table_type d_table("DeviceTable",ntable);
|
||||
for(int i = 0; i < ntable; i++) {
|
||||
h_table(i) = drtable[i];
|
||||
}
|
||||
Kokkos::deep_copy(d_table,h_table);
|
||||
d_drtable = d_table;
|
||||
}
|
||||
|
||||
{
|
||||
host_table_type h_table("HostTable",ntable);
|
||||
table_type d_table("DeviceTable",ntable);
|
||||
|
||||
// Copy ftable and dftable
|
||||
for(int i = 0; i < ntable; i++) {
|
||||
h_table(i) = ftable[i];
|
||||
}
|
||||
Kokkos::deep_copy(d_table,h_table);
|
||||
d_ftable = d_table;
|
||||
}
|
||||
|
||||
{
|
||||
host_table_type h_table("HostTable",ntable);
|
||||
table_type d_table("DeviceTable",ntable);
|
||||
|
||||
for(int i = 0; i < ntable; i++) {
|
||||
h_table(i) = dftable[i];
|
||||
}
|
||||
Kokkos::deep_copy(d_table,h_table);
|
||||
d_dftable = d_table;
|
||||
}
|
||||
|
||||
{
|
||||
host_table_type h_table("HostTable",ntable);
|
||||
table_type d_table("DeviceTable",ntable);
|
||||
|
||||
// Copy ctable and dctable
|
||||
for(int i = 0; i < ntable; i++) {
|
||||
h_table(i) = ctable[i];
|
||||
}
|
||||
Kokkos::deep_copy(d_table,h_table);
|
||||
d_ctable = d_table;
|
||||
}
|
||||
|
||||
{
|
||||
host_table_type h_table("HostTable",ntable);
|
||||
table_type d_table("DeviceTable",ntable);
|
||||
|
||||
for(int i = 0; i < ntable; i++) {
|
||||
h_table(i) = dctable[i];
|
||||
}
|
||||
Kokkos::deep_copy(d_table,h_table);
|
||||
d_dctable = d_table;
|
||||
}
|
||||
|
||||
{
|
||||
host_table_type h_table("HostTable",ntable);
|
||||
table_type d_table("DeviceTable",ntable);
|
||||
|
||||
// Copy etable and detable
|
||||
for(int i = 0; i < ntable; i++) {
|
||||
h_table(i) = etable[i];
|
||||
}
|
||||
Kokkos::deep_copy(d_table,h_table);
|
||||
d_etable = d_table;
|
||||
}
|
||||
|
||||
{
|
||||
host_table_type h_table("HostTable",ntable);
|
||||
table_type d_table("DeviceTable",ntable);
|
||||
|
||||
for(int i = 0; i < ntable; i++) {
|
||||
h_table(i) = detable[i];
|
||||
}
|
||||
Kokkos::deep_copy(d_table,h_table);
|
||||
d_detable = d_table;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
global settings
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairLJCutCoulLongKokkos<DeviceType>::settings(int narg, char **arg)
|
||||
{
|
||||
if (narg > 2) error->all(FLERR,"Illegal pair_style command");
|
||||
|
||||
PairLJCutCoulLong::settings(narg,arg);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairLJCutCoulLongKokkos<DeviceType>::init_style()
|
||||
{
|
||||
PairLJCutCoulLong::init_style();
|
||||
|
||||
// error if rRESPA with inner levels
|
||||
|
||||
if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
|
||||
int respa = 0;
|
||||
if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
|
||||
if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
|
||||
if (respa)
|
||||
error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
|
||||
}
|
||||
|
||||
// irequest = neigh request made by parent class
|
||||
|
||||
neighflag = lmp->kokkos->neighflag;
|
||||
int irequest = neighbor->nrequest - 1;
|
||||
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
|
||||
!Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/long/kk");
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init for one type pair i,j and corresponding j,i
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double PairLJCutCoulLongKokkos<DeviceType>::init_one(int i, int j)
|
||||
{
|
||||
double cutone = PairLJCutCoulLong::init_one(i,j);
|
||||
double cut_ljsqm = cut_ljsq[i][j];
|
||||
double cut_coulsqm = cut_coulsq;
|
||||
|
||||
k_params.h_view(i,j).lj1 = lj1[i][j];
|
||||
k_params.h_view(i,j).lj2 = lj2[i][j];
|
||||
k_params.h_view(i,j).lj3 = lj3[i][j];
|
||||
k_params.h_view(i,j).lj4 = lj4[i][j];
|
||||
k_params.h_view(i,j).offset = offset[i][j];
|
||||
k_params.h_view(i,j).cut_ljsq = cut_ljsqm;
|
||||
k_params.h_view(i,j).cut_coulsq = cut_coulsqm;
|
||||
|
||||
k_params.h_view(j,i) = k_params.h_view(i,j);
|
||||
if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
|
||||
m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
|
||||
m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
|
||||
m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cut_ljsqm;
|
||||
m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cut_coulsqm;
|
||||
}
|
||||
|
||||
k_cutsq.h_view(i,j) = cutone*cutone;
|
||||
k_cutsq.template modify<LMPHostType>();
|
||||
k_cut_ljsq.h_view(i,j) = cut_ljsqm;
|
||||
k_cut_ljsq.template modify<LMPHostType>();
|
||||
k_cut_coulsq.h_view(i,j) = cut_coulsqm;
|
||||
k_cut_coulsq.template modify<LMPHostType>();
|
||||
k_params.template modify<LMPHostType>();
|
||||
|
||||
return cutone;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template class PairLJCutCoulLongKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class PairLJCutCoulLongKokkos<LMPHostType>;
|
||||
#endif
|
|
@ -0,0 +1,147 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(lj/cut/coul/long/kk,PairLJCutCoulLongKokkos<LMPDeviceType>)
|
||||
PairStyle(lj/cut/coul/long/kk/device,PairLJCutCoulLongKokkos<LMPDeviceType>)
|
||||
PairStyle(lj/cut/coul/long/kk/host,PairLJCutCoulLongKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_LJ_CUT_COUL_LONG_KOKKOS_H
|
||||
#define LMP_PAIR_LJ_CUT_COUL_LONG_KOKKOS_H
|
||||
|
||||
#include "pair_kokkos.h"
|
||||
#include "pair_lj_cut_coul_long.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
class PairLJCutCoulLongKokkos : public PairLJCutCoulLong {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};
|
||||
enum {COUL_FLAG=1};
|
||||
typedef DeviceType device_type;
|
||||
PairLJCutCoulLongKokkos(class LAMMPS *);
|
||||
~PairLJCutCoulLongKokkos();
|
||||
|
||||
void compute(int, int);
|
||||
|
||||
void settings(int, char **);
|
||||
void init_tables(double cut_coul, double *cut_respa);
|
||||
void init_style();
|
||||
double init_one(int, int);
|
||||
|
||||
struct params_lj_coul{
|
||||
params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
|
||||
params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
|
||||
F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
|
||||
};
|
||||
|
||||
protected:
|
||||
void cleanup_copy();
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype) const;
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype,
|
||||
const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype) const;
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
|
||||
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
|
||||
|
||||
Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType> k_params;
|
||||
typename Kokkos::DualView<params_lj_coul**,
|
||||
Kokkos::LayoutRight,DeviceType>::t_dev_const params;
|
||||
// hardwired to space for 15 atom types
|
||||
params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
|
||||
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
typename ArrayTypes<DeviceType>::t_x_array_randomread x;
|
||||
typename ArrayTypes<DeviceType>::t_x_array c_x;
|
||||
typename ArrayTypes<DeviceType>::t_f_array f;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
|
||||
typename ArrayTypes<DeviceType>::t_float_1d_randomread q;
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
|
||||
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
|
||||
|
||||
int newton_pair;
|
||||
|
||||
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
|
||||
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_ljsq;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_ljsq;
|
||||
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_coulsq;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_coulsq;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_1d_randomread
|
||||
d_rtable, d_drtable, d_ftable, d_dftable,
|
||||
d_ctable, d_dctable, d_etable, d_detable;
|
||||
class AtomKokkos *atomKK;
|
||||
int neighflag;
|
||||
int nlocal,nall,eflag,vflag;
|
||||
|
||||
double special_coul[4];
|
||||
double special_lj[4];
|
||||
double qqrd2e;
|
||||
|
||||
void allocate();
|
||||
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,CoulLongTable<1> >;
|
||||
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,true,CoulLongTable<1> >;
|
||||
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
|
||||
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,CoulLongTable<1> >;
|
||||
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,false,CoulLongTable<1> >;
|
||||
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALF,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute<PairLJCutCoulLongKokkos,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,
|
||||
NeighListKokkos<DeviceType>*);
|
||||
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,CoulLongTable<0> >;
|
||||
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,true,CoulLongTable<0> >;
|
||||
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
|
||||
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,CoulLongTable<0> >;
|
||||
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,false,CoulLongTable<0> >;
|
||||
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALF,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute<PairLJCutCoulLongKokkos,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,
|
||||
NeighListKokkos<DeviceType>*);
|
||||
friend void pair_virial_fdotr_compute<PairLJCutCoulLongKokkos>(PairLJCutCoulLongKokkos*);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -99,18 +99,18 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
c_x = atomKK->k_x.view<DeviceType>();
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
type = atomKK->k_type.view<DeviceType>();
|
||||
tag = atomKK->k_tag.view<DeviceType>();
|
||||
nlocal = atom->nlocal;
|
||||
nall = atom->nlocal + atom->nghost;
|
||||
newton_pair = force->newton_pair;
|
||||
special_lj[0] = force->special_lj[0];
|
||||
special_lj[1] = force->special_lj[1];
|
||||
special_lj[2] = force->special_lj[2];
|
||||
special_lj[3] = force->special_lj[3];
|
||||
newton_pair = force->newton_pair;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
EV_FLOAT ev = pair_compute<PairLJCutKokkos<DeviceType>,void >(this,(NeighListKokkos<DeviceType>*)list);
|
||||
|
||||
DeviceType::fence();
|
||||
|
||||
if (eflag) eng_vdwl += ev.evdwl;
|
||||
|
@ -123,7 +123,7 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
virial[5] += ev.v[5];
|
||||
}
|
||||
|
||||
if (vflag_fdotr) virial_fdotr_compute();
|
||||
if (vflag_fdotr) pair_virial_fdotr_compute(this);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
|
@ -131,12 +131,15 @@ template<bool STACKPARAMS, class Specialisation>
|
|||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairLJCutKokkos<DeviceType>::
|
||||
compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
|
||||
(void) i;
|
||||
(void) j;
|
||||
const F_FLOAT r2inv = 1.0/rsq;
|
||||
const F_FLOAT r6inv = r2inv*r2inv*r2inv;
|
||||
|
||||
const F_FLOAT forcelj = r6inv *
|
||||
((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv -
|
||||
(STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2));
|
||||
|
||||
return forcelj*r2inv;
|
||||
}
|
||||
|
||||
|
@ -145,8 +148,11 @@ template<bool STACKPARAMS, class Specialisation>
|
|||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairLJCutKokkos<DeviceType>::
|
||||
compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
|
||||
(void) i;
|
||||
(void) j;
|
||||
const F_FLOAT r2inv = 1.0/rsq;
|
||||
const F_FLOAT r6inv = r2inv*r2inv*r2inv;
|
||||
|
||||
return r6inv*((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv -
|
||||
(STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)) -
|
||||
(STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset);
|
||||
|
@ -262,6 +268,6 @@ double PairLJCutKokkos<DeviceType>::init_one(int i, int j)
|
|||
|
||||
|
||||
template class PairLJCutKokkos<LMPDeviceType>;
|
||||
#if DEVICE==2
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class PairLJCutKokkos<LMPHostType>;
|
||||
#endif
|
||||
|
|
|
@ -31,6 +31,7 @@ namespace LAMMPS_NS {
|
|||
template<class DeviceType>
|
||||
class PairLJCutKokkos : public PairLJCut {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
PairLJCutKokkos(class LAMMPS *);
|
||||
|
@ -76,9 +77,10 @@ class PairLJCutKokkos : public PairLJCut {
|
|||
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
|
||||
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
|
||||
//typename ArrayTypes<DeviceType>::t_ffloat_1d special_lj;
|
||||
typename ArrayTypes<DeviceType>::t_tagint_1d tag;
|
||||
|
||||
int newton_pair;
|
||||
double special_lj[4];
|
||||
|
||||
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
|
||||
|
@ -98,8 +100,13 @@ class PairLJCutKokkos : public PairLJCut {
|
|||
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,false>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,N2,false>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,false >;
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,FULL,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALF,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALFTHREAD,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,N2,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_fullcluster<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
|
||||
friend void pair_virial_fdotr_compute<PairLJCutKokkos>(PairLJCutKokkos*);
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -153,8 +153,9 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
|
|||
const int teamsize = 1;
|
||||
#endif
|
||||
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
|
||||
if (eflag || vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),f,ev);
|
||||
else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),f);
|
||||
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize);
|
||||
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
|
||||
else Kokkos::parallel_for(config,f);
|
||||
}
|
||||
} else {
|
||||
if (neighflag == FULL) {
|
||||
|
@ -187,8 +188,9 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
|
|||
const int teamsize = 1;
|
||||
#endif
|
||||
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
|
||||
if (eflag || vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),f,ev);
|
||||
else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),f);
|
||||
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize);
|
||||
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
|
||||
else Kokkos::parallel_for(config,f);
|
||||
}
|
||||
}
|
||||
DeviceType::fence();
|
||||
|
@ -203,7 +205,7 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
|
|||
virial[5] += ev.v[5];
|
||||
}
|
||||
|
||||
if (vflag_fdotr) virial_fdotr_compute();
|
||||
if (vflag_fdotr) pair_virial_fdotr_compute(this);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
|
@ -211,6 +213,8 @@ template<bool STACKPARAMS, class Specialisation>
|
|||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairTableKokkos<DeviceType>::
|
||||
compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
|
||||
(void) i;
|
||||
(void) j;
|
||||
union_int_float_t rsq_lookup;
|
||||
double fpair;
|
||||
const int tidx = d_table_const.tabindex(itype,jtype);
|
||||
|
@ -254,6 +258,8 @@ template<bool STACKPARAMS, class Specialisation>
|
|||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairTableKokkos<DeviceType>::
|
||||
compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
|
||||
(void) i;
|
||||
(void) j;
|
||||
double evdwl;
|
||||
union_int_float_t rsq_lookup;
|
||||
const int tidx = d_table_const.tabindex(itype,jtype);
|
||||
|
@ -292,128 +298,6 @@ compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, c
|
|||
return evdwl;
|
||||
}
|
||||
|
||||
/*
|
||||
template<class DeviceType>
|
||||
template<int EVFLAG, int NEIGHFLAG, int NEWTON_PAIR,int TABSTYLE>
|
||||
KOKKOS_FUNCTION
|
||||
EV_FLOAT PairTableKokkos<DeviceType>::
|
||||
compute_item(const int &ii, const NeighListKokkos<DeviceType> &list) const
|
||||
{
|
||||
EV_FLOAT ev;
|
||||
const int tlm1 = tablength - 1;
|
||||
union_int_float_t rsq_lookup;
|
||||
const int i = list.d_ilist[ii];
|
||||
const X_FLOAT xtmp = x(i,0);
|
||||
const X_FLOAT ytmp = x(i,1);
|
||||
const X_FLOAT ztmp = x(i,2);
|
||||
const int itype = type(i);
|
||||
|
||||
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
|
||||
const int jnum = list.d_numneigh[i];
|
||||
|
||||
F_FLOAT fxtmp = 0.0;
|
||||
F_FLOAT fytmp = 0.0;
|
||||
F_FLOAT fztmp = 0.0;
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
int j = neighbors_i(jj);
|
||||
const F_FLOAT factor_lj = 1.0; //special_lj[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
const X_FLOAT delx = xtmp - x(j,0);
|
||||
const X_FLOAT dely = ytmp - x(j,1);
|
||||
const X_FLOAT delz = ztmp - x(j,2);
|
||||
const int jtype = type(j);
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq < d_table_const.cutsq(itype,jtype)) {
|
||||
double fpair;
|
||||
const int tidx = d_table_const.tabindex(itype,jtype);
|
||||
//const Table* const tb = &tables[tabindex[itype][jtype]];
|
||||
|
||||
//if (rsq < d_table_const.innersq(tidx))
|
||||
// error->one(FLERR,"Pair distance < table inner cutoff");
|
||||
|
||||
if (TABSTYLE == LOOKUP) {
|
||||
const int itable = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
|
||||
//if (itable >= tlm1)
|
||||
// error->one(FLERR,"Pair distance > table outer cutoff");
|
||||
fpair = factor_lj * d_table_const.f(tidx,itable);
|
||||
if (EVFLAG)
|
||||
ev.evdwl = d_table_const.e(tidx,itable);
|
||||
} else if (TABSTYLE == LINEAR) {
|
||||
const int itable = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
|
||||
//if (itable >= tlm1)
|
||||
// error->one(FLERR,"Pair distance > table outer cutoff");
|
||||
const double fraction = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx);
|
||||
const double value = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable);
|
||||
fpair = factor_lj * value;
|
||||
if (EVFLAG)
|
||||
ev.evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable);
|
||||
} else if (TABSTYLE == SPLINE) {
|
||||
const int itable = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
|
||||
//if (itable >= tlm1)
|
||||
// error->one(FLERR,"Pair distance > table outer cutoff");
|
||||
const double b = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx);
|
||||
const double a = 1.0 - b;
|
||||
const double value = a * d_table_const.f(tidx,itable) + b * d_table_const.f(tidx,itable+1) +
|
||||
((a*a*a-a)*d_table_const.f2(tidx,itable) + (b*b*b-b)*d_table_const.f2(tidx,itable+1)) *
|
||||
d_table_const.deltasq6(tidx);
|
||||
fpair = factor_lj * value;
|
||||
if (EVFLAG)
|
||||
ev.evdwl = a * d_table_const.e(tidx,itable) + b * d_table_const.e(tidx,itable+1) +
|
||||
((a*a*a-a)*d_table_const.e2(tidx,itable) + (b*b*b-b)*d_table_const.e2(tidx,itable+1)) *
|
||||
d_table_const.deltasq6(tidx);
|
||||
} else {
|
||||
rsq_lookup.f = rsq;
|
||||
int itable = rsq_lookup.i & d_table_const.nmask(tidx);
|
||||
itable >>= d_table_const.nshiftbits(tidx);
|
||||
const double fraction = (rsq_lookup.f - d_table_const.rsq(tidx,itable)) * d_table_const.drsq(tidx,itable);
|
||||
const double value = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable);
|
||||
fpair = factor_lj * value;
|
||||
if (EVFLAG)
|
||||
ev.evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable);
|
||||
}
|
||||
|
||||
fxtmp += delx*fpair;
|
||||
fytmp += dely*fpair;
|
||||
fztmp += delz*fpair;
|
||||
if ((NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
|
||||
Kokkos::atomic_fetch_add(&f(j,0),-delx*fpair);
|
||||
Kokkos::atomic_fetch_add(&f(j,1),-dely*fpair);
|
||||
Kokkos::atomic_fetch_add(&f(j,2),-delz*fpair);
|
||||
}
|
||||
|
||||
if ((NEIGHFLAG==HALF) && (NEWTON_PAIR || j < nlocal)) {
|
||||
f(j,0) -= delx*fpair;
|
||||
f(j,1) -= dely*fpair;
|
||||
f(j,2) -= delz*fpair;
|
||||
}
|
||||
|
||||
if(EVFLAG) {
|
||||
if (eflag) {
|
||||
ev.evdwl *= factor_lj;
|
||||
}
|
||||
|
||||
if (evflag) ev_tally<NEIGHFLAG>(ev,i,j
|
||||
,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (NEIGHFLAG == HALFTHREAD) {
|
||||
Kokkos::atomic_fetch_add(&f(i,0),fxtmp);
|
||||
Kokkos::atomic_fetch_add(&f(i,1),fytmp);
|
||||
Kokkos::atomic_fetch_add(&f(i,2),fztmp);
|
||||
} else {
|
||||
f(i,0) += fxtmp;
|
||||
f(i,1) += fytmp;
|
||||
f(i,2) += fztmp;
|
||||
}
|
||||
|
||||
return ev;
|
||||
}
|
||||
*/
|
||||
|
||||
template<class DeviceType>
|
||||
void PairTableKokkos<DeviceType>::create_kokkos_tables()
|
||||
{
|
||||
|
@ -880,7 +764,6 @@ void PairTableKokkos<DeviceType>::param_extract(Table *tb, char *line)
|
|||
word = strtok(NULL," \t\n\r\f");
|
||||
tb->fphi = atof(word);
|
||||
} else {
|
||||
printf("WORD: %s\n",word);
|
||||
error->one(FLERR,"Invalid keyword in pair table parameters");
|
||||
}
|
||||
word = strtok(NULL," \t\n\r\f");
|
||||
|
@ -1494,7 +1377,7 @@ void PairTableKokkos<DeviceType>::cleanup_copy() {
|
|||
}
|
||||
|
||||
template class PairTableKokkos<LMPDeviceType>;
|
||||
#if DEVICE==2
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class PairTableKokkos<LMPHostType>;
|
||||
#endif
|
||||
|
||||
|
|
|
@ -41,6 +41,7 @@ template<class DeviceType>
|
|||
class PairTableKokkos : public Pair {
|
||||
public:
|
||||
|
||||
enum {EnabledNeighFlags=FULL&HALFTHREAD&HALF&N2&FULLCLUSTER};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
|
||||
|
@ -208,67 +209,10 @@ class PairTableKokkos : public Pair {
|
|||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
/*template<int FULL_NEIGH>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
|
||||
const F_FLOAT &fpair, const F_FLOAT &delx,
|
||||
const F_FLOAT &dely, const F_FLOAT &delz) const;
|
||||
*/
|
||||
};
|
||||
/*
|
||||
template <class DeviceType, int NEIGHFLAG, int TABSTYLE>
|
||||
struct PairTableComputeFunctor {
|
||||
typedef DeviceType device_type ;
|
||||
typedef EV_FLOAT value_type;
|
||||
|
||||
PairTableKokkos<DeviceType> c;
|
||||
NeighListKokkos<DeviceType> list;
|
||||
|
||||
PairTableComputeFunctor(PairTableKokkos<DeviceType>* c_ptr,
|
||||
NeighListKokkos<DeviceType>* list_ptr):
|
||||
c(*c_ptr),list(*list_ptr) {};
|
||||
~PairTableComputeFunctor() {c.cleanup_copy();list.clean_copy();};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
if (c.newton_pair) c.template compute_item<0,NEIGHFLAG,1,TABSTYLE>(i,list);
|
||||
else c.template compute_item<0,NEIGHFLAG,0,TABSTYLE>(i,list);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i, value_type &energy_virial) const {
|
||||
if (c.newton_pair)
|
||||
energy_virial += c.template compute_item<1,NEIGHFLAG,1,TABSTYLE>(i,list);
|
||||
else
|
||||
energy_virial += c.template compute_item<1,NEIGHFLAG,0,TABSTYLE>(i,list);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(volatile value_type &update) {
|
||||
update.evdwl = 0;
|
||||
update.ecoul = 0;
|
||||
update.v[0] = 0;
|
||||
update.v[1] = 0;
|
||||
update.v[2] = 0;
|
||||
update.v[3] = 0;
|
||||
update.v[4] = 0;
|
||||
update.v[5] = 0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(volatile value_type &update,
|
||||
const volatile value_type &source) {
|
||||
update.evdwl += source.evdwl;
|
||||
update.ecoul += source.ecoul;
|
||||
update.v[0] += source.v[0];
|
||||
update.v[1] += source.v[1];
|
||||
update.v[2] += source.v[2];
|
||||
update.v[3] += source.v[3];
|
||||
update.v[4] += source.v[4];
|
||||
update.v[5] += source.v[5];
|
||||
}
|
||||
friend void pair_virial_fdotr_compute<PairTableKokkos>(PairTableKokkos*);
|
||||
};
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -53,6 +53,7 @@ VerletKokkos::VerletKokkos(LAMMPS *lmp, int narg, char **arg) :
|
|||
|
||||
void VerletKokkos::setup()
|
||||
{
|
||||
|
||||
if (comm->me == 0 && screen) fprintf(screen,"Setting up run ...\n");
|
||||
update->setupflag = 1;
|
||||
|
||||
|
@ -65,19 +66,24 @@ void VerletKokkos::setup()
|
|||
atomKK->setup();
|
||||
modify->setup_pre_exchange();
|
||||
// debug
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
if (triclinic) domain->x2lamda(atomKK->nlocal);
|
||||
domain->pbc();
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
|
||||
domain->reset_box();
|
||||
comm->setup();
|
||||
if (neighbor->style) neighbor->setup_bins();
|
||||
|
||||
comm->exchange();
|
||||
|
||||
if (atomKK->sortfreq > 0) atomKK->sort();
|
||||
|
||||
comm->borders();
|
||||
|
||||
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
@ -97,20 +103,47 @@ void VerletKokkos::setup()
|
|||
force_clear();
|
||||
modify->setup_pre_force(vflag);
|
||||
|
||||
if (pair_compute_flag) force->pair->compute(eflag,vflag);
|
||||
if (pair_compute_flag) {
|
||||
atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
|
||||
atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
|
||||
force->pair->compute(eflag,vflag);
|
||||
timer->stamp(TIME_PAIR);
|
||||
}
|
||||
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
|
||||
|
||||
|
||||
if (atomKK->molecular) {
|
||||
if (force->bond) force->bond->compute(eflag,vflag);
|
||||
if (force->angle) force->angle->compute(eflag,vflag);
|
||||
if (force->dihedral) force->dihedral->compute(eflag,vflag);
|
||||
if (force->improper) force->improper->compute(eflag,vflag);
|
||||
if (force->bond) {
|
||||
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
|
||||
atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
|
||||
force->bond->compute(eflag,vflag);
|
||||
}
|
||||
if (force->angle) {
|
||||
atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
|
||||
atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
|
||||
force->angle->compute(eflag,vflag);
|
||||
}
|
||||
if (force->dihedral) {
|
||||
atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
|
||||
atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
|
||||
force->dihedral->compute(eflag,vflag);
|
||||
}
|
||||
if (force->improper) {
|
||||
atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
|
||||
atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
|
||||
force->improper->compute(eflag,vflag);
|
||||
}
|
||||
timer->stamp(TIME_BOND);
|
||||
}
|
||||
|
||||
if (force->kspace) {
|
||||
if(force->kspace) {
|
||||
force->kspace->setup();
|
||||
if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
|
||||
else force->kspace->compute_dummy(eflag,vflag);
|
||||
if (kspace_compute_flag) {
|
||||
atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
|
||||
atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
|
||||
force->kspace->compute(eflag,vflag);
|
||||
timer->stamp(TIME_KSPACE);
|
||||
} else force->kspace->compute_dummy(eflag,vflag);
|
||||
}
|
||||
|
||||
if (force->newton) comm->reverse_comm();
|
||||
|
@ -172,20 +205,47 @@ void VerletKokkos::setup_minimal(int flag)
|
|||
force_clear();
|
||||
modify->setup_pre_force(vflag);
|
||||
|
||||
if (pair_compute_flag) force->pair->compute(eflag,vflag);
|
||||
if (pair_compute_flag) {
|
||||
atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
|
||||
atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
|
||||
force->pair->compute(eflag,vflag);
|
||||
timer->stamp(TIME_PAIR);
|
||||
}
|
||||
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
|
||||
|
||||
|
||||
if (atomKK->molecular) {
|
||||
if (force->bond) force->bond->compute(eflag,vflag);
|
||||
if (force->angle) force->angle->compute(eflag,vflag);
|
||||
if (force->dihedral) force->dihedral->compute(eflag,vflag);
|
||||
if (force->improper) force->improper->compute(eflag,vflag);
|
||||
if (force->bond) {
|
||||
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
|
||||
atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
|
||||
force->bond->compute(eflag,vflag);
|
||||
}
|
||||
if (force->angle) {
|
||||
atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
|
||||
atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
|
||||
force->angle->compute(eflag,vflag);
|
||||
}
|
||||
if (force->dihedral) {
|
||||
atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
|
||||
atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
|
||||
force->dihedral->compute(eflag,vflag);
|
||||
}
|
||||
if (force->improper) {
|
||||
atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
|
||||
atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
|
||||
force->improper->compute(eflag,vflag);
|
||||
}
|
||||
timer->stamp(TIME_BOND);
|
||||
}
|
||||
|
||||
if (force->kspace) {
|
||||
if(force->kspace) {
|
||||
force->kspace->setup();
|
||||
if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
|
||||
else force->kspace->compute_dummy(eflag,vflag);
|
||||
if (kspace_compute_flag) {
|
||||
atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
|
||||
atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
|
||||
force->kspace->compute(eflag,vflag);
|
||||
timer->stamp(TIME_KSPACE);
|
||||
} else force->kspace->compute_dummy(eflag,vflag);
|
||||
}
|
||||
|
||||
if (force->newton) comm->reverse_comm();
|
||||
|
@ -291,31 +351,47 @@ void VerletKokkos::run(int n)
|
|||
timer->stamp();
|
||||
|
||||
if (pair_compute_flag) {
|
||||
atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
|
||||
atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
|
||||
force->pair->compute(eflag,vflag);
|
||||
timer->stamp(TIME_PAIR);
|
||||
}
|
||||
|
||||
if (atomKK->molecular) {
|
||||
if (force->bond) force->bond->compute(eflag,vflag);
|
||||
if (force->angle) force->angle->compute(eflag,vflag);
|
||||
if (force->dihedral) force->dihedral->compute(eflag,vflag);
|
||||
if (force->improper) force->improper->compute(eflag,vflag);
|
||||
if (force->bond) {
|
||||
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
|
||||
atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
|
||||
force->bond->compute(eflag,vflag);
|
||||
}
|
||||
if (force->angle) {
|
||||
atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
|
||||
atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
|
||||
force->angle->compute(eflag,vflag);
|
||||
}
|
||||
if (force->dihedral) {
|
||||
atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
|
||||
atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
|
||||
force->dihedral->compute(eflag,vflag);
|
||||
}
|
||||
if (force->improper) {
|
||||
atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
|
||||
atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
|
||||
force->improper->compute(eflag,vflag);
|
||||
}
|
||||
timer->stamp(TIME_BOND);
|
||||
}
|
||||
|
||||
if (kspace_compute_flag) {
|
||||
atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
|
||||
atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
|
||||
force->kspace->compute(eflag,vflag);
|
||||
timer->stamp(TIME_KSPACE);
|
||||
}
|
||||
|
||||
// reverse communication of forces
|
||||
|
||||
if (force->newton) {
|
||||
atomKK->sync(Host,F_MASK);
|
||||
comm->reverse_comm();
|
||||
atomKK->modified(Host,F_MASK);
|
||||
timer->stamp(TIME_COMM);
|
||||
}
|
||||
if (force->newton) comm->reverse_comm();
|
||||
timer->stamp(TIME_COMM);
|
||||
|
||||
// force modifications, final time integration, diagnostics
|
||||
|
||||
|
|
Loading…
Reference in New Issue