git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12365 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2014-08-27 16:17:57 +00:00
parent d1e51c4359
commit d9f92faf40
39 changed files with 14360 additions and 667 deletions

View File

@ -40,12 +40,12 @@ if (test $1 = 1) then
if (test -e ../Makefile.package) then
sed -i -e 's/[^ \t]*kokkos[^ \t]* //g' ../Makefile.package
sed -i -e 's/[^ \t]*KOKKOS[^ \t]* //g' ../Makefile.package
sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/kokkos\/core\/src -I../../lib/kokkos/containers/src -DLMP_KOKKOS |' ../Makefile.package
sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/kokkos\/core\/src |' ../Makefile.package
sed -i -e 's|^PKG_INC =[ \t]*|&-DLMP_KOKKOS |' ../Makefile.package
# sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/kokkos\/core\/src |' ../Makefile.package
sed -i -e 's|^PKG_LIB =[ \t]*|&-lkokkoscore |' ../Makefile.package
sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(kokkos_SYSINC) |' ../Makefile.package
sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(kokkos_SYSLIB) |' ../Makefile.package
sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(kokkos_SYSPATH) |' ../Makefile.package
sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(KOKKOS_INC) |' ../Makefile.package
sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(KOKKOS_LINK) |' ../Makefile.package
# sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(kokkos_SYSPATH) |' ../Makefile.package
fi
if (test -e ../Makefile.package.settings) then

View File

@ -37,31 +37,43 @@ AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp)
AtomKokkos::~AtomKokkos()
{
k_tag = DAT::tdual_tagint_1d();
k_mask = DAT::tdual_int_1d();
k_type = DAT::tdual_int_1d();
k_image = DAT::tdual_imageint_1d();
k_molecule = DAT::tdual_tagint_1d();
memory->destroy_kokkos(k_tag, tag);
memory->destroy_kokkos(k_mask, mask);
memory->destroy_kokkos(k_type, type);
memory->destroy_kokkos(k_image, image);
memory->destroy_kokkos(k_molecule, molecule);
k_x = DAT::tdual_x_array();
k_v = DAT::tdual_v_array();
k_f = DAT::tdual_f_array();
memory->destroy_kokkos(k_x, x);
memory->destroy_kokkos(k_v, v);
memory->destroy_kokkos(k_f, f);
k_mass = DAT::tdual_float_1d();
memory->destroy_kokkos(k_mass, mass);
tag = NULL;
mask = NULL;
type = NULL;
image = NULL;
molecule = NULL;
mass = NULL;
memory->destroy_kokkos(k_q,q);
memory->destroy_kokkos(k_nspecial, nspecial);
memory->destroy_kokkos(k_special, special);
memory->destroy_kokkos(k_num_bond, num_bond);
memory->destroy_kokkos(k_bond_type, bond_type);
memory->destroy_kokkos(k_bond_atom, bond_atom);
memory->destroy_kokkos(k_num_angle, num_angle);
memory->destroy_kokkos(k_angle_type, angle_type);
memory->destroy_kokkos(k_angle_atom1, angle_atom1);
memory->destroy_kokkos(k_angle_atom2, angle_atom2);
memory->destroy_kokkos(k_angle_atom3, angle_atom3);
memory->destroy_kokkos(k_num_dihedral, num_dihedral);
memory->destroy_kokkos(k_dihedral_type, dihedral_type);
memory->destroy_kokkos(k_dihedral_atom1, dihedral_atom1);
memory->destroy_kokkos(k_dihedral_atom2, dihedral_atom2);
memory->destroy_kokkos(k_dihedral_atom3, dihedral_atom3);
memory->destroy_kokkos(k_dihedral_atom4, dihedral_atom4);
memory->destroy_kokkos(k_num_improper, num_improper);
memory->destroy_kokkos(k_improper_type, improper_type);
memory->destroy_kokkos(k_improper_atom1, improper_atom1);
memory->destroy_kokkos(k_improper_atom2, improper_atom2);
memory->destroy_kokkos(k_improper_atom3, improper_atom3);
memory->destroy_kokkos(k_improper_atom4, improper_atom4);
memory->sfree(x);
memory->sfree(v);
memory->sfree(f);
x = NULL;
v = NULL;
f = NULL;
}
/* ---------------------------------------------------------------------- */
@ -96,9 +108,6 @@ void AtomKokkos::sort()
{
int i,m,n,ix,iy,iz,ibin,empty;
sync(Host,ALL_MASK);
modified(Host,ALL_MASK);
// set next timestep for sorting to take place
nextsort = (update->ntimestep/sortfreq)*sortfreq + sortfreq;
@ -122,6 +131,9 @@ void AtomKokkos::sort()
if (nlocal == nmax) avec->grow(0);
sync(Host,ALL_MASK);
modified(Host,ALL_MASK);
// bin atoms in reverse order so linked list will be in forward order
for (i = 0; i < nbins; i++) binhead[i] = -1;
@ -188,3 +200,43 @@ void AtomKokkos::sort()
//MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
//if (flagall) error->all(FLERR,"Atom sort did not operate correctly");
}
/* ----------------------------------------------------------------------
reallocate memory to the pointer selected by the mask
------------------------------------------------------------------------- */
void AtomKokkos::grow(unsigned int mask){
if (mask && SPECIAL_MASK){
memory->destroy_kokkos(k_special, special);
sync(Device, mask);
modified(Device, mask);
memory->grow_kokkos(k_special,special,nmax,maxspecial,"atom:special");
avec->grow_reset();
sync(Host, mask);
}
}
void AtomKokkos::deallocate_topology()
{
memory->destroy_kokkos(k_bond_type, bond_type);
memory->destroy_kokkos(k_bond_atom, bond_atom);
memory->destroy_kokkos(k_angle_type, angle_type);
memory->destroy_kokkos(k_angle_atom1, angle_atom1);
memory->destroy_kokkos(k_angle_atom2, angle_atom2);
memory->destroy_kokkos(k_angle_atom3, angle_atom3);
memory->destroy_kokkos(k_dihedral_type, dihedral_type);
memory->destroy_kokkos(k_dihedral_atom1, dihedral_atom1);
memory->destroy_kokkos(k_dihedral_atom2, dihedral_atom2);
memory->destroy_kokkos(k_dihedral_atom3, dihedral_atom3);
memory->destroy_kokkos(k_dihedral_atom4, dihedral_atom4);
memory->destroy_kokkos(k_improper_type, improper_type);
memory->destroy_kokkos(k_improper_atom1, improper_atom1);
memory->destroy_kokkos(k_improper_atom2, improper_atom2);
memory->destroy_kokkos(k_improper_atom3, improper_atom3);
memory->destroy_kokkos(k_improper_atom4, improper_atom4);
}

View File

@ -21,15 +21,33 @@ namespace LAMMPS_NS {
class AtomKokkos : public Atom {
public:
DAT::tdual_tagint_1d k_tag, k_molecule;
DAT::tdual_imageint_1d k_image;
DAT::tdual_tagint_1d k_tag;
DAT::tdual_int_1d k_type, k_mask;
DAT::tdual_imageint_1d k_image;
DAT::tdual_x_array k_x;
DAT::tdual_v_array k_v;
DAT::tdual_f_array k_f;
DAT::tdual_float_1d k_mass;
DAT::tdual_float_1d k_q;
DAT::tdual_tagint_1d k_molecule;
DAT::tdual_int_2d k_nspecial;
DAT::tdual_tagint_2d k_special;
DAT::tdual_int_1d k_num_bond;
DAT::tdual_int_2d k_bond_type;
DAT::tdual_tagint_2d k_bond_atom;
DAT::tdual_int_1d k_num_angle;
DAT::tdual_int_2d k_angle_type;
DAT::tdual_tagint_2d k_angle_atom1, k_angle_atom2, k_angle_atom3;
DAT::tdual_int_1d k_num_dihedral;
DAT::tdual_int_2d k_dihedral_type;
DAT::tdual_tagint_2d k_dihedral_atom1, k_dihedral_atom2, k_dihedral_atom3, k_dihedral_atom4;
DAT::tdual_int_1d k_num_improper;
DAT::tdual_int_2d k_improper_type;
DAT::tdual_tagint_2d k_improper_atom1, k_improper_atom2, k_improper_atom3, k_improper_atom4;
AtomKokkos(class LAMMPS *);
~AtomKokkos();
@ -37,6 +55,8 @@ class AtomKokkos : public Atom {
void sync(const ExecutionSpace space, unsigned int mask);
void modified(const ExecutionSpace space, unsigned int mask);
virtual void sort();
virtual void grow(unsigned int mask);
virtual void deallocate_topology();
};
template<class ViewType, class IndexView>

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,153 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef ATOM_CLASS
AtomStyle(angle/kk,AtomVecAngleKokkos)
#else
#ifndef LMP_ATOM_VEC_ANGLE_KOKKOS_H
#define LMP_ATOM_VEC_ANGLE_KOKKOS_H
#include "atom_vec_kokkos.h"
namespace LAMMPS_NS {
class AtomVecAngleKokkos : public AtomVecKokkos {
public:
AtomVecAngleKokkos(class LAMMPS *);
virtual ~AtomVecAngleKokkos() {}
void grow(int);
void copy(int, int, int);
int pack_comm(int, int *, double *, int, int *);
int pack_comm_vel(int, int *, double *, int, int *);
void unpack_comm(int, int, double *);
void unpack_comm_vel(int, int, double *);
int pack_reverse(int, int, double *);
void unpack_reverse(int, int *, double *);
int pack_border(int, int *, double *, int, int *);
int pack_border_vel(int, int *, double *, int, int *);
int pack_border_hybrid(int, int *, double *);
void unpack_border(int, int, double *);
void unpack_border_vel(int, int, double *);
int unpack_border_hybrid(int, int, double *);
int pack_exchange(int, double *);
int unpack_exchange(double *);
int size_restart();
int pack_restart(int, double *);
int unpack_restart(double *);
void create_atom(int, double *);
void data_atom(double *, tagint, char **);
int data_atom_hybrid(int, char **);
void pack_data(double **);
int pack_data_hybrid(int, double *);
void write_data(FILE *, int, double **);
int write_data_hybrid(FILE *, double *);
bigint memory_usage();
void grow_reset();
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag, const int pbc[]);
void unpack_comm_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf);
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const int nfirst,
const int &pbc_flag, const int pbc[]);
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space);
void unpack_border_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf,
ExecutionSpace space);
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi);
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space);
void sync(ExecutionSpace space, unsigned int mask);
void modified(ExecutionSpace space, unsigned int mask);
protected:
tagint *tag;
int *type,*mask;
imageint *image;
double **x,**v,**f;
tagint *molecule;
int **nspecial;
tagint **special;
int *num_bond;
int **bond_type;
tagint **bond_atom;
int *num_angle;
int **angle_type;
tagint **angle_atom1,**angle_atom2,**angle_atom3;
DAT::t_tagint_1d d_tag;
DAT::t_int_1d d_type, d_mask;
HAT::t_tagint_1d h_tag;
HAT::t_int_1d h_type, h_mask;
DAT::t_imageint_1d d_image;
HAT::t_imageint_1d h_image;
DAT::t_x_array d_x;
DAT::t_v_array d_v;
DAT::t_f_array d_f;
HAT::t_x_array h_x;
HAT::t_v_array h_v;
HAT::t_f_array h_f;
DAT::t_tagint_1d d_molecule;
DAT::t_int_2d d_nspecial;
DAT::t_tagint_2d d_special;
DAT::t_int_1d d_num_bond;
DAT::t_int_2d d_bond_type;
DAT::t_tagint_2d d_bond_atom;
HAT::t_tagint_1d h_molecule;
HAT::t_int_2d h_nspecial;
HAT::t_tagint_2d h_special;
HAT::t_int_1d h_num_bond;
HAT::t_int_2d h_bond_type;
HAT::t_tagint_2d h_bond_atom;
DAT::t_int_1d d_num_angle;
DAT::t_int_2d d_angle_type;
DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3;
HAT::t_int_1d h_num_angle;
HAT::t_int_2d h_angle_type;
HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3;
DAT::tdual_int_1d k_count;
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -553,14 +553,15 @@ void AtomVecAtomicKokkos::unpack_comm_vel(int n, int first, double *buf)
int AtomVecAtomicKokkos::pack_reverse(int n, int first, double *buf)
{
int i,m,last;
if(n > 0)
sync(Host,F_MASK);
m = 0;
last = first + n;
for (i = first; i < last; i++) {
buf[m++] = f[i][0];
buf[m++] = f[i][1];
buf[m++] = f[i][2];
int m = 0;
const int last = first + n;
for (int i = first; i < last; i++) {
buf[m++] = h_f(i,0);
buf[m++] = h_f(i,1);
buf[m++] = h_f(i,2);
}
return m;
}
@ -569,14 +570,17 @@ int AtomVecAtomicKokkos::pack_reverse(int n, int first, double *buf)
void AtomVecAtomicKokkos::unpack_reverse(int n, int *list, double *buf)
{
int i,j,m;
if(n > 0) {
sync(Host,F_MASK);
modified(Host,F_MASK);
}
m = 0;
for (i = 0; i < n; i++) {
j = list[i];
f[j][0] += buf[m++];
f[j][1] += buf[m++];
f[j][2] += buf[m++];
int m = 0;
for (int i = 0; i < n; i++) {
const int j = list[i];
h_f(j,0) += buf[m++];
h_f(j,1) += buf[m++];
h_f(j,2) += buf[m++];
}
}
@ -588,11 +592,11 @@ struct AtomVecAtomicKokkos_PackBorder {
typename ArrayTypes<DeviceType>::t_xfloat_2d _buf;
const typename ArrayTypes<DeviceType>::t_int_2d_const _list;
const int _iswap;
const typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
const typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
const typename ArrayTypes<DeviceType>::t_int_1d _type;
const typename ArrayTypes<DeviceType>::t_int_1d _mask;
const int _iswap;
X_FLOAT _dx,_dy,_dz;
AtomVecAtomicKokkos_PackBorder(
@ -694,9 +698,9 @@ int AtomVecAtomicKokkos::pack_border(int n, int *list, double *buf,
buf[m++] = h_x(j,0);
buf[m++] = h_x(j,1);
buf[m++] = h_x(j,2);
buf[m++] = h_tag[j];
buf[m++] = h_type[j];
buf[m++] = h_mask[j];
buf[m++] = ubuf(h_tag(j)).d;
buf[m++] = ubuf(h_type(j)).d;
buf[m++] = ubuf(h_mask(j)).d;
}
} else {
if (domain->triclinic == 0) {
@ -713,11 +717,16 @@ int AtomVecAtomicKokkos::pack_border(int n, int *list, double *buf,
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
buf[m++] = h_tag[j];
buf[m++] = h_type[j];
buf[m++] = h_mask[j];
buf[m++] = ubuf(h_tag(j)).d;
buf[m++] = ubuf(h_type(j)).d;
buf[m++] = ubuf(h_mask(j)).d;
}
}
if (atom->nextra_border)
for (int iextra = 0; iextra < atom->nextra_border; iextra++)
m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
return m;
}
@ -736,9 +745,9 @@ int AtomVecAtomicKokkos::pack_border_vel(int n, int *list, double *buf,
buf[m++] = h_x(j,0);
buf[m++] = h_x(j,1);
buf[m++] = h_x(j,2);
buf[m++] = h_tag[j];
buf[m++] = h_type[j];
buf[m++] = h_mask[j];
buf[m++] = ubuf(h_tag(j)).d;
buf[m++] = ubuf(h_type(j)).d;
buf[m++] = ubuf(h_mask(j)).d;
buf[m++] = h_v(j,0);
buf[m++] = h_v(j,1);
buf[m++] = h_v(j,2);
@ -759,9 +768,9 @@ int AtomVecAtomicKokkos::pack_border_vel(int n, int *list, double *buf,
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
buf[m++] = h_tag[j];
buf[m++] = h_type[j];
buf[m++] = h_mask[j];
buf[m++] = ubuf(h_tag(j)).d;
buf[m++] = ubuf(h_type(j)).d;
buf[m++] = ubuf(h_mask(j)).d;
buf[m++] = h_v(j,0);
buf[m++] = h_v(j,1);
buf[m++] = h_v(j,2);
@ -775,9 +784,9 @@ int AtomVecAtomicKokkos::pack_border_vel(int n, int *list, double *buf,
buf[m++] = h_x(j,0) + dx;
buf[m++] = h_x(j,1) + dy;
buf[m++] = h_x(j,2) + dz;
buf[m++] = h_tag[j];
buf[m++] = h_type[j];
buf[m++] = h_mask[j];
buf[m++] = ubuf(h_tag(j)).d;
buf[m++] = ubuf(h_type(j)).d;
buf[m++] = ubuf(h_mask(j)).d;
if (mask[i] & deform_groupbit) {
buf[m++] = h_v(j,0) + dvx;
buf[m++] = h_v(j,1) + dvy;
@ -790,6 +799,11 @@ int AtomVecAtomicKokkos::pack_border_vel(int n, int *list, double *buf,
}
}
}
if (atom->nextra_border)
for (int iextra = 0; iextra < atom->nextra_border; iextra++)
m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]);
return m;
}
@ -861,10 +875,15 @@ void AtomVecAtomicKokkos::unpack_border(int n, int first, double *buf)
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
h_tag[i] = static_cast<int> (buf[m++]);
h_type[i] = static_cast<int> (buf[m++]);
h_mask[i] = static_cast<int> (buf[m++]);
h_tag(i) = (tagint) ubuf(buf[m++]).i;
h_type(i) = (int) ubuf(buf[m++]).i;
h_mask(i) = (int) ubuf(buf[m++]).i;
}
if (atom->nextra_border)
for (int iextra = 0; iextra < atom->nextra_border; iextra++)
m += modify->fix[atom->extra_border[iextra]]->
unpack_border(n,first,&buf[m]);
}
/* ---------------------------------------------------------------------- */
@ -880,13 +899,18 @@ void AtomVecAtomicKokkos::unpack_border_vel(int n, int first, double *buf)
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
h_tag[i] = static_cast<int> (buf[m++]);
h_type[i] = static_cast<int> (buf[m++]);
h_mask[i] = static_cast<int> (buf[m++]);
h_tag(i) = (tagint) ubuf(buf[m++]).i;
h_type(i) = (int) ubuf(buf[m++]).i;
h_mask(i) = (int) ubuf(buf[m++]).i;
h_v(i,0) = buf[m++];
h_v(i,1) = buf[m++];
h_v(i,2) = buf[m++];
}
if (atom->nextra_border)
for (int iextra = 0; iextra < atom->nextra_border; iextra++)
m += modify->fix[atom->extra_border[iextra]]->
unpack_border(n,first,&buf[m]);
}
/* ---------------------------------------------------------------------- */
@ -895,7 +919,6 @@ template<class DeviceType>
struct AtomVecAtomicKokkos_PackExchangeFunctor {
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
X_FLOAT _lo,_hi;
typename AT::t_x_array_randomread _x;
typename AT::t_v_array_randomread _v;
typename AT::t_tagint_1d_randomread _tag;
@ -910,9 +933,10 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
typename AT::t_imageint_1d _imagew;
typename AT::t_xfloat_2d_um _buf;
int _nlocal,_dim;
typename AT::t_int_1d_const _sendlist;
typename AT::t_int_1d_const _copylist;
int _nlocal,_dim;
X_FLOAT _lo,_hi;
AtomVecAtomicKokkos_PackExchangeFunctor(
const AtomKokkos* atom,
@ -977,7 +1001,7 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi )
{
if(nsend > (k_buf.view<LMPHostType>().dimension_0()*k_buf.view<LMPHostType>().dimension_1())/11) {
if(nsend > (int) (k_buf.view<LMPHostType>().dimension_0()*k_buf.view<LMPHostType>().dimension_1())/11) {
int newsize = nsend*11/k_buf.view<LMPHostType>().dimension_1()+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().dimension_1());
}
@ -1005,10 +1029,10 @@ int AtomVecAtomicKokkos::pack_exchange(int i, double *buf)
buf[m++] = h_v(i,0);
buf[m++] = h_v(i,1);
buf[m++] = h_v(i,2);
buf[m++] = h_tag[i];
buf[m++] = h_type[i];
buf[m++] = h_mask[i];
*((tagint *) &buf[m++]) = h_image[i];
buf[m++] = ubuf(h_tag(i)).d;
buf[m++] = ubuf(h_type(i)).d;
buf[m++] = ubuf(h_mask(i)).d;
buf[m++] = ubuf(h_image(i)).d;
if (atom->nextra_grow)
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
@ -1024,7 +1048,6 @@ template<class DeviceType>
struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
X_FLOAT _lo,_hi;
typename AT::t_x_array _x;
typename AT::t_v_array _v;
typename AT::t_tagint_1d _tag;
@ -1033,8 +1056,9 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
typename AT::t_imageint_1d _image;
typename AT::t_xfloat_2d_um _buf;
int _dim;
typename AT::t_int_1d _nlocal;
int _dim;
X_FLOAT _lo,_hi;
AtomVecAtomicKokkos_UnpackExchangeFunctor(
const AtomKokkos* atom,
@ -1113,10 +1137,10 @@ int AtomVecAtomicKokkos::unpack_exchange(double *buf)
h_v(nlocal,0) = buf[m++];
h_v(nlocal,1) = buf[m++];
h_v(nlocal,2) = buf[m++];
h_tag[nlocal] = static_cast<int> (buf[m++]);
h_type[nlocal] = static_cast<int> (buf[m++]);
h_mask[nlocal] = static_cast<int> (buf[m++]);
h_image[nlocal] = static_cast<int> (buf[m++]);
h_tag(nlocal) = (tagint) ubuf(buf[m++]).i;
h_type(nlocal) = (int) ubuf(buf[m++]).i;
h_mask(nlocal) = (int) ubuf(buf[m++]).i;
h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
if (atom->nextra_grow)
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
@ -1159,10 +1183,10 @@ int AtomVecAtomicKokkos::pack_restart(int i, double *buf)
buf[m++] = h_x(i,0);
buf[m++] = h_x(i,1);
buf[m++] = h_x(i,2);
buf[m++] = h_tag[i];
buf[m++] = h_type[i];
buf[m++] = h_mask[i];
buf[m++] = h_image[i];
buf[m++] = ubuf(h_tag(i)).d;
buf[m++] = ubuf(h_type(i)).d;
buf[m++] = ubuf(h_mask(i)).d;
buf[m++] = ubuf(h_image(i)).d;
buf[m++] = h_v(i,0);
buf[m++] = h_v(i,1);
buf[m++] = h_v(i,2);
@ -1192,17 +1216,17 @@ int AtomVecAtomicKokkos::unpack_restart(double *buf)
h_x(nlocal,0) = buf[m++];
h_x(nlocal,1) = buf[m++];
h_x(nlocal,2) = buf[m++];
h_tag[nlocal] = static_cast<int> (buf[m++]);
h_type[nlocal] = static_cast<int> (buf[m++]);
h_mask[nlocal] = static_cast<int> (buf[m++]);
h_image[nlocal] = *((tagint *) &buf[m++]);
h_tag(nlocal) = (tagint) ubuf(buf[m++]).i;
h_type(nlocal) = (int) ubuf(buf[m++]).i;
h_mask(nlocal) = (int) ubuf(buf[m++]).i;
h_image(nlocal) = (imageint) ubuf(buf[m++]).i;
h_v(nlocal,0) = buf[m++];
h_v(nlocal,1) = buf[m++];
h_v(nlocal,2) = buf[m++];
double **extra = atom->extra;
if (atom->nextra_store) {
int size = static_cast<int> (buf[0]) - m;
int size = static_cast<int> (ubuf(buf[m++]).i) - m;
for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,141 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef ATOM_CLASS
AtomStyle(bond/kk,AtomVecBondKokkos)
#else
#ifndef LMP_ATOM_VEC_BOND_KOKKOS_H
#define LMP_ATOM_VEC_BOND_KOKKOS_H
#include "atom_vec_kokkos.h"
namespace LAMMPS_NS {
class AtomVecBondKokkos : public AtomVecKokkos {
public:
AtomVecBondKokkos(class LAMMPS *);
virtual ~AtomVecBondKokkos() {}
void grow(int);
void copy(int, int, int);
int pack_comm(int, int *, double *, int, int *);
int pack_comm_vel(int, int *, double *, int, int *);
void unpack_comm(int, int, double *);
void unpack_comm_vel(int, int, double *);
int pack_reverse(int, int, double *);
void unpack_reverse(int, int *, double *);
int pack_border(int, int *, double *, int, int *);
int pack_border_vel(int, int *, double *, int, int *);
int pack_border_hybrid(int, int *, double *);
void unpack_border(int, int, double *);
void unpack_border_vel(int, int, double *);
int unpack_border_hybrid(int, int, double *);
int pack_exchange(int, double *);
int unpack_exchange(double *);
int size_restart();
int pack_restart(int, double *);
int unpack_restart(double *);
void create_atom(int, double *);
void data_atom(double *, tagint, char **);
int data_atom_hybrid(int, char **);
void pack_data(double **);
int pack_data_hybrid(int, double *);
void write_data(FILE *, int, double **);
int write_data_hybrid(FILE *, double *);
bigint memory_usage();
void grow_reset();
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag, const int pbc[]);
void unpack_comm_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf);
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const int nfirst,
const int &pbc_flag, const int pbc[]);
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space);
void unpack_border_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf,
ExecutionSpace space);
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi);
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space);
void sync(ExecutionSpace space, unsigned int mask);
void modified(ExecutionSpace space, unsigned int mask);
protected:
tagint *tag;
int *type,*mask;
imageint *image;
double **x,**v,**f;
tagint *molecule;
int **nspecial;
tagint **special;
int *num_bond;
int **bond_type;
tagint **bond_atom;
DAT::t_tagint_1d d_tag;
DAT::t_int_1d d_type, d_mask;
HAT::t_tagint_1d h_tag;
HAT::t_int_1d h_type, h_mask;
DAT::t_imageint_1d d_image;
HAT::t_imageint_1d h_image;
DAT::t_x_array d_x;
DAT::t_v_array d_v;
DAT::t_f_array d_f;
HAT::t_x_array h_x;
HAT::t_v_array h_v;
HAT::t_f_array h_f;
DAT::t_tagint_1d d_molecule;
DAT::t_int_2d d_nspecial;
DAT::t_tagint_2d d_special;
DAT::t_int_1d d_num_bond;
DAT::t_int_2d d_bond_type;
DAT::t_tagint_2d d_bond_atom;
HAT::t_tagint_1d h_molecule;
HAT::t_int_2d h_nspecial;
HAT::t_tagint_2d h_special;
HAT::t_int_1d h_num_bond;
HAT::t_int_2d h_bond_type;
HAT::t_tagint_2d h_bond_atom;
DAT::tdual_int_1d k_count;
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,126 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef ATOM_CLASS
AtomStyle(charge/kk,AtomVecChargeKokkos)
#else
#ifndef LMP_ATOM_VEC_CHARGE_KOKKOS_H
#define LMP_ATOM_VEC_CHARGE_KOKKOS_H
#include "atom_vec_kokkos.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
class AtomVecChargeKokkos : public AtomVecKokkos {
public:
AtomVecChargeKokkos(class LAMMPS *);
virtual ~AtomVecChargeKokkos() {}
void grow(int);
void copy(int, int, int);
int pack_comm(int, int *, double *, int, int *);
int pack_comm_vel(int, int *, double *, int, int *);
void unpack_comm(int, int, double *);
void unpack_comm_vel(int, int, double *);
int pack_reverse(int, int, double *);
void unpack_reverse(int, int *, double *);
int pack_border(int, int *, double *, int, int *);
int pack_border_vel(int, int *, double *, int, int *);
int pack_border_hybrid(int, int *, double *);
void unpack_border(int, int, double *);
void unpack_border_vel(int, int, double *);
int unpack_border_hybrid(int, int, double *);
int pack_exchange(int, double *);
int unpack_exchange(double *);
int size_restart();
int pack_restart(int, double *);
int unpack_restart(double *);
void create_atom(int, double *);
void data_atom(double *, tagint, char **);
int data_atom_hybrid(int , char **);
void pack_data(double **);
int pack_data_hybrid(int, double *);
void write_data(FILE *, int, double **);
int write_data_hybrid(FILE *, double *);
bigint memory_usage();
void grow_reset();
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag, const int pbc[]);
void unpack_comm_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf);
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const int nfirst,
const int &pbc_flag, const int pbc[]);
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space);
void unpack_border_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf,
ExecutionSpace space);
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi);
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space);
void sync(ExecutionSpace space, unsigned int mask);
void modified(ExecutionSpace space, unsigned int mask);
protected:
tagint *tag;
int *type,*mask;
imageint *image;
double **x,**v,**f;
double *q;
DAT::t_tagint_1d d_tag;
HAT::t_tagint_1d h_tag;
DAT::t_int_1d d_type, d_mask;
HAT::t_int_1d h_type, h_mask;
DAT::t_imageint_1d d_image;
HAT::t_imageint_1d h_image;
DAT::t_x_array d_x;
DAT::t_v_array d_v;
DAT::t_f_array d_f;
HAT::t_x_array h_x;
HAT::t_v_array h_v;
HAT::t_f_array h_f;
DAT::t_float_1d d_q;
HAT::t_float_1d h_q;
DAT::tdual_int_1d k_count;
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,183 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef ATOM_CLASS
AtomStyle(full/kk,AtomVecFullKokkos)
#else
#ifndef LMP_ATOM_VEC_FULL_KOKKOS_H
#define LMP_ATOM_VEC_FULL_KOKKOS_H
#include "atom_vec_kokkos.h"
namespace LAMMPS_NS {
class AtomVecFullKokkos : public AtomVecKokkos {
public:
AtomVecFullKokkos(class LAMMPS *);
virtual ~AtomVecFullKokkos() {}
void grow(int);
void copy(int, int, int);
int pack_comm(int, int *, double *, int, int *);
int pack_comm_vel(int, int *, double *, int, int *);
void unpack_comm(int, int, double *);
void unpack_comm_vel(int, int, double *);
int pack_reverse(int, int, double *);
void unpack_reverse(int, int *, double *);
int pack_border(int, int *, double *, int, int *);
int pack_border_vel(int, int *, double *, int, int *);
int pack_border_hybrid(int, int *, double *);
void unpack_border(int, int, double *);
void unpack_border_vel(int, int, double *);
int unpack_border_hybrid(int, int, double *);
int pack_exchange(int, double *);
int unpack_exchange(double *);
int size_restart();
int pack_restart(int, double *);
int unpack_restart(double *);
void create_atom(int, double *);
void data_atom(double *, tagint, char **);
int data_atom_hybrid(int, char **);
void pack_data(double **);
int pack_data_hybrid(int, double *);
void write_data(FILE *, int, double **);
int write_data_hybrid(FILE *, double *);
bigint memory_usage();
void grow_reset();
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag, const int pbc[]);
void unpack_comm_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf);
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const int nfirst,
const int &pbc_flag, const int pbc[]);
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space);
void unpack_border_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf,
ExecutionSpace space);
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi);
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space);
void sync(ExecutionSpace space, unsigned int mask);
void modified(ExecutionSpace space, unsigned int mask);
protected:
tagint *tag;
int *type,*mask;
imageint *image;
double **x,**v,**f;
double *q;
tagint *molecule;
int **nspecial;
tagint **special;
int *num_bond;
int **bond_type;
tagint **bond_atom;
int *num_angle;
int **angle_type;
tagint **angle_atom1,**angle_atom2,**angle_atom3;
int *num_dihedral;
int **dihedral_type;
tagint **dihedral_atom1,**dihedral_atom2,**dihedral_atom3,**dihedral_atom4;
int *num_improper;
int **improper_type;
tagint **improper_atom1,**improper_atom2,**improper_atom3,**improper_atom4;
DAT::t_tagint_1d d_tag;
DAT::t_int_1d d_type, d_mask;
HAT::t_tagint_1d h_tag;
HAT::t_int_1d h_type, h_mask;
DAT::t_imageint_1d d_image;
HAT::t_imageint_1d h_image;
DAT::t_x_array d_x;
DAT::t_v_array d_v;
DAT::t_f_array d_f;
HAT::t_x_array h_x;
HAT::t_v_array h_v;
HAT::t_f_array h_f;
DAT::t_float_1d d_q;
HAT::t_float_1d h_q;
DAT::t_tagint_1d d_molecule;
DAT::t_int_2d d_nspecial;
DAT::t_tagint_2d d_special;
DAT::t_int_1d d_num_bond;
DAT::t_int_2d d_bond_type;
DAT::t_tagint_2d d_bond_atom;
HAT::t_tagint_1d h_molecule;
HAT::t_int_2d h_nspecial;
HAT::t_tagint_2d h_special;
HAT::t_int_1d h_num_bond;
HAT::t_int_2d h_bond_type;
HAT::t_tagint_2d h_bond_atom;
DAT::t_int_1d d_num_angle;
DAT::t_int_2d d_angle_type;
DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3;
HAT::t_int_1d h_num_angle;
HAT::t_int_2d h_angle_type;
HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3;
DAT::t_int_1d d_num_dihedral;
DAT::t_int_2d d_dihedral_type;
DAT::t_tagint_2d d_dihedral_atom1,d_dihedral_atom2,
d_dihedral_atom3,d_dihedral_atom4;
DAT::t_int_1d d_num_improper;
DAT::t_int_2d d_improper_type;
DAT::t_tagint_2d d_improper_atom1,d_improper_atom2,
d_improper_atom3,d_improper_atom4;
HAT::t_int_1d h_num_dihedral;
HAT::t_int_2d h_dihedral_type;
HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2,
h_dihedral_atom3,h_dihedral_atom4;
HAT::t_int_1d h_num_improper;
HAT::t_int_2d h_improper_type;
HAT::t_tagint_2d h_improper_atom1,h_improper_atom2,
h_improper_atom3,h_improper_atom4;
HAT::tdual_int_1d k_count;
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -24,43 +24,43 @@ class AtomVecKokkos : public AtomVec {
AtomVecKokkos(class LAMMPS *);
virtual ~AtomVecKokkos() {}
virtual void sync(ExecutionSpace space, unsigned int mask) {};
virtual void modified(ExecutionSpace space, unsigned int mask) {};
virtual void sync(ExecutionSpace space, unsigned int mask) = 0;
virtual void modified(ExecutionSpace space, unsigned int mask) = 0;
virtual int
pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const int nfirst,
const int &pbc_flag, const int pbc[])
{return 0;}
const int &pbc_flag, const int pbc[]) = 0;
//{return 0;}
virtual int
pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag, const int pbc[])
{return 0;}
const int &pbc_flag, const int pbc[]) = 0;
//{return 0;}
virtual void
unpack_comm_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf) {};
const DAT::tdual_xfloat_2d &buf) = 0;
virtual int
pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space)
{return 0;};
int pbc_flag, int *pbc, ExecutionSpace space) = 0;
//{return 0;};
virtual void
unpack_border_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf,
ExecutionSpace space) {};
ExecutionSpace space) = 0;
virtual int
pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi)
{return 0;};
ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi) = 0;
//{return 0;};
virtual int
unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space)
{return 0;};
ExecutionSpace space) = 0;
//{return 0;};
protected:
class AtomKokkos *atomKK;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,178 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef ATOM_CLASS
AtomStyle(molecular/kk,AtomVecMolecularKokkos)
#else
#ifndef LMP_ATOM_VEC_MOLECULAR_KOKKOS_H
#define LMP_ATOM_VEC_MOLECULAR_KOKKOS_H
#include "atom_vec_kokkos.h"
namespace LAMMPS_NS {
class AtomVecMolecularKokkos : public AtomVecKokkos {
public:
AtomVecMolecularKokkos(class LAMMPS *);
virtual ~AtomVecMolecularKokkos() {}
void grow(int);
void copy(int, int, int);
int pack_comm(int, int *, double *, int, int *);
int pack_comm_vel(int, int *, double *, int, int *);
void unpack_comm(int, int, double *);
void unpack_comm_vel(int, int, double *);
int pack_reverse(int, int, double *);
void unpack_reverse(int, int *, double *);
int pack_border(int, int *, double *, int, int *);
int pack_border_vel(int, int *, double *, int, int *);
int pack_border_hybrid(int, int *, double *);
void unpack_border(int, int, double *);
void unpack_border_vel(int, int, double *);
int unpack_border_hybrid(int, int, double *);
int pack_exchange(int, double *);
int unpack_exchange(double *);
int size_restart();
int pack_restart(int, double *);
int unpack_restart(double *);
void create_atom(int, double *);
void data_atom(double *, tagint, char **);
int data_atom_hybrid(int, char **);
void pack_data(double **);
int pack_data_hybrid(int, double *);
void write_data(FILE *, int, double **);
int write_data_hybrid(FILE *, double *);
bigint memory_usage();
void grow_reset();
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag, const int pbc[]);
void unpack_comm_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf);
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const int nfirst,
const int &pbc_flag, const int pbc[]);
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space);
void unpack_border_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf,
ExecutionSpace space);
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi);
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space);
void sync(ExecutionSpace space, unsigned int mask);
void modified(ExecutionSpace space, unsigned int mask);
protected:
tagint *tag;
int *type,*mask;
imageint *image;
double **x,**v,**f;
tagint *molecule;
int **nspecial;
tagint **special;
int *num_bond;
int **bond_type;
tagint **bond_atom;
int *num_angle;
int **angle_type;
tagint **angle_atom1,**angle_atom2,**angle_atom3;
int *num_dihedral;
int **dihedral_type;
tagint **dihedral_atom1,**dihedral_atom2,**dihedral_atom3,**dihedral_atom4;
int *num_improper;
int **improper_type;
tagint **improper_atom1,**improper_atom2,**improper_atom3,**improper_atom4;
DAT::t_tagint_1d d_tag;
DAT::t_int_1d d_type, d_mask;
HAT::t_tagint_1d h_tag;
HAT::t_int_1d h_type, h_mask;
DAT::t_imageint_1d d_image;
HAT::t_imageint_1d h_image;
DAT::t_x_array d_x;
DAT::t_v_array d_v;
DAT::t_f_array d_f;
HAT::t_x_array h_x;
HAT::t_v_array h_v;
HAT::t_f_array h_f;
DAT::t_tagint_1d d_molecule;
DAT::t_int_2d d_nspecial;
DAT::t_tagint_2d d_special;
DAT::t_int_1d d_num_bond;
DAT::t_int_2d d_bond_type;
DAT::t_tagint_2d d_bond_atom;
HAT::t_tagint_1d h_molecule;
HAT::t_int_2d h_nspecial;
HAT::t_tagint_2d h_special;
HAT::t_int_1d h_num_bond;
HAT::t_int_2d h_bond_type;
HAT::t_tagint_2d h_bond_atom;
DAT::t_int_1d d_num_angle;
DAT::t_int_2d d_angle_type;
DAT::t_tagint_2d d_angle_atom1,d_angle_atom2,d_angle_atom3;
HAT::t_int_1d h_num_angle;
HAT::t_int_2d h_angle_type;
HAT::t_tagint_2d h_angle_atom1,h_angle_atom2,h_angle_atom3;
DAT::t_int_1d d_num_dihedral;
DAT::t_int_2d d_dihedral_type;
DAT::t_tagint_2d d_dihedral_atom1,d_dihedral_atom2,
d_dihedral_atom3,d_dihedral_atom4;
DAT::t_int_1d d_num_improper;
DAT::t_int_2d d_improper_type;
DAT::t_tagint_2d d_improper_atom1,d_improper_atom2,
d_improper_atom3,d_improper_atom4;
HAT::t_int_1d h_num_dihedral;
HAT::t_int_2d h_dihedral_type;
HAT::t_tagint_2d h_dihedral_atom1,h_dihedral_atom2,
h_dihedral_atom3,h_dihedral_atom4;
HAT::t_int_1d h_num_improper;
HAT::t_int_2d h_improper_type;
HAT::t_tagint_2d h_improper_atom1,h_improper_atom2,
h_improper_atom3,h_improper_atom4;
HAT::tdual_int_1d k_count;
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -21,6 +21,13 @@
#include "atom_masks.h"
#include "error.h"
#include "memory.h"
#include "force.h"
#include "pair.h"
#include "fix.h"
#include "compute.h"
#include "dump.h"
#include "output.h"
#include "modify.h"
using namespace LAMMPS_NS;
@ -43,15 +50,19 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
// initialize comm buffers & exchange memory
maxsend = BUFMIN;
k_buf_send = ArrayTypes<LMPDeviceType>::
tdual_xfloat_2d("comm:k_buf_send",(maxsend+BUFEXTRA+5)/6,6);
buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
// maxsend = BUFMIN;
// k_buf_send = ArrayTypes<LMPDeviceType>::
// tdual_xfloat_2d("comm:k_buf_send",(maxsend+BUFEXTRA+5)/6,6);
// buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
maxsend = 0;
buf_send = NULL;
maxrecv = BUFMIN;
k_buf_recv = ArrayTypes<LMPDeviceType>::
tdual_xfloat_2d("comm:k_buf_recv",(maxrecv+5)/6,6);
buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device();
// maxrecv = BUFMIN;
// k_buf_recv = ArrayTypes<LMPDeviceType>::
// tdual_xfloat_2d("comm:k_buf_recv",(maxrecv+5)/6,6);
// buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device();
maxrecv = 0;
buf_recv = NULL;
k_exchange_sendlist = ArrayTypes<LMPDeviceType>::
tdual_int_1d("comm:k_exchange_sendlist",100);
@ -89,6 +100,34 @@ void CommKokkos::init()
forward_comm_on_host = lmp->kokkos->forward_comm_on_host;
CommBrick::init();
int check_forward = 0;
int check_reverse = 0;
if (force->pair)
check_forward += force->pair->comm_forward;
if (force->pair)
check_reverse += force->pair->comm_reverse;
for (int i = 0; i < modify->nfix; i++) {
check_forward += modify->fix[i]->comm_forward;
check_reverse += modify->fix[i]->comm_reverse;
}
for (int i = 0; i < modify->ncompute; i++) {
check_forward += modify->compute[i]->comm_forward;
check_reverse += modify->compute[i]->comm_reverse;
}
for (int i = 0; i < output->ndump; i++) {
check_forward += output->dump[i]->comm_forward;
check_reverse += output->dump[i]->comm_reverse;
}
if (force->newton == 0) check_reverse = 0;
if (force->pair) check_reverse += force->pair->comm_reverse_off;
if(check_reverse || check_forward)
forward_comm_classic = true;
}
/* ----------------------------------------------------------------------
@ -98,8 +137,7 @@ void CommKokkos::init()
void CommKokkos::forward_comm(int dummy)
{
if (!forward_comm_classic) {
if (!forward_comm_classic) {
if (forward_comm_on_host) forward_comm_device<LMPHostType>(dummy);
else forward_comm_device<LMPDeviceType>(dummy);
return;
@ -205,6 +243,68 @@ void CommKokkos::forward_comm_device(int dummy)
}
}
}
void CommKokkos::reverse_comm()
{
k_sendlist.sync<LMPHostType>();
if (comm_f_only)
atomKK->sync(Host,F_MASK);
else
atomKK->sync(Host,ALL_MASK);
CommBrick::reverse_comm();
if (comm_f_only)
atomKK->modified(Host,F_MASK);
else
atomKK->modified(Host,ALL_MASK);
atomKK->sync(Device,ALL_MASK);
}
void CommKokkos::forward_comm_fix(Fix *fix)
{
k_sendlist.sync<LMPHostType>();
CommBrick::forward_comm_fix(fix);
}
void CommKokkos::reverse_comm_fix(Fix *fix)
{
k_sendlist.sync<LMPHostType>();
CommBrick::reverse_comm_fix(fix);
}
void CommKokkos::forward_comm_compute(Compute *compute)
{
k_sendlist.sync<LMPHostType>();
CommBrick::forward_comm_compute(compute);
}
void CommKokkos::reverse_comm_compute(Compute *compute)
{
k_sendlist.sync<LMPHostType>();
CommBrick::reverse_comm_compute(compute);
}
void CommKokkos::forward_comm_pair(Pair *pair)
{
k_sendlist.sync<LMPHostType>();
CommBrick::forward_comm_pair(pair);
}
void CommKokkos::reverse_comm_pair(Pair *pair)
{
k_sendlist.sync<LMPHostType>();
CommBrick::reverse_comm_pair(pair);
}
void CommKokkos::forward_comm_dump(Dump *dump)
{
k_sendlist.sync<LMPHostType>();
CommBrick::forward_comm_dump(dump);
}
void CommKokkos::reverse_comm_dump(Dump *dump)
{
k_sendlist.sync<LMPHostType>();
CommBrick::reverse_comm_dump(dump);
}
/* ----------------------------------------------------------------------
exchange: move atoms to correct processors
@ -219,6 +319,16 @@ void CommKokkos::forward_comm_device(int dummy)
void CommKokkos::exchange()
{
if(atom->nextra_grow + atom->nextra_border) {
if(!exchange_comm_classic) {
static int print = 1;
if(print) {
error->warning(FLERR,"Kokkos communication does not currently support fixes sending data. Switching to classic communication.");
print = 0;
}
exchange_comm_classic = true;
}
}
if (!exchange_comm_classic) {
if (exchange_comm_on_host) exchange_device<LMPHostType>();
else exchange_device<LMPDeviceType>();
@ -463,10 +573,12 @@ void CommKokkos::borders()
}
atomKK->sync(Host,ALL_MASK);
k_sendlist.modify<LMPHostType>();
atomKK->modified(Host,ALL_MASK);
CommBrick::borders();
k_sendlist.modify<LMPHostType>();
atomKK->modified(Host,ALL_MASK);
}
/* ---------------------------------------------------------------------- */
@ -496,7 +608,7 @@ struct BuildBorderListFunctor {
KOKKOS_INLINE_FUNCTION
void operator() (DeviceType dev) const {
void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const {
const int chunk = ((nlast - nfirst + dev.league_size() - 1 ) /
dev.league_size());
const int teamstart = chunk*dev.league_rank() + nfirst;
@ -517,7 +629,7 @@ struct BuildBorderListFunctor {
}
}
size_t shmem_size() const { return 1000u;}
size_t shmem_size(const int team_size) const { (void) team_size; return 1000u;}
};
/* ---------------------------------------------------------------------- */
@ -591,16 +703,19 @@ void CommKokkos::borders_device() {
total_send.template modify<DeviceType>();
total_send.template sync<LMPDeviceType>();
}
BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
Kokkos::parallel_for(config,f);
DeviceType::fence();
total_send.template modify<DeviceType>();
total_send.template sync<LMPHostType>();
if(total_send.h_view(0) >= maxsendlist[iswap]) {
grow_list(iswap,total_send.h_view(0));
k_sendlist.modify<DeviceType>();
total_send.h_view(0) = 0;
if(exec_space == Device) {
total_send.template modify<LMPHostType>();
@ -608,7 +723,7 @@ void CommKokkos::borders_device() {
}
BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
Kokkos::parallel_for(config,f);
DeviceType::fence();
total_send.template modify<DeviceType>();
@ -742,6 +857,25 @@ void CommKokkos::borders_device() {
atomKK->modified(exec_space,ALL_MASK);
DeviceType::fence();
}
/* ----------------------------------------------------------------------
realloc the size of the send buffer as needed with BUFFACTOR and bufextra
if flag = 1, realloc
if flag = 0, don't need to realloc with copy, just free/malloc
------------------------------------------------------------------------- */
void CommKokkos::grow_send(int n, int flag)
{
grow_send_kokkos(n,flag,Host);
}
/* ----------------------------------------------------------------------
free/malloc the size of the recv buffer as needed with BUFFACTOR
------------------------------------------------------------------------- */
void CommKokkos::grow_recv(int n)
{
grow_recv_kokkos(n,Host);
}
/* ----------------------------------------------------------------------
realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA

View File

@ -33,9 +33,19 @@ class CommKokkos : public CommBrick {
void init();
void forward_comm(int dummy = 0); // forward comm of atom coords
void reverse_comm(); // reverse comm of atom coords
void exchange(); // move atoms to new procs
void borders(); // setup list of atoms to comm
void forward_comm_pair(class Pair *); // forward comm from a Pair
void reverse_comm_pair(class Pair *); // reverse comm from a Pair
void forward_comm_fix(class Fix *); // forward comm from a Fix
void reverse_comm_fix(class Fix *); // reverse comm from a Fix
void forward_comm_compute(class Compute *); // forward from a Compute
void reverse_comm_compute(class Compute *); // reverse from a Compute
void forward_comm_dump(class Dump *); // forward comm from a Dump
void reverse_comm_dump(class Dump *); // reverse comm from a Dump
template<class DeviceType> void forward_comm_device(int dummy);
template<class DeviceType> void exchange_device();
template<class DeviceType> void borders_device();
@ -48,6 +58,8 @@ class CommKokkos : public CommBrick {
//double *buf_send; // send buffer for all comm
//double *buf_recv; // recv buffer for all comm
void grow_send(int, int);
void grow_recv(int);
void grow_send_kokkos(int, int, ExecutionSpace space = Host);
void grow_recv_kokkos(int, ExecutionSpace space = Host);
void grow_list(int, int);

View File

@ -0,0 +1,810 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "math.h"
#include "stdio.h"
#include "string.h"
#include "fix_langevin_kokkos.h"
#include "atom_masks.h"
#include "atom_kokkos.h"
#include "force.h"
#include "update.h"
#include "respa.h"
#include "error.h"
#include "memory.h"
#include "group.h"
#include "random_mars.h"
#include "compute.h"
#include "comm.h"
#include "modify.h"
#include "input.h"
#include "variable.h"
using namespace LAMMPS_NS;
using namespace FixConst;
enum{NOBIAS,BIAS};
enum{CONSTANT,EQUAL,ATOM};
#define SINERTIA 0.4 // moment of inertia prefactor for sphere
#define EINERTIA 0.2 // moment of inertia prefactor for ellipsoid
/* ---------------------------------------------------------------------- */
template<class DeviceType>
FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **arg) :
FixLangevin(lmp, narg, arg),rand_pool(seed + comm->me)
{
atomKK = (AtomKokkos *) atom;
int ntypes = atomKK->ntypes;
// allocate per-type arrays for force prefactors
memory->create_kokkos(k_gfactor1,gfactor1,ntypes+1,"langevin:gfactor1");
memory->create_kokkos(k_gfactor2,gfactor2,ntypes+1,"langevin:gfactor2");
memory->create_kokkos(k_ratio,ratio,ntypes+1,"langevin:ratio");
d_gfactor1 = k_gfactor1.template view<DeviceType>();
h_gfactor1 = k_gfactor1.template view<LMPHostType>();
d_gfactor2 = k_gfactor2.template view<DeviceType>();
h_gfactor2 = k_gfactor2.template view<LMPHostType>();
d_ratio = k_ratio.template view<DeviceType>();
h_ratio = k_ratio.template view<LMPHostType>();
// optional args
for (int i = 1; i <= ntypes; i++) ratio[i] = 1.0;
k_ratio.template modify<LMPHostType>();
if(gjfflag){
nvalues = 3;
grow_arrays(atomKK->nmax);
atom->add_callback(0);
// initialize franprev to zero
for (int i = 0; i < atomKK->nlocal; i++) {
franprev[i][0] = 0.0;
franprev[i][1] = 0.0;
franprev[i][2] = 0.0;
}
k_franprev.template modify<LMPHostType>();
}
if(zeroflag){
k_fsumall = tdual_double_1d_3n("langevin:fsumall");
h_fsumall = k_fsumall.template view<LMPHostType>();
d_fsumall = k_fsumall.template view<DeviceType>();
}
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = V_MASK | F_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK;
datamask_modify = F_MASK;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
FixLangevinKokkos<DeviceType>::~FixLangevinKokkos()
{
memory->destroy_kokkos(k_gfactor1,gfactor1);
memory->destroy_kokkos(k_gfactor2,gfactor2);
memory->destroy_kokkos(k_ratio,ratio);
memory->destroy_kokkos(k_flangevin,flangevin);
if(gjfflag) memory->destroy_kokkos(k_franprev,franprev);
memory->destroy_kokkos(k_tforce,tforce);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixLangevinKokkos<DeviceType>::init()
{
FixLangevin::init();
if(oflag)
error->all(FLERR,"Fix langevin omega is not yet implemented with kokkos");
if(ascale)
error->all(FLERR,"Fix langevin angmom is not yet implemented with kokkos");
// prefactors are modified in the init
k_gfactor1.template modify<LMPHostType>();
k_gfactor2.template modify<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixLangevinKokkos<DeviceType>::grow_arrays(int nmax)
{
memory->grow_kokkos(k_franprev,franprev,nmax,3,"langevin:franprev");
d_franprev = k_franprev.template view<DeviceType>();
h_franprev = k_franprev.template view<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixLangevinKokkos<DeviceType>::post_force(int vflag)
{
// sync the device views which might have been modified on host
atomKK->sync(execution_space,datamask_read);
rmass = atomKK->rmass;
f = atomKK->k_f.template view<DeviceType>();
v = atomKK->k_v.template view<DeviceType>();
type = atomKK->k_type.template view<DeviceType>();
mask = atomKK->k_mask.template view<DeviceType>();
k_gfactor1.template sync<DeviceType>();
k_gfactor2.template sync<DeviceType>();
k_ratio.template sync<DeviceType>();
if(gjfflag) k_franprev.template sync<DeviceType>();
boltz = force->boltz;
dt = update->dt;
mvv2e = force->mvv2e;
ftm2v = force->ftm2v;
fran_prop_const = sqrt(24.0*boltz/t_period/dt/mvv2e);
compute_target(); // modifies tforce vector, hence sync here
k_tforce.template sync<DeviceType>();
double fsum[3],fsumall[3];
bigint count;
int nlocal = atomKK->nlocal;
if (zeroflag) {
fsum[0] = fsum[1] = fsum[2] = 0.0;
count = group->count(igroup);
if (count == 0)
error->all(FLERR,"Cannot zero Langevin force of 0 atoms");
}
// reallocate flangevin if necessary
if (tallyflag) {
if (nlocal > maxatom1) {
memory->destroy_kokkos(k_flangevin,flangevin);
maxatom1 = atomKK->nmax;
memory->create_kokkos(k_flangevin,flangevin,maxatom1,3,"langevin:flangevin");
d_flangevin = k_flangevin.template view<DeviceType>();
h_flangevin = k_flangevin.template view<LMPHostType>();
}
}
// account for bias velocity
if(tbiasflag == BIAS){
temperature->compute_scalar();
temperature->remove_bias_all(); // modifies velocities
// if temeprature compute is kokkosized host-devcie comm won't be needed
atomKK->modified(Host,V_MASK);
atomKK->sync(execution_space,V_MASK);
}
// compute langevin force in parallel on the device
FSUM s_fsum;
if (tstyle == ATOM)
if (gjfflag)
if (tallyflag)
if (tbiasflag == BIAS)
if (rmass)
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,1,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else{
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,1,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,0,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,1,0,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (rmass)
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,1,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,1,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,0,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else{
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,1,0,0,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (tbiasflag == BIAS)
if (rmass)
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,1,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,1,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,0,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,1,0,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (rmass)
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,1,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,1,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,0,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,1,1,0,0,0,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (tallyflag)
if (tbiasflag == BIAS)
if (rmass)
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,1,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,1,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,0,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,1,0,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (rmass)
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,1,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,1,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,0,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,1,0,0,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (tbiasflag == BIAS)
if (rmass)
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,1,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,1,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,0,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,1,0,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (rmass)
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,1,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,1,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,0,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,1,0,0,0,0,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (gjfflag)
if (tallyflag)
if (tbiasflag == BIAS)
if (rmass)
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,1,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,1,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,0,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,1,0,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (rmass)
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,1,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,1,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,0,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,1,0,0,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (tbiasflag == BIAS)
if (rmass)
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,1,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,1,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,0,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,1,0,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (rmass)
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,1,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,1,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,0,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,0,1,0,0,0,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (tallyflag)
if (tbiasflag == BIAS)
if (rmass)
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,1,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,1,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,0,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,1,0,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (rmass)
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,1,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,1,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,0,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,1,0,0,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (tbiasflag == BIAS)
if (rmass)
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,1,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,1,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,0,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,1,0,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (rmass)
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,1,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,1,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
else
if (zeroflag) {
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,0,1> post_functor(this);
Kokkos::parallel_reduce(nlocal,post_functor,s_fsum);
}
else {
FixLangevinKokkosPostForceFunctor<DeviceType,0,0,0,0,0,0> post_functor(this);
Kokkos::parallel_for(nlocal,post_functor);
}
DeviceType::fence();
if(tbiasflag == BIAS){
temperature->restore_bias_all(); // modifies velocities
atomKK->modified(Host,V_MASK);
}
// set modify flags for the views modified in post_force functor
if (gjfflag) k_franprev.template modify<DeviceType>();
if (tallyflag) k_flangevin.template modify<DeviceType>();
// set total force to zero
if (zeroflag) {
fsum[0] = s_fsum.fx; fsum[1] = s_fsum.fy; fsum[2] = s_fsum.fz;
MPI_Allreduce(fsum,fsumall,3,MPI_DOUBLE,MPI_SUM,world);
h_fsumall(0) = fsumall[0]/count;
h_fsumall(1) = fsumall[1]/count;
h_fsumall(2) = fsumall[2]/count;
k_fsumall.template modify<LMPHostType>();
k_fsumall.template sync<DeviceType>();
// set total force zero in parallel on the device
FixLangevinKokkosZeroForceFunctor<DeviceType> zero_functor(this);
Kokkos::parallel_for(nlocal,zero_functor);
DeviceType::fence();
}
// f is modified by both post_force and zero_force functors
atomKK->modified(execution_space,datamask_modify);
// thermostat omega and angmom
// if (oflag) omega_thermostat();
// if (ascale) angmom_thermostat();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY,
int Tp_BIAS, int Tp_RMASS, int Tp_ZERO>
KOKKOS_INLINE_FUNCTION
FSUM FixLangevinKokkos<DeviceType>::post_force_item(int i) const
{
FSUM fsum;
double fdrag[3],fran[3];
double gamma1,gamma2;
double fswap;
double tsqrt_t = tsqrt;
if (mask[i] & groupbit) {
rand_type rand_gen = rand_pool.get_state();
if(Tp_TSTYLEATOM) tsqrt_t = sqrt(d_tforce[i]);
if(Tp_RMASS){
gamma1 = -rmass[i] / t_period / ftm2v;
gamma2 = sqrt(rmass[i]) * fran_prop_const / ftm2v;
gamma1 *= 1.0/d_ratio[type[i]];
gamma2 *= 1.0/sqrt(d_ratio[type[i]]) * tsqrt_t;
} else {
gamma1 = d_gfactor1[type[i]];
gamma2 = d_gfactor2[type[i]] * tsqrt_t;
}
fran[0] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
fran[1] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
fran[2] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
if(Tp_BIAS){
fdrag[0] = gamma1*v(i,0);
fdrag[1] = gamma1*v(i,1);
fdrag[2] = gamma1*v(i,2);
if (v(i,0) == 0.0) fran[0] = 0.0;
if (v(i,1) == 0.0) fran[1] = 0.0;
if (v(i,2) == 0.0) fran[2] = 0.0;
}else{
fdrag[0] = gamma1*v(i,0);
fdrag[1] = gamma1*v(i,1);
fdrag[2] = gamma1*v(i,2);
}
if (Tp_GJF) {
fswap = 0.5*(fran[0]+d_franprev(i,0));
d_franprev(i,0) = fran[0];
fran[0] = fswap;
fswap = 0.5*(fran[1]+d_franprev(i,1));
d_franprev(i,1) = fran[1];
fran[1] = fswap;
fswap = 0.5*(fran[2]+d_franprev(i,2));
d_franprev(i,2) = fran[2];
fran[2] = fswap;
fdrag[0] *= gjffac;
fdrag[1] *= gjffac;
fdrag[2] *= gjffac;
fran[0] *= gjffac;
fran[1] *= gjffac;
fran[2] *= gjffac;
f(i,0) *= gjffac;
f(i,1) *= gjffac;
f(i,2) *= gjffac;
}
f(i,0) += fdrag[0] + fran[0];
f(i,1) += fdrag[1] + fran[1];
f(i,2) += fdrag[2] + fran[2];
if (Tp_TALLY) {
d_flangevin(i,0) = fdrag[0] + fran[0];
d_flangevin(i,1) = fdrag[1] + fran[1];
d_flangevin(i,2) = fdrag[2] + fran[2];
}
if (Tp_ZERO) {
fsum.fx = fran[0];
fsum.fy = fran[1];
fsum.fz = fran[2];
}
rand_pool.free_state(rand_gen);
}
return fsum;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixLangevinKokkos<DeviceType>::zero_force_item(int i) const
{
if (mask[i] & groupbit) {
f(i,0) -= d_fsumall[0];
f(i,1) -= d_fsumall[1];
f(i,2) -= d_fsumall[2];
}
}
/* ----------------------------------------------------------------------
set current t_target and t_sqrt
------------------------------------------------------------------------- */
template<class DeviceType>
void FixLangevinKokkos<DeviceType>::compute_target()
{
atomKK->sync(Host, MASK_MASK);
mask = atomKK->k_mask.template view<DeviceType>();
int nlocal = atomKK->nlocal;
double delta = update->ntimestep - update->beginstep;
if (delta != 0.0) delta /= update->endstep - update->beginstep;
// if variable temp, evaluate variable, wrap with clear/add
// reallocate tforce array if necessary
if (tstyle == CONSTANT) {
t_target = t_start + delta * (t_stop-t_start);
tsqrt = sqrt(t_target);
} else {
modify->clearstep_compute();
if (tstyle == EQUAL) {
t_target = input->variable->compute_equal(tvar);
if (t_target < 0.0)
error->one(FLERR,"Fix langevin variable returned negative temperature");
tsqrt = sqrt(t_target);
} else {
if (nlocal > maxatom2) {
maxatom2 = atom->nmax;
memory->destroy_kokkos(k_tforce,tforce);
memory->create_kokkos(k_tforce,tforce,maxatom2,"langevin:tforce");
d_tforce = k_tforce.template view<DeviceType>();
h_tforce = k_tforce.template view<LMPHostType>();
}
input->variable->compute_atom(tvar,igroup,tforce,1,0); // tforce is modified on host
k_tforce.template modify<LMPHostType>();
for (int i = 0; i < nlocal; i++)
if (mask[i] & groupbit)
if (h_tforce[i] < 0.0)
error->one(FLERR,
"Fix langevin variable returned negative temperature");
}
modify->addstep_compute(update->ntimestep + 1);
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixLangevinKokkos<DeviceType>::reset_dt()
{
if (atomKK->mass) {
for (int i = 1; i <= atomKK->ntypes; i++) {
h_gfactor2[i] = sqrt(atomKK->mass[i]) *
sqrt(24.0*force->boltz/t_period/update->dt/force->mvv2e) /
force->ftm2v;
h_gfactor2[i] *= 1.0/sqrt(h_ratio[i]);
}
k_gfactor2.template modify<LMPHostType>();
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
double FixLangevinKokkos<DeviceType>::compute_scalar()
{
if (!tallyflag || flangevin == NULL) return 0.0;
v = atomKK->k_v.template view<DeviceType>();
mask = atomKK->k_mask.template view<DeviceType>();
// capture the very first energy transfer to thermal reservoir
if (update->ntimestep == update->beginstep) {
energy_onestep = 0.0;
atomKK->sync(execution_space,V_MASK | MASK_MASK);
int nlocal = atomKK->nlocal;
k_flangevin.template sync<DeviceType>();
FixLangevinKokkosTallyEnergyFunctor<DeviceType> scalar_functor(this);
Kokkos::parallel_reduce(nlocal,scalar_functor,energy_onestep);
DeviceType::fence();
energy = 0.5*energy_onestep*update->dt;
}
// convert midstep energy back to previous fullstep energy
double energy_me = energy - 0.5*energy_onestep*update->dt;
double energy_all;
MPI_Allreduce(&energy_me,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
return -energy_all;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
double FixLangevinKokkos<DeviceType>::compute_energy_item(int i) const
{
double energy;
if (mask[i] & groupbit)
energy = d_flangevin(i,0)*v(i,0) + d_flangevin(i,1)*v(i,1) +
d_flangevin(i,2)*v(i,2);
return energy;
}
/* ----------------------------------------------------------------------
tally energy transfer to thermal reservoir
------------------------------------------------------------------------- */
template<class DeviceType>
void FixLangevinKokkos<DeviceType>::end_of_step()
{
if (!tallyflag) return;
v = atomKK->k_v.template view<DeviceType>();
mask = atomKK->k_mask.template view<DeviceType>();
atomKK->sync(execution_space,V_MASK | MASK_MASK);
int nlocal = atomKK->nlocal;
energy_onestep = 0.0;
k_flangevin.template sync<DeviceType>();
FixLangevinKokkosTallyEnergyFunctor<DeviceType> tally_functor(this);
Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep);
DeviceType::fence();
energy += energy_onestep*update->dt;
}
/* ----------------------------------------------------------------------
copy values within local atom-based array
------------------------------------------------------------------------- */
template<class DeviceType>
void FixLangevinKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
{
for (int m = 0; m < nvalues; m++)
h_franprev(j,m) = h_franprev(i,m);
k_franprev.template modify<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixLangevinKokkos<DeviceType>::cleanup_copy()
{
random = NULL;
tstr = NULL;
gfactor1 = NULL;
gfactor2 = NULL;
ratio = NULL;
id_temp = NULL;
flangevin = NULL;
tforce = NULL;
gjfflag = 0;
franprev = NULL;
id = style = NULL;
vatom = NULL;
}
template class FixLangevinKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class FixLangevinKokkos<LMPHostType>;
#endif

View File

@ -0,0 +1,266 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(langevin/kk,FixLangevinKokkos<LMPDeviceType>)
FixStyle(langevin/kk/device,FixLangevinKokkos<LMPDeviceType>)
FixStyle(langevin/kk/host,FixLangevinKokkos<LMPHostType>)
#else
#ifndef LMP_FIX_LANGEVIN_KOKKOS_H
#define LMP_FIX_LANGEVIN_KOKKOS_H
#include "fix_langevin.h"
#include "kokkos_type.h"
#include "Kokkos_Random.hpp"
#include "comm_kokkos.h"
namespace LAMMPS_NS {
struct s_FSUM {
double fx, fy, fz;
KOKKOS_INLINE_FUNCTION
s_FSUM() {
fx = fy = fz = 0.0;
}
KOKKOS_INLINE_FUNCTION
s_FSUM& operator+=(const s_FSUM &rhs){
fx += rhs.fx;
fy += rhs.fy;
fz += rhs.fz;
return *this;
}
};
typedef s_FSUM FSUM;
template<class DeviceType>
class FixLangevinKokkos;
template<class DeviceType,int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY,
int Tp_BIAS, int Tp_RMASS, int Tp_ZERO>
class FixLangevinKokkosPostForceFunctor;
template<class DeviceType> class FixLangevinKokkosZeroForceFunctor;
template<class DeviceType> class FixLangevinKokkosTallyEnergyFunctor;
template<class DeviceType>
class FixLangevinKokkos : public FixLangevin {
public:
FixLangevinKokkos(class LAMMPS *, int, char **);
~FixLangevinKokkos();
void cleanup_copy();
void init();
void post_force(int);
void reset_dt();
void grow_arrays(int);
void copy_arrays(int i, int j, int delflag);
double compute_scalar();
void end_of_step();
template<int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY,
int Tp_BIAS, int Tp_RMASS, int Tp_ZERO>
KOKKOS_INLINE_FUNCTION
FSUM post_force_item(int) const;
KOKKOS_INLINE_FUNCTION
void zero_force_item(int) const;
KOKKOS_INLINE_FUNCTION
double compute_energy_item(int) const;
private:
class CommKokkos *commKK;
class AtomKokkos *atomKK;
double *rmass;
typename ArrayTypes<DeviceType>::tdual_double_2d k_franprev;
typename ArrayTypes<DeviceType>::t_double_2d d_franprev;
HAT::t_double_2d h_franprev;
typename ArrayTypes<DeviceType>::tdual_double_2d k_flangevin;
typename ArrayTypes<DeviceType>::t_double_2d d_flangevin;
HAT::t_double_2d h_flangevin;
typename ArrayTypes<DeviceType>::tdual_double_1d k_tforce;
typename ArrayTypes<DeviceType>::t_double_1d d_tforce;
HAT::t_double_1d h_tforce;
typename ArrayTypes<DeviceType>::t_v_array v;
typename ArrayTypes<DeviceType>::t_f_array f;
typename ArrayTypes<DeviceType>::t_int_1d type;
typename ArrayTypes<DeviceType>::t_int_1d mask;
typename ArrayTypes<DeviceType>::tdual_double_1d k_gfactor1, k_gfactor2, k_ratio;
typename ArrayTypes<DeviceType>::t_double_1d d_gfactor1, d_gfactor2, d_ratio;
HAT::t_double_1d h_gfactor1, h_gfactor2, h_ratio;
typedef Kokkos::DualView<double[3], DeviceType>
tdual_double_1d_3n;
tdual_double_1d_3n k_fsumall;
typename tdual_double_1d_3n::t_dev d_fsumall;
typename tdual_double_1d_3n::t_host h_fsumall;
double boltz,dt,mvv2e,ftm2v,fran_prop_const;
void compute_target();
Kokkos::Random_XorShift64_Pool<DeviceType> rand_pool;
typedef typename Kokkos::Random_XorShift64_Pool<DeviceType>::generator_type rand_type;
};
template <class DeviceType,int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY,
int Tp_BIAS, int Tp_RMASS, int Tp_ZERO>
struct FixLangevinKokkosPostForceFunctor {
typedef DeviceType device_type;
typedef FSUM value_type;
FixLangevinKokkos<DeviceType> c;
FixLangevinKokkosPostForceFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
c(*c_ptr) {}
~FixLangevinKokkosPostForceFunctor(){c.cleanup_copy();}
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
c.template post_force_item<Tp_TSTYLEATOM,Tp_GJF, Tp_TALLY,
Tp_BIAS,Tp_RMASS,Tp_ZERO>(i);
}
KOKKOS_INLINE_FUNCTION
void operator()(const int i, value_type &fsum) const {
fsum += c.template post_force_item<Tp_TSTYLEATOM,Tp_GJF, Tp_TALLY,
Tp_BIAS,Tp_RMASS,Tp_ZERO>(i);
}
KOKKOS_INLINE_FUNCTION
static void init(volatile value_type &update) {
update.fx = 0.0;
update.fy = 0.0;
update.fz = 0.0;
}
KOKKOS_INLINE_FUNCTION
static void join(volatile value_type &update,
const volatile value_type &source) {
update.fx += source.fx;
update.fy += source.fy;
update.fz += source.fz;
}
};
template <class DeviceType>
struct FixLangevinKokkosZeroForceFunctor {
typedef DeviceType device_type ;
FixLangevinKokkos<DeviceType> c;
FixLangevinKokkosZeroForceFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
c(*c_ptr) {c.cleanup_copy();}
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
c.zero_force_item(i);
}
};
template<class DeviceType>
struct FixLangevinKokkosTallyEnergyFunctor {
typedef DeviceType device_type ;
FixLangevinKokkos<DeviceType> c;
typedef double value_type;
FixLangevinKokkosTallyEnergyFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
c(*c_ptr) {c.cleanup_copy();}
KOKKOS_INLINE_FUNCTION
void operator()(const int i, value_type &energy) const {
energy += c.compute_energy_item(i);
}
KOKKOS_INLINE_FUNCTION
static void init(volatile value_type &update) {
update = 0.0;
}
KOKKOS_INLINE_FUNCTION
static void join(volatile value_type &update,
const volatile value_type &source) {
update += source;
}
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Illegal ... command
Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: Fix langevin period must be > 0.0
The time window for temperature relaxation must be > 0
E: Fix langevin omega requires atom style sphere
Self-explanatory.
E: Fix langevin angmom requires atom style ellipsoid
Self-explanatory.
E: Variable name for fix langevin does not exist
Self-explanatory.
E: Variable for fix langevin is invalid style
It must be an equal-style variable.
E: Fix langevin omega requires extended particles
One of the particles has radius 0.0.
E: Fix langevin angmom requires extended particles
This fix option cannot be used with point paritlces.
E: Cannot zero Langevin force of 0 atoms
The group has zero atoms, so you cannot request its force
be zeroed.
E: Fix langevin variable returned negative temperature
Self-explanatory.
E: Could not find fix_modify temperature ID
The compute ID for computing temperature does not exist.
E: Fix_modify temperature ID does not compute temperature
The compute ID assigned to the fix must compute temperature.
W: Group for fix_modify temp != fix group
The fix_modify command is specifying a temperature computation that
computes a temperature on a different group of atoms than the fix
itself operates on. This is probably not what you want to do.
*/

View File

@ -172,6 +172,6 @@ void FixNVEKokkos<DeviceType>::cleanup_copy()
}
template class FixNVEKokkos<LMPDeviceType>;
#if DEVICE==2
#ifdef KOKKOS_HAVE_CUDA
template class FixNVEKokkos<LMPHostType>;
#endif

View File

@ -23,8 +23,6 @@
using namespace LAMMPS_NS;
enum{FULL,HALFTHREAD,HALF};
/* ---------------------------------------------------------------------- */
KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
@ -49,13 +47,13 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
strcmp(arg[iarg],"gpus") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
int ngpu = atoi(arg[iarg+1]);
iarg += 2;
int skip_gpu = 9999;
if (iarg+2 < narg && isdigit(arg[iarg+2][0])) {
skip_gpu = atoi(arg[iarg+2]);
iarg++;
}
iarg += 2;
char *str;
if (str = getenv("SLURM_LOCALID")) {
@ -89,7 +87,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
// initialize Kokkos
#if DEVICE==2
#ifdef KOKKOS_HAVE_CUDA
Kokkos::Cuda::host_mirror_device_type::initialize(num_threads,numa);
Kokkos::Cuda::SelectDevice select_device(device);
Kokkos::Cuda::initialize(select_device);
@ -112,7 +110,7 @@ KokkosLMP::~KokkosLMP()
{
// finalize Kokkos
#if DEVICE==2
#ifdef KOKKOS_HAVE_CUDA
Kokkos::Cuda::finalize();
Kokkos::Cuda::host_mirror_device_type::finalize();
#else

View File

@ -14,71 +14,43 @@
#ifndef LMP_LMPTYPE_KOKKOS_H
#define LMP_LMPTYPE_KOKKOS_H
#include <Kokkos_View.hpp>
#include <Kokkos_Macros.hpp>
#include <Kokkos_Atomic.hpp>
#include <Kokkos_Core.hpp>
#include <Kokkos_DualView.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Vectorization.hpp>
#define MAX_TYPES_STACKPARAMS 12
#define NeighClusterSize 8
// set LMPHostype and LMPDeviceType
#ifndef DEVICE
#define DEVICE 1
#ifndef __CUDACC__
struct double2 {
double x, y;
};
struct float2 {
float x, y;
};
struct double4 {
double x, y, z, w;
};
struct float4 {
float x, y, z, w;
};
#endif
#if DEVICE==1
#ifdef KOKKOS_HAVE_OPENMP
#include "Kokkos_OpenMP.hpp"
typedef Kokkos::OpenMP LMPDeviceType;
typedef Kokkos::OpenMP LMPHostType;
#else
#include "Kokkos_Threads.hpp"
typedef Kokkos::Threads LMPDeviceType;
typedef Kokkos::Threads LMPHostType;
#endif
#ifndef __CUDACC__
struct double2 {
double x, y;
};
struct float2 {
float x, y;
};
struct double4 {
double x, y, z, w;
};
struct float4 {
float x, y, z, w;
};
#endif
#else
#include "cuda.h"
#include "cuda_runtime.h"
#include "Kokkos_Cuda.hpp"
#include "Kokkos_Threads.hpp"
typedef Kokkos::Cuda LMPDeviceType;
typedef Kokkos::Cuda::host_mirror_device_type LMPHostType;
#endif
// set LMPHostype and LMPDeviceType from Kokkos Default Types
typedef Kokkos::DefaultExecutionSpace LMPDeviceType;
typedef Kokkos::DefaultExecutionSpace::host_mirror_device_type LMPHostType;
// set ExecutionSpace stuct with variable "space"
template<class Device>
struct ExecutionSpaceFromDevice;
#ifdef KOKKOS_HAVE_OPENMP
template<>
struct ExecutionSpaceFromDevice<Kokkos::OpenMP> {
struct ExecutionSpaceFromDevice<LMPHostType> {
static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Host;
};
#else
template<>
struct ExecutionSpaceFromDevice<Kokkos::Threads> {
static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Host;
};
#endif
#if DEVICE==2
#ifdef KOKKOS_HAVE_CUDA
template<>
struct ExecutionSpaceFromDevice<Kokkos::Cuda> {
static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Device;
@ -142,16 +114,27 @@ struct s_EV_FLOAT {
}
KOKKOS_INLINE_FUNCTION
s_EV_FLOAT& operator+=(const s_EV_FLOAT &rhs) {
evdwl += rhs.evdwl;
ecoul += rhs.ecoul;
v[0] += rhs.v[0];
v[1] += rhs.v[1];
v[2] += rhs.v[2];
v[3] += rhs.v[3];
v[4] += rhs.v[4];
v[5] += rhs.v[5];
return *this;
void operator+=(const s_EV_FLOAT &rhs) {
evdwl += rhs.evdwl;
ecoul += rhs.ecoul;
v[0] += rhs.v[0];
v[1] += rhs.v[1];
v[2] += rhs.v[2];
v[3] += rhs.v[3];
v[4] += rhs.v[4];
v[5] += rhs.v[5];
}
KOKKOS_INLINE_FUNCTION
void operator+=(const volatile s_EV_FLOAT &rhs) volatile {
evdwl += rhs.evdwl;
ecoul += rhs.ecoul;
v[0] += rhs.v[0];
v[1] += rhs.v[1];
v[2] += rhs.v[2];
v[3] += rhs.v[3];
v[4] += rhs.v[4];
v[5] += rhs.v[5];
}
};
typedef struct s_EV_FLOAT EV_FLOAT;
@ -240,7 +223,7 @@ typedef tdual_int_2d::t_dev_const_um t_int_2d_const_um;
typedef tdual_int_2d::t_dev_const_randomread t_int_2d_randomread;
typedef Kokkos::
DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, LMPDeviceType>
DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, LMPDeviceType>
tdual_tagint_1d;
typedef tdual_tagint_1d::t_dev t_tagint_1d;
typedef tdual_tagint_1d::t_dev_const t_tagint_1d_const;
@ -249,7 +232,16 @@ typedef tdual_tagint_1d::t_dev_const_um t_tagint_1d_const_um;
typedef tdual_tagint_1d::t_dev_const_randomread t_tagint_1d_randomread;
typedef Kokkos::
DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType>
DualView<LAMMPS_NS::tagint**, LMPDeviceType::array_layout, LMPDeviceType>
tdual_tagint_2d;
typedef tdual_tagint_2d::t_dev t_tagint_2d;
typedef tdual_tagint_2d::t_dev_const t_tagint_2d_const;
typedef tdual_tagint_2d::t_dev_um t_tagint_2d_um;
typedef tdual_tagint_2d::t_dev_const_um t_tagint_2d_const_um;
typedef tdual_tagint_2d::t_dev_const_randomread t_tagint_2d_randomread;
typedef Kokkos::
DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType>
tdual_imageint_1d;
typedef tdual_imageint_1d::t_dev t_imageint_1d;
typedef tdual_imageint_1d::t_dev_const t_imageint_1d_const;
@ -257,6 +249,22 @@ typedef tdual_imageint_1d::t_dev_um t_imageint_1d_um;
typedef tdual_imageint_1d::t_dev_const_um t_imageint_1d_const_um;
typedef tdual_imageint_1d::t_dev_const_randomread t_imageint_1d_randomread;
typedef Kokkos::
DualView<double*, Kokkos::LayoutRight, LMPDeviceType> tdual_double_1d;
typedef tdual_double_1d::t_dev t_double_1d;
typedef tdual_double_1d::t_dev_const t_double_1d_const;
typedef tdual_double_1d::t_dev_um t_double_1d_um;
typedef tdual_double_1d::t_dev_const_um t_double_1d_const_um;
typedef tdual_double_1d::t_dev_const_randomread t_double_1d_randomread;
typedef Kokkos::
DualView<double**, Kokkos::LayoutRight, LMPDeviceType> tdual_double_2d;
typedef tdual_double_2d::t_dev t_double_2d;
typedef tdual_double_2d::t_dev_const t_double_2d_const;
typedef tdual_double_2d::t_dev_um t_double_2d_um;
typedef tdual_double_2d::t_dev_const_um t_double_2d_const_um;
typedef tdual_double_2d::t_dev_const_randomread t_double_2d_randomread;
// 1d float array n
typedef Kokkos::DualView<LMP_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_1d;
@ -406,7 +414,7 @@ typedef tdual_neighbors_2d::t_dev_const_randomread t_neighbors_2d_randomread;
};
#if DEVICE==2
#ifdef KOKKOS_HAVE_CUDA
template <>
struct ArrayTypes<LMPHostType> {
@ -446,13 +454,40 @@ typedef tdual_tagint_1d::t_host_um t_tagint_1d_um;
typedef tdual_tagint_1d::t_host_const_um t_tagint_1d_const_um;
typedef tdual_tagint_1d::t_host_const_randomread t_tagint_1d_randomread;
typedef Kokkos::DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType> tdual_imageint_1d;
typedef Kokkos::
DualView<LAMMPS_NS::tagint**, LMPDeviceType::array_layout, LMPDeviceType>
tdual_tagint_2d;
typedef tdual_tagint_2d::t_host t_tagint_2d;
typedef tdual_tagint_2d::t_host_const t_tagint_2d_const;
typedef tdual_tagint_2d::t_host_um t_tagint_2d_um;
typedef tdual_tagint_2d::t_host_const_um t_tagint_2d_const_um;
typedef tdual_tagint_2d::t_host_const_randomread t_tagint_2d_randomread;
typedef Kokkos::
DualView<LAMMPS_NS::imageint*, LMPDeviceType::array_layout, LMPDeviceType>
tdual_imageint_1d;
typedef tdual_imageint_1d::t_host t_imageint_1d;
typedef tdual_imageint_1d::t_host_const t_imageint_1d_const;
typedef tdual_imageint_1d::t_host_um t_imageint_1d_um;
typedef tdual_imageint_1d::t_host_const_um t_imageint_1d_const_um;
typedef tdual_imageint_1d::t_host_const_randomread t_imageint_1d_randomread;
typedef Kokkos::
DualView<double*, Kokkos::LayoutRight, LMPDeviceType> tdual_double_1d;
typedef tdual_double_1d::t_host t_double_1d;
typedef tdual_double_1d::t_host_const t_double_1d_const;
typedef tdual_double_1d::t_host_um t_double_1d_um;
typedef tdual_double_1d::t_host_const_um t_double_1d_const_um;
typedef tdual_double_1d::t_host_const_randomread t_double_1d_randomread;
typedef Kokkos::
DualView<double**, Kokkos::LayoutRight, LMPDeviceType> tdual_double_2d;
typedef tdual_double_2d::t_host t_double_2d;
typedef tdual_double_2d::t_host_const t_double_2d_const;
typedef tdual_double_2d::t_host_um t_double_2d_um;
typedef tdual_double_2d::t_host_const_um t_double_2d_const_um;
typedef tdual_double_2d::t_host_const_randomread t_double_2d_randomread;
//1d float array n
typedef Kokkos::DualView<LMP_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_1d;
typedef tdual_float_1d::t_host t_float_1d;

View File

@ -13,6 +13,7 @@
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "domain_kokkos.h"
using namespace LAMMPS_NS;
@ -24,7 +25,7 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
const int nall = includegroup?atom->nfirst:atom->nlocal;
list->grow(nall);
NeighborKokkosExecute<DeviceType>
NeighborKokkosExecute<DeviceType>
data(*list,
k_cutneighsq.view<DeviceType>(),
k_bincount.view<DeviceType>(),
@ -33,14 +34,46 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
atomKK->k_type.view<DeviceType>(),
atomKK->k_mask.view<DeviceType>(),
atomKK->k_molecule.view<DeviceType>(),
atomKK->k_tag.view<DeviceType>(),
atomKK->k_special.view<DeviceType>(),
atomKK->k_nspecial.view<DeviceType>(),
atomKK->molecular,
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
bininvx,bininvy,bininvz,
bboxhi,bboxlo);
exclude, nex_type,maxex_type,
k_ex1_type.view<DeviceType>(),
k_ex2_type.view<DeviceType>(),
k_ex_type.view<DeviceType>(),
nex_group,maxex_group,
k_ex1_group.view<DeviceType>(),
k_ex2_group.view<DeviceType>(),
k_ex1_bit.view<DeviceType>(),
k_ex2_bit.view<DeviceType>(),
nex_mol, maxex_mol,
k_ex_mol_group.view<DeviceType>(),
k_ex_mol_bit.view<DeviceType>(),
bboxhi,bboxlo,
domain->xperiodic,domain->yperiodic,domain->zperiodic,
domain->xprd_half,domain->yprd_half,domain->zprd_half);
k_cutneighsq.sync<DeviceType>();
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK);
k_ex1_type.sync<DeviceType>();
k_ex2_type.sync<DeviceType>();
k_ex_type.sync<DeviceType>();
k_ex1_group.sync<DeviceType>();
k_ex2_group.sync<DeviceType>();
k_ex1_bit.sync<DeviceType>();
k_ex2_bit.sync<DeviceType>();
k_ex_mol_group.sync<DeviceType>();
k_ex_mol_bit.sync<DeviceType>();
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
data.special_flag[0] = special_flag[0];
data.special_flag[1] = special_flag[1];
data.special_flag[2] = special_flag[2];
data.special_flag[3] = special_flag[3];
while(data.h_resize() > 0) {
data.h_resize() = 0;
deep_copy(data.resize, data.h_resize);
@ -78,24 +111,24 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
Kokkos::deep_copy(data.resize, data.h_resize);
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
#if DEVICE==2
#ifdef KOKKOS_HAVE_CUDA
#define BINS_PER_BLOCK 2
const int factor = atoms_per_bin<64?2:1;
Kokkos::ParallelWorkRequest config((mbins+factor-1)/factor,atoms_per_bin*factor);
Kokkos::TeamPolicy<DeviceType> config((mbins+factor-1)/factor,atoms_per_bin*factor);
#else
const int factor = 1;
#endif
if(newton_pair) {
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#if DEVICE==2
#ifdef KOKKOS_HAVE_CUDA
Kokkos::parallel_for(config, f);
#else
Kokkos::parallel_for(nall, f);
#endif
} else {
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#if DEVICE==2
#ifdef KOKKOS_HAVE_CUDA
Kokkos::parallel_for(config, f);
#else
Kokkos::parallel_for(nall, f);
@ -134,6 +167,62 @@ void NeighborKokkosExecute<Device>::binatomsItem(const int &i) const
}
}
/* ---------------------------------------------------------------------- */
template<class Device>
KOKKOS_INLINE_FUNCTION
int NeighborKokkosExecute<Device>::find_special(const int &i, const int &j) const
{
const int n1 = nspecial(i,0);
const int n2 = nspecial(i,1);
const int n3 = nspecial(i,2);
for (int k = 0; k < n3; k++) {
if (special(i,k) == tag(j)) {
if (k < n1) {
if (special_flag[1] == 0) return -1;
else if (special_flag[1] == 1) return 0;
else return 1;
} else if (k < n2) {
if (special_flag[2] == 0) return -1;
else if (special_flag[2] == 1) return 0;
else return 2;
} else {
if (special_flag[3] == 0) return -1;
else if (special_flag[3] == 1) return 0;
else return 3;
}
}
}
return 0;
};
/* ---------------------------------------------------------------------- */
template<class Device>
KOKKOS_INLINE_FUNCTION
int NeighborKokkosExecute<Device>::exclusion(const int &i,const int &j,
const int &itype,const int &jtype) const
{
int m;
if (nex_type && ex_type(itype,jtype)) return 1;
if (nex_group) {
for (m = 0; m < nex_group; m++) {
if (mask(i) & ex1_bit(m) && mask(j) & ex2_bit(m)) return 1;
if (mask(i) & ex2_bit(m) && mask(j) & ex1_bit(m)) return 1;
}
}
if (nex_mol) {
for (m = 0; m < nex_mol; m++)
if (mask(i) & ex_mol_bit(m) && mask(j) & ex_mol_bit(m) &&
molecule(i) == molecule(j)) return 1;
}
return 0;
}
/* ---------------------------------------------------------------------- */
template<class Device> template<int HalfNeigh,int GhostNewton>
@ -142,7 +231,10 @@ void NeighborKokkosExecute<Device>::
{
/* if necessary, goto next page and add pages */
int n = 0;
int which = 0;
int moltemplate;
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
// get subview of neighbors of i
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
@ -161,52 +253,81 @@ void NeighborKokkosExecute<Device>::
if(HalfNeigh)
for(int m = 0; m < c_bincount(ibin); m++) {
const int j = c_bins(ibin,m);
// printf("%i %i %i\n",i,ibin,m,c_bincount(ibin),j);
const int jtype = type(j);
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
if((j == i) || (HalfNeigh && !GhostNewton && (j < i)) ||
(HalfNeigh && GhostNewton && ((j < i) || ((j >= nlocal) &&
((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
) continue;
//if(Exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
if(exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq <= cutneighsq(itype,jtype)) {
if(n<neigh_list.maxneighs) neighbors_i(n) = j;
n++;
if (molecular) {
if (!moltemplate)
which = find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
}else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
}
}
}
for(int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
// get subview of jbin
if(!GhostNewton&&HalfNeigh&&(ibin==jbin)) continue;
if(HalfNeigh&&(ibin==jbin)) continue;
//const ArrayTypes<Device>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
for(int m = 0; m < c_bincount(jbin); m++) {
const int j = c_bins(jbin,m);
//if(i==0)
//printf("%i %i %i %i %i %i %i\n",i,jbin,m,c_bincount(jbin),j,k,stencil[k]);
const int jtype = type(j);
if(HalfNeigh && !GhostNewton && (j < i)) continue;
if(!HalfNeigh && j==i) continue;
//if(Exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
if(exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
//if(i==0)
//printf("%i %i %lf %lf NEIGHS\n",i,j,rsq,cutneighsq(itype,jtype));
if(rsq <= cutneighsq(itype,jtype)) {
if(n<neigh_list.maxneighs) neighbors_i(n) = j;
n++;
if (molecular) {
if (!moltemplate)
which = find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
}else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
}
}
}
@ -222,23 +343,23 @@ void NeighborKokkosExecute<Device>::
neigh_list.d_ilist(i) = i;
}
#if DEVICE==2
#ifdef KOKKOS_HAVE_CUDA
extern __shared__ X_FLOAT sharedmem[];
/* ---------------------------------------------------------------------- */
template<class DeviceType> template<int HalfNeigh>
template<class DeviceType> template<int HalfNeigh,int GhostNewton>
__device__ inline
void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const
{
/* loop over atoms in i's bin,
*/
const int atoms_per_bin = c_bins.dimension_1();
const int BINS_PER_TEAM = blockDim.x/atoms_per_bin;
const int MY_BIN = threadIdx.x/atoms_per_bin;
const int MY_II = threadIdx.x%atoms_per_bin;
const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin<1?1:dev.team_size()/atoms_per_bin;
const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size();
const int MY_BIN = dev.team_rank()/atoms_per_bin;
const int ibin = (blockIdx.x)*BINS_PER_TEAM+MY_BIN;
const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN;
if(ibin >=c_bincount.dimension_0()) return;
X_FLOAT* other_x = sharedmem;
@ -248,6 +369,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
int bincount_current = c_bincount[ibin];
for(int kk = 0; kk < TEAMS_PER_BIN; kk++) {
const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
/* if necessary, goto next page and add pages */
@ -278,17 +401,45 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
#pragma unroll 4
for(int m = 0; m < bincount_current; m++) {
int j = other_id[m];
const int jtype = other_x[m + 3 * atoms_per_bin];
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
//if(j==i) continue;
if((j == i) || (HalfNeigh && (j < i))) continue;
if((j == i) ||
(HalfNeigh && !GhostNewton && (j < i)) ||
(HalfNeigh && GhostNewton &&
((j < i) ||
((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
) continue;
if(exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - other_x[m];
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
const int jtype = other_x[m + 3 * atoms_per_bin];
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if((rsq <= cutneighsq(itype,jtype)) && (n < neigh_list.maxneighs)) neighbors_i(n++) = j;
if(rsq <= cutneighsq(itype,jtype)) {
if (molecular) {
int which = 0;
if (!moltemplate)
which = find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
}else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
}
}
}
}
__syncthreads();
@ -319,15 +470,41 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
#pragma unroll 8
for(int m = 0; m < bincount_current; m++) {
const int j = other_id[m];
const int jtype = other_x[m + 3 * atoms_per_bin];
if(HalfNeigh && (j < i)) continue;
//if(HalfNeigh && (j < i)) continue;
if(HalfNeigh && !GhostNewton && (j < i)) continue;
if(!HalfNeigh && j==i) continue;
if(exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - other_x[m];
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
const int jtype = other_x[m + 3 * atoms_per_bin];
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if((rsq <= cutneighsq(itype,jtype)) && (n < neigh_list.maxneighs)) neighbors_i(n++) = j;
if(rsq <= cutneighsq(itype,jtype)) {
if (molecular) {
int which = 0;
if (!moltemplate)
which = find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
}else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
}
}
}
}
__syncthreads();
@ -343,6 +520,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
if(n >= new_maxneighs()) new_maxneighs() = n;
}
}
}
#endif
@ -361,12 +539,45 @@ void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list)
atomKK->k_type.view<DeviceType>(),
atomKK->k_mask.view<DeviceType>(),
atomKK->k_molecule.view<DeviceType>(),
atomKK->k_tag.view<DeviceType>(),
atomKK->k_special.view<DeviceType>(),
atomKK->k_nspecial.view<DeviceType>(),
atomKK->molecular,
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
bininvx,bininvy,bininvz,
bboxhi,bboxlo);
exclude, nex_type,maxex_type,
k_ex1_type.view<DeviceType>(),
k_ex2_type.view<DeviceType>(),
k_ex_type.view<DeviceType>(),
nex_group,maxex_group,
k_ex1_group.view<DeviceType>(),
k_ex2_group.view<DeviceType>(),
k_ex1_bit.view<DeviceType>(),
k_ex2_bit.view<DeviceType>(),
nex_mol, maxex_mol,
k_ex_mol_group.view<DeviceType>(),
k_ex_mol_bit.view<DeviceType>(),
bboxhi,bboxlo,
domain->xperiodic,domain->yperiodic,domain->zperiodic,
domain->xprd_half,domain->yprd_half,domain->zprd_half);
k_cutneighsq.sync<DeviceType>();
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK);
k_ex1_type.sync<DeviceType>();
k_ex2_type.sync<DeviceType>();
k_ex_type.sync<DeviceType>();
k_ex1_group.sync<DeviceType>();
k_ex2_group.sync<DeviceType>();
k_ex1_bit.sync<DeviceType>();
k_ex2_bit.sync<DeviceType>();
k_ex_mol_group.sync<DeviceType>();
k_ex_mol_bit.sync<DeviceType>();
data.special_flag[0] = special_flag[0];
data.special_flag[1] = special_flag[1];
data.special_flag[2] = special_flag[2];
data.special_flag[3] = special_flag[3];
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
DeviceType::fence();
@ -407,24 +618,24 @@ void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list)
Kokkos::deep_copy(data.resize, data.h_resize);
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
#if DEVICE==2
#ifdef KOKKOS_HAVE_CUDA
#define BINS_PER_BLOCK 2
const int factor = atoms_per_bin<64?2:1;
Kokkos::ParallelWorkRequest config((mbins+factor-1)/factor,atoms_per_bin*factor);
Kokkos::TeamPolicy<DeviceType> config((mbins+factor-1)/factor,atoms_per_bin*factor);
#else
const int factor = 1;
#endif
if(newton_pair) {
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
//#if DEVICE==2
//#ifdef KOKKOS_HAVE_CUDA
// Kokkos::parallel_for(config, f);
//#else
Kokkos::parallel_for(nall, f);
//#endif
} else {
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
//#if DEVICE==2
//#ifdef KOKKOS_HAVE_CUDA
// Kokkos::parallel_for(config, f);
//#else
Kokkos::parallel_for(nall, f);

View File

@ -113,6 +113,6 @@ void NeighListKokkos<Device>::stencil_allocate(int smax, int style)
}
template class NeighListKokkos<LMPDeviceType>;
#if DEVICE==2
#ifdef KOKKOS_HAVE_CUDA
template class NeighListKokkos<LMPHostType>;
#endif

View File

@ -20,7 +20,7 @@
namespace LAMMPS_NS {
enum{FULL,HALFTHREAD,HALF,N2,FULLCLUSTER};
enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u,FULLCLUSTER=16u};
class AtomNeighbors
{

View File

@ -51,6 +51,16 @@ NeighborKokkos::~NeighborKokkos()
delete [] pair_build_device;
delete [] pair_build_host;
memory->destroy_kokkos(k_ex1_type,ex1_type);
memory->destroy_kokkos(k_ex2_type,ex2_type);
memory->destroy_kokkos(k_ex1_group,ex1_group);
memory->destroy_kokkos(k_ex2_group,ex2_group);
memory->destroy_kokkos(k_ex_mol_group,ex_mol_group);
memory->destroy_kokkos(k_ex1_bit,ex1_bit);
memory->destroy_kokkos(k_ex2_bit,ex2_bit);
memory->destroy_kokkos(k_ex_mol_bit,ex_mol_bit);
}
/* ---------------------------------------------------------------------- */
@ -72,7 +82,7 @@ void NeighborKokkos::init_cutneighsq_kokkos(int n)
/* ---------------------------------------------------------------------- */
int NeighborKokkos::init_lists_kokkos()
{
{
int i;
for (i = 0; i < nlist_host; i++) delete lists_host[i];
@ -211,6 +221,32 @@ void NeighborKokkos::init_list_grow_kokkos(int i)
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_ex_type_kokkos(int n)
{
memory->create_kokkos(k_ex_type,ex_type,n+1,n+1,"neigh:ex_type");
k_ex_type.modify<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_ex_bit_kokkos()
{
memory->create_kokkos(k_ex1_bit, ex1_bit, nex_group, "neigh:ex1_bit");
k_ex1_bit.modify<LMPHostType>();
memory->create_kokkos(k_ex2_bit, ex2_bit, nex_group, "neigh:ex2_bit");
k_ex2_bit.modify<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_ex_mol_bit_kokkos()
{
memory->create_kokkos(k_ex_mol_bit, ex_mol_bit, nex_mol, "neigh:ex_mol_bit");
k_ex_mol_bit.modify<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::choose_build(int index, NeighRequest *rq)
{
if (rq->kokkos_host != 0) {
@ -264,6 +300,29 @@ void NeighborKokkos::setup_bins_kokkos(int i)
}
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::modify_ex_type_grow_kokkos(){
memory->grow_kokkos(k_ex1_type,ex1_type,maxex_type,"neigh:ex1_type");
k_ex1_type.modify<LMPHostType>();
memory->grow_kokkos(k_ex2_type,ex2_type,maxex_type,"neigh:ex2_type");
k_ex2_type.modify<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::modify_ex_group_grow_kokkos(){
memory->grow_kokkos(k_ex1_group,ex1_group,maxex_group,"neigh:ex1_group");
k_ex1_group.modify<LMPHostType>();
memory->grow_kokkos(k_ex2_group,ex2_group,maxex_group,"neigh:ex2_group");
k_ex2_group.modify<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::modify_mol_group_grow_kokkos(){
memory->grow_kokkos(k_ex_mol_group,ex_mol_group,maxex_mol,"neigh:ex_mol_group");
k_ex_mol_group.modify<LMPHostType>();
}
// include to trigger instantiation of templated functions
#include "neigh_full_kokkos.h"

View File

@ -17,6 +17,7 @@
#include "neighbor.h"
#include "neigh_list_kokkos.h"
#include "kokkos_type.h"
#include <math.h>
namespace LAMMPS_NS {
@ -33,8 +34,15 @@ class NeighborKokkosExecute
typename AT::t_int_2d bins;
typename AT::t_int_2d_const c_bins;
const typename AT::t_x_array_randomread x;
const typename AT::t_int_1d_const type,mask;
const typename AT::t_tagint_1d_const molecule;
const typename AT::t_int_1d_const type,mask,molecule;
const typename AT::t_tagint_1d_const tag;
const typename AT::t_tagint_2d_const special;
const typename AT::t_int_2d_const nspecial;
const int molecular;
int moltemplate;
int special_flag[4];
const int nbinx,nbiny,nbinz;
const int mbinx,mbiny,mbinz;
@ -44,38 +52,88 @@ class NeighborKokkosExecute
const int nlocal;
const int exclude;
const int nex_type;
const int maxex_type;
const typename AT::t_int_1d_const ex1_type,ex2_type;
const typename AT::t_int_2d_const ex_type;
const int nex_group;
const int maxex_group;
const typename AT::t_int_1d_const ex1_group,ex2_group;
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
const int nex_mol;
const int maxex_mol;
const typename AT::t_int_1d_const ex_mol_group;
const typename AT::t_int_1d_const ex_mol_bit;
typename AT::t_int_scalar resize;
typename AT::t_int_scalar new_maxneighs;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
const int xperiodic, yperiodic, zperiodic;
const int xprd_half, yprd_half, zprd_half;
NeighborKokkosExecute(
const NeighListKokkos<Device> &_neigh_list,
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
const typename AT::t_int_1d &_bincount,
const typename AT::t_int_2d &_bins,
const int _nlocal,
const typename AT::t_x_array_randomread &_x,
const typename AT::t_int_1d_const &_type,
const typename AT::t_int_1d_const &_mask,
const typename AT::t_tagint_1d_const &_molecule,
const int & _nbinx,const int & _nbiny,const int & _nbinz,
const int & _mbinx,const int & _mbiny,const int & _mbinz,
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo):
const NeighListKokkos<Device> &_neigh_list,
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
const typename AT::t_int_1d &_bincount,
const typename AT::t_int_2d &_bins,
const int _nlocal,
const typename AT::t_x_array_randomread &_x,
const typename AT::t_int_1d_const &_type,
const typename AT::t_int_1d_const &_mask,
const typename AT::t_int_1d_const &_molecule,
const typename AT::t_tagint_1d_const &_tag,
const typename AT::t_tagint_2d_const &_special,
const typename AT::t_int_2d_const &_nspecial,
const int &_molecular,
const int & _nbinx,const int & _nbiny,const int & _nbinz,
const int & _mbinx,const int & _mbiny,const int & _mbinz,
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
const int & _exclude,const int & _nex_type,const int & _maxex_type,
const typename AT::t_int_1d_const & _ex1_type,
const typename AT::t_int_1d_const & _ex2_type,
const typename AT::t_int_2d_const & _ex_type,
const int & _nex_group,const int & _maxex_group,
const typename AT::t_int_1d_const & _ex1_group,
const typename AT::t_int_1d_const & _ex2_group,
const typename AT::t_int_1d_const & _ex1_bit,
const typename AT::t_int_1d_const & _ex2_bit,
const int & _nex_mol,const int & _maxex_mol,
const typename AT::t_int_1d_const & _ex_mol_group,
const typename AT::t_int_1d_const & _ex_mol_bit,
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
nlocal(_nlocal),
x(_x),type(_type),mask(_mask),molecule(_molecule),
tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz) {
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
exclude(_exclude),nex_type(_nex_type),maxex_type(_maxex_type),
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
nex_group(_nex_group),maxex_group(_maxex_group),
ex1_group(_ex1_group),ex2_group(_ex2_group),
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),maxex_mol(_maxex_mol),
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half){
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
#ifndef KOKKOS_USE_UVM
h_resize = Kokkos::create_mirror_view(resize);
@ -103,10 +161,10 @@ class NeighborKokkosExecute
KOKKOS_FUNCTION
void build_cluster_Item(const int &i) const;
#if DEVICE==2
template<int HalfNeigh>
#ifdef KOKKOS_HAVE_CUDA
template<int HalfNeigh, int GhostNewton>
__device__ inline
void build_ItemCuda(Device dev) const;
void build_ItemCuda(typename Kokkos::TeamPolicy<Device>::member_type dev) const;
#endif
KOKKOS_INLINE_FUNCTION
@ -143,6 +201,21 @@ class NeighborKokkosExecute
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
KOKKOS_INLINE_FUNCTION
int find_special(const int &i, const int &j) const;
KOKKOS_INLINE_FUNCTION
int minimum_image_check(double dx, double dy, double dz) const {
if (xperiodic && fabs(dx) > xprd_half) return 1;
if (yperiodic && fabs(dy) > yprd_half) return 1;
if (zperiodic && fabs(dz) > zprd_half) return 1;
return 0;
}
};
template<class Device>
@ -175,12 +248,12 @@ struct NeighborKokkosBuildFunctor {
void operator() (const int & i) const {
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
}
#if DEVICE==2
#ifdef KOKKOS_HAVE_CUDA
KOKKOS_INLINE_FUNCTION
void operator() (Device dev) const {
c.template build_ItemCuda<HALF_NEIGH>(dev);
void operator() (typename Kokkos::TeamPolicy<Device>::member_type dev) const {
c.template build_ItemCuda<HALF_NEIGH,GHOST_NEWTON>(dev);
}
size_t shmem_size() const { return sharedsize; }
size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; }
#endif
};
@ -220,15 +293,28 @@ class NeighborKokkos : public Neighbor {
DAT::tdual_int_1d k_bincount;
DAT::tdual_int_2d k_bins;
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
DAT::tdual_int_2d k_ex_type;
DAT::tdual_int_1d k_ex1_group,k_ex2_group;
DAT::tdual_int_1d k_ex1_bit,k_ex2_bit;
DAT::tdual_int_1d k_ex_mol_group;
DAT::tdual_int_1d k_ex_mol_bit;
void init_cutneighsq_kokkos(int);
int init_lists_kokkos();
void init_list_flags1_kokkos(int);
void init_list_flags2_kokkos(int);
void init_list_grow_kokkos(int);
void init_ex_type_kokkos(int);
void init_ex_bit_kokkos();
void init_ex_mol_bit_kokkos();
void choose_build(int, NeighRequest *);
void build_kokkos(int);
void setup_bins_kokkos(int);
void modify_ex_type_grow_kokkos();
void modify_ex_group_grow_kokkos();
void modify_mol_group_grow_kokkos();
typedef void (NeighborKokkos::*PairPtrHost)
(class NeighListKokkos<LMPHostType> *);
PairPtrHost *pair_build_host;

View File

@ -0,0 +1,266 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "math.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "pair_coul_cut_kokkos.h"
#include "kokkos.h"
#include "atom_kokkos.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "math_const.h"
#include "memory.h"
#include "error.h"
#include "atom_masks.h"
using namespace LAMMPS_NS;
using namespace MathConst;
#define KOKKOS_CUDA_MAX_THREADS 256
#define KOKKOS_CUDA_MIN_BLOCKS 8
/* ---------------------------------------------------------------------- */
template<class DeviceType>
PairCoulCutKokkos<DeviceType>::PairCoulCutKokkos(LAMMPS *lmp) : PairCoulCut(lmp)
{
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
cutsq = NULL;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
PairCoulCutKokkos<DeviceType>::~PairCoulCutKokkos()
{
if (allocated)
memory->destroy_kokkos(k_cutsq, cutsq);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairCoulCutKokkos<DeviceType>::cleanup_copy() {
// WHY needed: this prevents parent copy from deallocating any arrays
allocated = 0;
cutsq = NULL;
eatom = NULL;
vatom = NULL;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
{
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
double ecoul = 0.0;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
atomKK->sync(execution_space,datamask_read);
k_cutsq.template sync<DeviceType>();
k_cut_ljsq.template sync<DeviceType>();
k_cut_coulsq.template sync<DeviceType>();
k_params.template sync<DeviceType>();
if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
else atomKK->modified(execution_space,F_MASK);
x = atomKK->k_x.view<DeviceType>();
c_x = atomKK->k_x.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
q = atomKK->k_q.view<DeviceType>();
type = atomKK->k_type.view<DeviceType>();
nlocal = atom->nlocal;
nall = atom->nlocal + atom->nghost;
newton_pair = force->newton_pair;
special_lj[0] = force->special_lj[0];
special_lj[1] = force->special_lj[1];
special_lj[2] = force->special_lj[2];
special_lj[3] = force->special_lj[3];
special_coul[0] = force->special_coul[0];
special_coul[1] = force->special_coul[1];
special_coul[2] = force->special_coul[2];
special_coul[3] = force->special_coul[3];
qqrd2e = force->qqrd2e;
// loop over neighbors of my atoms
EV_FLOAT ev = pair_compute<PairCoulCutKokkos<DeviceType>,void >
(this,(NeighListKokkos<DeviceType>*)list);
DeviceType::fence();
if (eflag) eng_coul += ev.ecoul;
if (vflag_global) {
virial[0] += ev.v[0];
virial[1] += ev.v[1];
virial[2] += ev.v[2];
virial[3] += ev.v[3];
virial[4] += ev.v[4];
virial[5] += ev.v[5];
}
if (vflag_fdotr) pair_virial_fdotr_compute(this);
}
template<class DeviceType>
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT PairCoulCutKokkos<DeviceType>::
compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype,
const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
const F_FLOAT r2inv = 1.0/rsq;
const F_FLOAT rinv = sqrt(r2inv);
F_FLOAT forcecoul;
forcecoul = qqrd2e*(STACKPARAMS?m_params[itype][jtype].scale:params(itype,jtype).scale)*
qtmp *q(j) *rinv;
return factor_coul*forcecoul*r2inv;
}
template<class DeviceType>
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT PairCoulCutKokkos<DeviceType>::
compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype,
const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
const F_FLOAT r2inv = 1.0/rsq;
const F_FLOAT rinv = sqrt(r2inv);
return factor_coul*qqrd2e * (STACKPARAMS?m_params[itype][jtype].scale:params(itype,jtype).scale)
* qtmp *q(j)*rinv;
}
/* ----------------------------------------------------------------------
allocate all arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void PairCoulCutKokkos<DeviceType>::allocate()
{
PairCoulCut::allocate();
int n = atom->ntypes;
memory->destroy(cutsq);
memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
d_cutsq = k_cutsq.template view<DeviceType>();
k_cut_ljsq = typename ArrayTypes<DeviceType>::tdual_ffloat_2d("pair:cut_ljsq",n+1,n+1);
d_cut_ljsq = k_cut_ljsq.template view<DeviceType>();
k_cut_coulsq = typename ArrayTypes<DeviceType>::tdual_ffloat_2d("pair:cut_coulsq",n+1,n+1);
d_cut_coulsq = k_cut_coulsq.template view<DeviceType>();
k_params = Kokkos::DualView<params_coul**,Kokkos::LayoutRight,DeviceType>("PairCoulCut::params",n+1,n+1);
params = k_params.d_view;
}
/* ----------------------------------------------------------------------
global settings
------------------------------------------------------------------------- */
template<class DeviceType>
void PairCoulCutKokkos<DeviceType>::settings(int narg, char **arg)
{
// \todo check what should be the limit on narg
if (narg > 2) error->all(FLERR,"Illegal pair_style command");
PairCoulCut::settings(1,arg);
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
template<class DeviceType>
void PairCoulCutKokkos<DeviceType>::init_style()
{
PairCoulCut::init_style();
// irequest = neigh request made by parent class
neighflag = lmp->kokkos->neighflag;
int irequest = neighbor->nrequest - 1;
neighbor->requests[irequest]->
kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
!Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
neighbor->requests[irequest]->
kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/cut/kk");
}
}
/* ----------------------------------------------------------------------
init for one type pair i,j and corresponding j,i
------------------------------------------------------------------------- */
template<class DeviceType>
double PairCoulCutKokkos<DeviceType>::init_one(int i, int j)
{
double cutone = PairCoulCut::init_one(i,j);
k_params.h_view(i,j).scale = scale[i][j];
k_params.h_view(i,j).cutsq = cutone*cutone;
k_params.h_view(j,i) = k_params.h_view(i,j);
if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cutone*cutone;
m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cutone*cutone;
}
k_cutsq.h_view(i,j) = cutone*cutone;
k_cutsq.template modify<LMPHostType>();
k_cut_ljsq.h_view(i,j) = cutone*cutone;
k_cut_ljsq.template modify<LMPHostType>();
k_cut_coulsq.h_view(i,j) = cutone*cutone;
k_cut_coulsq.template modify<LMPHostType>();
k_params.template modify<LMPHostType>();
return cutone;
}
template class PairCoulCutKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class PairCoulCutKokkos<LMPHostType>;
#endif

View File

@ -0,0 +1,137 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(coul/cut/kk,PairCoulCutKokkos<LMPDeviceType>)
PairStyle(coul/cut/kk/device,PairCoulCutKokkos<LMPDeviceType>)
PairStyle(coul/cut/kk/host,PairCoulCutKokkos<LMPHostType>)
#else
#ifndef LMP_PAIR_COUL_CUT_KOKKOS_H
#define LMP_PAIR_COUL_CUT_KOKKOS_H
#include "pair_kokkos.h"
#include "pair_coul_cut.h"
#include "neigh_list_kokkos.h"
namespace LAMMPS_NS {
template<class DeviceType>
class PairCoulCutKokkos : public PairCoulCut {
public:
enum {EnabledNeighFlags=FULL&HALFTHREAD&HALF};
enum {COUL_FLAG=1};
typedef DeviceType device_type;
PairCoulCutKokkos(class LAMMPS *);
~PairCoulCutKokkos();
void compute(int, int);
void settings(int, char **);
void init_style();
double init_one(int, int);
struct params_coul{
params_coul(){cutsq=0,scale=0;};
params_coul(int i){cutsq=0,scale=0;};
F_FLOAT cutsq, scale;
};
protected:
void cleanup_copy();
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype) const {
return 0.0;
}
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype) const{
return 0;
}
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
Kokkos::DualView<params_coul**,Kokkos::LayoutRight,DeviceType> k_params;
typename Kokkos::DualView<params_coul**,
Kokkos::LayoutRight,DeviceType>::t_dev_const params;
// hardwired to space for 15 atom types
params_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
typename ArrayTypes<DeviceType>::t_x_array_randomread x;
typename ArrayTypes<DeviceType>::t_x_array c_x;
typename ArrayTypes<DeviceType>::t_f_array f;
typename ArrayTypes<DeviceType>::t_float_1d_randomread q;
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
int newton_pair;
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_ljsq;
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_ljsq;
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_coulsq;
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_coulsq;
class AtomKokkos *atomKK;
int neighflag;
int nlocal,nall,eflag,vflag;
double special_coul[4];
double special_lj[4];
double qqrd2e;
void allocate();
friend class PairComputeFunctor<PairCoulCutKokkos,FULL,true>;
friend class PairComputeFunctor<PairCoulCutKokkos,HALF,true>;
friend class PairComputeFunctor<PairCoulCutKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairCoulCutKokkos,N2,true>;
friend class PairComputeFunctor<PairCoulCutKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairCoulCutKokkos,FULL,false>;
friend class PairComputeFunctor<PairCoulCutKokkos,HALF,false>;
friend class PairComputeFunctor<PairCoulCutKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairCoulCutKokkos,N2,false>;
friend class PairComputeFunctor<PairCoulCutKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute<PairCoulCutKokkos,void>(PairCoulCutKokkos*,
NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairCoulCutKokkos>(PairCoulCutKokkos*);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -25,27 +25,72 @@
namespace LAMMPS_NS {
template<int Table>
struct CoulLongTable {
enum {DoTable = Table};
};
// Tags for doing coulomb calculations or not
// They facilitate function overloading, since
// partial template specialization of member functions is not allowed
struct CoulTag {};
struct NoCoulTag {};
template<int FLAG>
struct DoCoul {
typedef NoCoulTag type;
};
template<>
struct DoCoul<1> {
typedef CoulTag type;
};
// Determine memory traits for force array
// Do atomic trait when running HALFTHREAD neighbor list style
template<int NEIGHFLAG>
struct AtomicF {
enum {value = Kokkos::Unmanaged};
};
template<>
struct AtomicF<HALFTHREAD> {
enum {value = Kokkos::Atomic|Kokkos::Unmanaged};
};
//Specialisation for Neighborlist types Half, HalfThread, Full
template <class PairStyle, int NEIGHFLAG, bool STACKPARAMS, class Specialisation = void>
struct PairComputeFunctor {
typedef typename PairStyle::device_type device_type ;
// Reduction type, contains evdwl, ecoul and virial[6]
typedef EV_FLOAT value_type;
// The copy of the pair style
PairStyle c;
// The force array is atomic for Half/Thread neighbor style
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,
device_type,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > f;
NeighListKokkos<device_type> list;
PairComputeFunctor(PairStyle* c_ptr,
NeighListKokkos<device_type>* list_ptr):
c(*c_ptr),list(*list_ptr) {};
c(*c_ptr),f(c.f),list(*list_ptr) {};
// Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle
~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
return j >> SBBITS & 3;
}
// Loop over neighbors of one atom without coulomb interaction
// This function is called in parallel
template<int EVFLAG, int NEWTON_PAIR>
KOKKOS_FUNCTION
EV_FLOAT compute_item(const int& ii,
const NeighListKokkos<device_type> &list) const {
const NeighListKokkos<device_type> &list, const NoCoulTag&) const {
EV_FLOAT ev;
const int i = list.d_ilist[ii];
const X_FLOAT xtmp = c.x(i,0);
@ -77,25 +122,17 @@ struct PairComputeFunctor {
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
if ((NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
Kokkos::atomic_fetch_add(&c.f(j,0),-delx*fpair);
Kokkos::atomic_fetch_add(&c.f(j,1),-dely*fpair);
Kokkos::atomic_fetch_add(&c.f(j,2),-delz*fpair);
}
if ((NEIGHFLAG==HALF) && (NEWTON_PAIR || j < c.nlocal)) {
c.f(j,0) -= delx*fpair;
c.f(j,1) -= dely*fpair;
c.f(j,2) -= delz*fpair;
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
f(j,0) -= delx*fpair;
f(j,1) -= dely*fpair;
f(j,2) -= delz*fpair;
}
if (EVFLAG) {
if (c.eflag) {
ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
if (c.COUL_FLAG)
ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
}
if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
@ -103,16 +140,84 @@ struct PairComputeFunctor {
}
}
if (NEIGHFLAG == HALFTHREAD) {
Kokkos::atomic_fetch_add(&c.f(i,0),fxtmp);
Kokkos::atomic_fetch_add(&c.f(i,1),fytmp);
Kokkos::atomic_fetch_add(&c.f(i,2),fztmp);
} else {
c.f(i,0) += fxtmp;
c.f(i,1) += fytmp;
c.f(i,2) += fztmp;
f(i,0) += fxtmp;
f(i,1) += fytmp;
f(i,2) += fztmp;
return ev;
}
// Loop over neighbors of one atom with coulomb interaction
// This function is called in parallel
template<int EVFLAG, int NEWTON_PAIR>
KOKKOS_FUNCTION
EV_FLOAT compute_item(const int& ii,
const NeighListKokkos<device_type> &list, const CoulTag& ) const {
EV_FLOAT ev;
const int i = list.d_ilist[ii];
const X_FLOAT xtmp = c.x(i,0);
const X_FLOAT ytmp = c.x(i,1);
const X_FLOAT ztmp = c.x(i,2);
const int itype = c.type(i);
const F_FLOAT qtmp = c.q(i);
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
const int jnum = list.d_numneigh[i];
F_FLOAT fxtmp = 0.0;
F_FLOAT fytmp = 0.0;
F_FLOAT fztmp = 0.0;
for (int jj = 0; jj < jnum; jj++) {
int j = neighbors_i(jj);
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
const F_FLOAT factor_coul = c.special_coul[sbmask(j)];
j &= NEIGHMASK;
const X_FLOAT delx = xtmp - c.x(j,0);
const X_FLOAT dely = ytmp - c.x(j,1);
const X_FLOAT delz = ztmp - c.x(j,2);
const int jtype = c.type(j);
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
F_FLOAT fpair = F_FLOAT();
if(rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype)))
fpair+=factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
if(rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype)))
fpair+=c.template compute_fcoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
f(j,0) -= delx*fpair;
f(j,1) -= dely*fpair;
f(j,2) -= delz*fpair;
}
if (EVFLAG) {
if (c.eflag) {
if(rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype)))
ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
if(rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype)))
ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
}
if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
}
}
}
f(i,0) += fxtmp;
f(i,1) += fytmp;
f(i,2) += fztmp;
return ev;
}
@ -142,7 +247,7 @@ struct PairComputeFunctor {
const E_FLOAT v5 = dely*delz*fpair;
if (c.vflag_global) {
if (NEIGHFLAG) {
if (NEIGHFLAG!=FULL) {
if (NEWTON_PAIR) {
ev.v[0] += v0;
ev.v[1] += v1;
@ -202,43 +307,17 @@ struct PairComputeFunctor {
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
if (c.newton_pair) compute_item<0,1>(i,list);
else compute_item<0,0>(i,list);
if (c.newton_pair) compute_item<0,1>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
else compute_item<0,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
}
KOKKOS_INLINE_FUNCTION
void operator()(const int i, value_type &energy_virial) const {
if (c.newton_pair)
energy_virial += compute_item<1,1>(i,list);
energy_virial += compute_item<1,1>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
else
energy_virial += compute_item<1,0>(i,list);
energy_virial += compute_item<1,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
}
KOKKOS_INLINE_FUNCTION
static void init(volatile value_type &update) {
update.evdwl = 0;
update.ecoul = 0;
update.v[0] = 0;
update.v[1] = 0;
update.v[2] = 0;
update.v[3] = 0;
update.v[4] = 0;
update.v[5] = 0;
}
KOKKOS_INLINE_FUNCTION
static void join(volatile value_type &update,
const volatile value_type &source) {
update.evdwl += source.evdwl;
update.ecoul += source.ecoul;
update.v[0] += source.v[0];
update.v[1] += source.v[1];
update.v[2] += source.v[2];
update.v[3] += source.v[3];
update.v[4] += source.v[4];
update.v[5] += source.v[5];
}
};
template <class PairStyle, bool STACKPARAMS, class Specialisation>
@ -261,8 +340,8 @@ struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
template<int EVFLAG, int NEWTON_PAIR>
KOKKOS_FUNCTION
EV_FLOAT compute_item(const device_type& dev,
const NeighListKokkos<device_type> &list) const {
EV_FLOAT compute_item(const typename Kokkos::TeamPolicy<device_type>::member_type& dev,
const NeighListKokkos<device_type> &list, const NoCoulTag& ) const {
EV_FLOAT ev;
const int i = vectorization::global_thread_rank(dev);
@ -302,9 +381,6 @@ struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
if (c.eflag) {
ev.evdwl += 0.5*
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
if (c.COUL_FLAG)
ev.ecoul += 0.5*
factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
}
if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
@ -373,44 +449,18 @@ struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
}
KOKKOS_INLINE_FUNCTION
void operator()(const device_type& dev) const {
if (c.newton_pair) compute_item<0,1>(dev,list);
else compute_item<0,0>(dev,list);
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev) const {
if (c.newton_pair) compute_item<0,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
else compute_item<0,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
}
KOKKOS_INLINE_FUNCTION
void operator()(const device_type& dev, value_type &energy_virial) const {
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev, value_type &energy_virial) const {
if (c.newton_pair)
energy_virial += compute_item<1,1>(dev,list);
energy_virial += compute_item<1,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
else
energy_virial += compute_item<1,0>(dev,list);
energy_virial += compute_item<1,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
}
KOKKOS_INLINE_FUNCTION
static void init(volatile value_type &update) {
update.evdwl = 0;
update.ecoul = 0;
update.v[0] = 0;
update.v[1] = 0;
update.v[2] = 0;
update.v[3] = 0;
update.v[4] = 0;
update.v[5] = 0;
}
KOKKOS_INLINE_FUNCTION
static void join(volatile value_type &update,
const volatile value_type &source) {
update.evdwl += source.evdwl;
update.ecoul += source.ecoul;
update.v[0] += source.v[0];
update.v[1] += source.v[1];
update.v[2] += source.v[2];
update.v[3] += source.v[3];
update.v[4] += source.v[4];
update.v[5] += source.v[5];
}
};
template <class PairStyle, bool STACKPARAMS, class Specialisation>
@ -433,7 +483,8 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
template<int EVFLAG, int NEWTON_PAIR>
KOKKOS_FUNCTION
EV_FLOAT compute_item(const int& ii,
const NeighListKokkos<device_type> &list) const {
const NeighListKokkos<device_type> &list, const NoCoulTag&) const {
(void) list;
EV_FLOAT ev;
const int i = ii;//list.d_ilist[ii];
const X_FLOAT xtmp = c.x(i,0);
@ -470,9 +521,6 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
if (c.eflag) {
ev.evdwl += 0.5*
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
if (c.COUL_FLAG)
ev.ecoul += 0.5*
factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
}
if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
@ -535,116 +583,156 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
compute_item<0,0>(i,list);
compute_item<0,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
}
KOKKOS_INLINE_FUNCTION
void operator()(const int i, value_type &energy_virial) const {
energy_virial += compute_item<1,0>(i,list);
energy_virial += compute_item<1,0>(i,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
}
KOKKOS_INLINE_FUNCTION
static void init(volatile value_type &update) {
update.evdwl = 0;
update.ecoul = 0;
update.v[0] = 0;
update.v[1] = 0;
update.v[2] = 0;
update.v[3] = 0;
update.v[4] = 0;
update.v[5] = 0;
}
KOKKOS_INLINE_FUNCTION
static void join(volatile value_type &update,
const volatile value_type &source) {
update.evdwl += source.evdwl;
update.ecoul += source.ecoul;
update.v[0] += source.v[0];
update.v[1] += source.v[1];
update.v[2] += source.v[2];
update.v[3] += source.v[3];
update.v[4] += source.v[4];
update.v[5] += source.v[5];
}
};
// Filter out Neighflags which are not supported for PairStyle
// The enable_if clause will invalidate the last parameter of the function, so that
// a match is only achieved, if PairStyle supports the specific neighborlist variant.
// This uses the fact that failure to match template parameters is not an error.
// By having the enable_if with a ! and without it, exactly one of the two versions of the functions
// pair_compute_neighlist and pair_compute_fullcluster will match - either the dummy version
// or the real one further below.
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<!(NEIGHFLAG&PairStyle::EnabledNeighFlags), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
EV_FLOAT ev;
(void) fpair;
(void) list;
printf("ERROR: calling pair_compute with invalid neighbor list style: requested %i available %i",NEIGHFLAG,PairStyle::EnabledNeighFlags);
return ev;
}
template<class PairStyle, class Specialisation>
EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<!(FULLCLUSTER&PairStyle::EnabledNeighFlags), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
EV_FLOAT ev;
(void) fpair;
(void) list;
printf("ERROR: calling pair_compute with invalid neighbor list style: requested %i available %i",FULLCLUSTER,PairStyle::EnabledNeighFlags);
return ev;
}
// Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL,N2
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<NEIGHFLAG&PairStyle::EnabledNeighFlags, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
EV_FLOAT ev;
if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
PairComputeFunctor<PairStyle,NEIGHFLAG,false,Specialisation > ff(fpair,list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
else Kokkos::parallel_for(list->inum,ff);
} else {
PairComputeFunctor<PairStyle,NEIGHFLAG,true,Specialisation > ff(fpair,list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
else Kokkos::parallel_for(list->inum,ff);
}
return ev;
}
// Submit ParallelFor for NEIGHFLAG=FULLCLUSTER
template<class PairStyle, class Specialisation>
EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<FULLCLUSTER&PairStyle::EnabledNeighFlags, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
EV_FLOAT ev;
if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,false,Specialisation >
f_type;
f_type ff(fpair, list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
else Kokkos::parallel_for(config,ff);
} else {
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,true,Specialisation >
f_type;
f_type ff(fpair, list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
else Kokkos::parallel_for(config,ff);
}
return ev;
}
template<class PairStyle, class Specialisation>
EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
EV_FLOAT ev;
if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
if (fpair->neighflag == FULL) {
PairComputeFunctor<PairStyle,FULL,false,Specialisation >
ff(fpair, list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
else Kokkos::parallel_for(list->inum,ff);
} else if (fpair->neighflag == HALFTHREAD) {
PairComputeFunctor<PairStyle,HALFTHREAD,false,Specialisation >
ff(fpair, list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
else Kokkos::parallel_for(list->inum,ff);
} else if (fpair->neighflag == HALF) {
PairComputeFunctor<PairStyle,HALF,false,Specialisation >
ff(fpair, list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
else Kokkos::parallel_for(list->inum,ff);
} else if (fpair->neighflag == N2) {
PairComputeFunctor<PairStyle,N2,false,Specialisation >
ff(fpair, list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(fpair->nlocal,ff,ev);
else Kokkos::parallel_for(fpair->nlocal,ff);
} else if (fpair->neighflag == FULLCLUSTER) {
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,false,Specialisation >
f_type;
f_type ff(fpair, list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),ff,ev);
else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),ff);
}
} else {
if (fpair->neighflag == FULL) {
PairComputeFunctor<PairStyle,FULL,true,Specialisation >
ff(fpair, list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
else Kokkos::parallel_for(list->inum,ff);
} else if (fpair->neighflag == HALFTHREAD) {
PairComputeFunctor<PairStyle,HALFTHREAD,true,Specialisation >
ff(fpair, list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
else Kokkos::parallel_for(list->inum,ff);
} else if (fpair->neighflag == HALF) {
PairComputeFunctor<PairStyle,HALF,true,Specialisation >
ff(fpair, list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
else Kokkos::parallel_for(list->inum,ff);
} else if (fpair->neighflag == N2) {
PairComputeFunctor<PairStyle,N2,true,Specialisation >
ff(fpair, list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(fpair->nlocal,ff,ev);
else Kokkos::parallel_for(fpair->nlocal,ff);
} else if (fpair->neighflag == FULLCLUSTER) {
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,true,Specialisation >
f_type;
f_type ff(fpair, list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),ff,ev);
else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),ff);
}
if (fpair->neighflag == FULL) {
ev = pair_compute_neighlist<PairStyle,FULL,Specialisation> (fpair,list);
} else if (fpair->neighflag == HALFTHREAD) {
ev = pair_compute_neighlist<PairStyle,HALFTHREAD,Specialisation> (fpair,list);
} else if (fpair->neighflag == HALF) {
ev = pair_compute_neighlist<PairStyle,HALF,Specialisation> (fpair,list);
} else if (fpair->neighflag == N2) {
ev = pair_compute_neighlist<PairStyle,N2,Specialisation> (fpair,list);
} else if (fpair->neighflag == FULLCLUSTER) {
ev = pair_compute_fullcluster<PairStyle,Specialisation> (fpair,list);
}
return ev;
}
template<class DeviceType>
struct PairVirialFDotRCompute {
typedef ArrayTypes<DeviceType> AT;
typedef EV_FLOAT value_type;
typename AT::t_x_array_const x;
typename AT::t_f_array_const f;
const int offset;
PairVirialFDotRCompute( typename AT::t_x_array_const x_,
typename AT::t_f_array_const f_,
const int offset_):x(x_),f(f_),offset(offset_) {}
KOKKOS_INLINE_FUNCTION
void operator()(const int j, value_type &energy_virial) const {
const int i = j + offset;
energy_virial.v[0] += f(i,0)*x(i,0);
energy_virial.v[1] += f(i,1)*x(i,1);
energy_virial.v[2] += f(i,2)*x(i,2);
energy_virial.v[3] += f(i,1)*x(i,0);
energy_virial.v[4] += f(i,2)*x(i,0);
energy_virial.v[5] += f(i,2)*x(i,1);
}
};
template<class PairStyle>
void pair_virial_fdotr_compute(PairStyle* fpair) {
EV_FLOAT virial;
if (fpair->neighbor->includegroup == 0) {
int nall = fpair->atom->nlocal + fpair->atom->nghost;
Kokkos::parallel_reduce(nall,PairVirialFDotRCompute<typename PairStyle::device_type>(fpair->x,fpair->f,0),virial);
} else {
Kokkos::parallel_reduce(fpair->atom->nfirst,PairVirialFDotRCompute<typename PairStyle::device_type>(fpair->x,fpair->f,0),virial);
EV_FLOAT virial_ghost;
Kokkos::parallel_reduce(fpair->atom->nghost,PairVirialFDotRCompute<typename PairStyle::device_type>(fpair->x,fpair->f,fpair->atom->nlocal),virial_ghost);
virial+=virial_ghost;
}
fpair->vflag_fdotr = 0;
fpair->virial[0] = virial.v[0];
fpair->virial[1] = virial.v[1];
fpair->virial[2] = virial.v[2];
fpair->virial[3] = virial.v[3];
fpair->virial[4] = virial.v[4];
fpair->virial[5] = virial.v[5];
}
}
#endif

View File

@ -0,0 +1,347 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "math.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "pair_lj_cut_coul_cut_kokkos.h"
#include "kokkos.h"
#include "atom_kokkos.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "math_const.h"
#include "memory.h"
#include "error.h"
#include "atom_masks.h"
using namespace LAMMPS_NS;
using namespace MathConst;
#define KOKKOS_CUDA_MAX_THREADS 256
#define KOKKOS_CUDA_MIN_BLOCKS 8
/* ---------------------------------------------------------------------- */
template<class DeviceType>
PairLJCutCoulCutKokkos<DeviceType>::PairLJCutCoulCutKokkos(LAMMPS *lmp):PairLJCutCoulCut(lmp)
{
respa_enable = 0;
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
cutsq = NULL;
cut_ljsq = NULL;
cut_coulsq = NULL;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
PairLJCutCoulCutKokkos<DeviceType>::~PairLJCutCoulCutKokkos()
{
if (allocated){
memory->destroy_kokkos(k_cutsq, cutsq);
memory->destroy_kokkos(k_cut_ljsq, cut_ljsq);
memory->destroy_kokkos(k_cut_coulsq, cut_coulsq);
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairLJCutCoulCutKokkos<DeviceType>::cleanup_copy() {
// WHY needed: this prevents parent copy from deallocating any arrays
allocated = 0;
cutsq = NULL;
cut_ljsq = NULL;
cut_coulsq = NULL;
eatom = NULL;
vatom = NULL;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairLJCutCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
{
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
double evdwl = 0.0;
double ecoul = 0.0;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
atomKK->sync(execution_space,datamask_read);
k_cutsq.template sync<DeviceType>();
k_cut_ljsq.template sync<DeviceType>();
k_cut_coulsq.template sync<DeviceType>();
k_params.template sync<DeviceType>();
if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
else atomKK->modified(execution_space,F_MASK);
x = atomKK->k_x.view<DeviceType>();
c_x = atomKK->k_x.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
q = atomKK->k_q.view<DeviceType>();
type = atomKK->k_type.view<DeviceType>();
nlocal = atom->nlocal;
nall = atom->nlocal + atom->nghost;
special_lj[0] = force->special_lj[0];
special_lj[1] = force->special_lj[1];
special_lj[2] = force->special_lj[2];
special_lj[3] = force->special_lj[3];
special_coul[0] = force->special_coul[0];
special_coul[1] = force->special_coul[1];
special_coul[2] = force->special_coul[2];
special_coul[3] = force->special_coul[3];
qqrd2e = force->qqrd2e;
newton_pair = force->newton_pair;
// loop over neighbors of my atoms
EV_FLOAT ev = pair_compute<PairLJCutCoulCutKokkos<DeviceType>,void >
(this,(NeighListKokkos<DeviceType>*)list);
DeviceType::fence();
if (eflag) {
eng_vdwl += ev.evdwl;
eng_coul += ev.ecoul;
}
if (vflag_global) {
virial[0] += ev.v[0];
virial[1] += ev.v[1];
virial[2] += ev.v[2];
virial[3] += ev.v[3];
virial[4] += ev.v[4];
virial[5] += ev.v[5];
}
if (vflag_fdotr) virial_fdotr_compute();
}
/* ----------------------------------------------------------------------
compute LJ 12-6 pair force between atoms i and j
---------------------------------------------------------------------- */
template<class DeviceType>
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT PairLJCutCoulCutKokkos<DeviceType>::
compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype) const {
const F_FLOAT r2inv = 1.0/rsq;
const F_FLOAT r6inv = r2inv*r2inv*r2inv;
F_FLOAT forcelj;
forcelj = r6inv *
((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv -
(STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2));
return forcelj*r2inv;
}
/* ----------------------------------------------------------------------
compute coulomb pair force between atoms i and j
---------------------------------------------------------------------- */
template<class DeviceType>
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT PairLJCutCoulCutKokkos<DeviceType>::
compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
const F_FLOAT r2inv = 1.0/rsq;
const F_FLOAT rinv = sqrt(r2inv);
F_FLOAT forcecoul;
forcecoul = qqrd2e*qtmp*q(j) *rinv;
return factor_coul*forcecoul*r2inv;
}
/* ----------------------------------------------------------------------
compute LJ 12-6 pair potential energy between atoms i and j
---------------------------------------------------------------------- */
template<class DeviceType>
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT PairLJCutCoulCutKokkos<DeviceType>::
compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype) const {
const F_FLOAT r2inv = 1.0/rsq;
const F_FLOAT r6inv = r2inv*r2inv*r2inv;
return r6inv*
((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv
- (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4))
- (STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset);
}
/* ----------------------------------------------------------------------
compute coulomb pair potential energy between atoms i and j
---------------------------------------------------------------------- */
template<class DeviceType>
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT PairLJCutCoulCutKokkos<DeviceType>::
compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
const F_FLOAT r2inv = 1.0/rsq;
const F_FLOAT rinv = sqrt(r2inv);
return factor_coul*qqrd2e*qtmp*q(j)*rinv;
}
/* ----------------------------------------------------------------------
allocate all arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void PairLJCutCoulCutKokkos<DeviceType>::allocate()
{
PairLJCutCoulCut::allocate();
int n = atom->ntypes;
memory->destroy(cutsq);
memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
d_cutsq = k_cutsq.template view<DeviceType>();
memory->destroy(cut_ljsq);
memory->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq");
d_cut_ljsq = k_cut_ljsq.template view<DeviceType>();
memory->destroy(cut_coulsq);
memory->create_kokkos(k_cut_coulsq,cut_coulsq,n+1,n+1,"pair:cut_coulsq");
d_cut_coulsq = k_cut_coulsq.template view<DeviceType>();
k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCutCoulCut::params",n+1,n+1);
params = k_params.d_view;
}
/* ----------------------------------------------------------------------
global settings
------------------------------------------------------------------------- */
template<class DeviceType>
void PairLJCutCoulCutKokkos<DeviceType>::settings(int narg, char **arg)
{
if (narg > 2) error->all(FLERR,"Illegal pair_style command");
PairLJCutCoulCut::settings(1,arg);
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
template<class DeviceType>
void PairLJCutCoulCutKokkos<DeviceType>::init_style()
{
PairLJCutCoulCut::init_style();
// error if rRESPA with inner levels
if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
int respa = 0;
if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
if (respa)
error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
}
// irequest = neigh request made by parent class
neighflag = lmp->kokkos->neighflag;
int irequest = neighbor->nrequest - 1;
neighbor->requests[irequest]->
kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
!Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
neighbor->requests[irequest]->
kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk");
}
}
/* ----------------------------------------------------------------------
init for one type pair i,j and corresponding j,i
------------------------------------------------------------------------- */
template<class DeviceType>
double PairLJCutCoulCutKokkos<DeviceType>::init_one(int i, int j)
{
double cutone = PairLJCutCoulCut::init_one(i,j);
double cut_ljsqm = cut_ljsq[i][j];
double cut_coulsqm = cut_coulsq[i][j];
k_params.h_view(i,j).lj1 = lj1[i][j];
k_params.h_view(i,j).lj2 = lj2[i][j];
k_params.h_view(i,j).lj3 = lj3[i][j];
k_params.h_view(i,j).lj4 = lj4[i][j];
k_params.h_view(i,j).offset = offset[i][j];
k_params.h_view(i,j).cut_ljsq = cut_ljsqm;
k_params.h_view(i,j).cut_coulsq = cut_coulsqm;
k_params.h_view(j,i) = k_params.h_view(i,j);
if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cut_ljsqm;
m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cut_coulsqm;
}
k_cutsq.h_view(i,j) = cutone*cutone;
k_cutsq.template modify<LMPHostType>();
k_cut_ljsq.h_view(i,j) = cut_ljsqm;
k_cut_ljsq.template modify<LMPHostType>();
k_cut_coulsq.h_view(i,j) = cut_coulsqm;
k_cut_coulsq.template modify<LMPHostType>();
k_params.template modify<LMPHostType>();
return cutone;
}
template class PairLJCutCoulCutKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class PairLJCutCoulCutKokkos<LMPHostType>;
#endif

View File

@ -0,0 +1,131 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/cut/coul/cut/kk,PairLJCutCoulCutKokkos<LMPDeviceType>)
PairStyle(lj/cut/coul/cut/kk/device,PairLJCutCoulCutKokkos<LMPDeviceType>)
PairStyle(lj/cut/coul/cut/kk/host,PairLJCutCoulCutKokkos<LMPHostType>)
#else
#ifndef LMP_PAIR_LJ_CUT_COUL_CUT_KOKKOS_H
#define LMP_PAIR_LJ_CUT_COUL_CUT_KOKKOS_H
#include "pair_kokkos.h"
#include "pair_lj_cut_coul_cut.h"
#include "neigh_list_kokkos.h"
namespace LAMMPS_NS {
template<class DeviceType>
class PairLJCutCoulCutKokkos : public PairLJCutCoulCut {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};
enum {COUL_FLAG=1};
typedef DeviceType device_type;
PairLJCutCoulCutKokkos(class LAMMPS *);
~PairLJCutCoulCutKokkos();
void compute(int, int);
void settings(int, char **);
void init_style();
double init_one(int, int);
struct params_lj_coul{
params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
};
protected:
void cleanup_copy();
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype) const;
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype) const;
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType> k_params;
typename Kokkos::DualView<params_lj_coul**,
Kokkos::LayoutRight,DeviceType>::t_dev_const params;
// hardwired to space for 15 atom types
params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
typename ArrayTypes<DeviceType>::t_x_array_randomread x;
typename ArrayTypes<DeviceType>::t_x_array c_x;
typename ArrayTypes<DeviceType>::t_f_array f;
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
typename ArrayTypes<DeviceType>::t_float_1d_randomread q;
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
int newton_pair;
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_ljsq;
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_ljsq;
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_coulsq;
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_coulsq;
class AtomKokkos *atomKK;
int neighflag;
int nlocal,nall,eflag,vflag;
double special_coul[4];
double special_lj[4];
double qqrd2e;
void allocate();
friend class PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,true>;
friend class PairComputeFunctor<PairLJCutCoulCutKokkos,HALF,true>;
friend class PairComputeFunctor<PairLJCutCoulCutKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,false>;
friend class PairComputeFunctor<PairLJCutCoulCutKokkos,HALF,false>;
friend class PairComputeFunctor<PairLJCutCoulCutKokkos,HALFTHREAD,false>;
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,FULL,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,HALF,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,HALFTHREAD,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCutCoulCutKokkos,void>(PairLJCutCoulCutKokkos*,
NeighListKokkos<DeviceType>*);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,496 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "math.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "pair_lj_cut_coul_long_kokkos.h"
#include "kokkos.h"
#include "atom_kokkos.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "math_const.h"
#include "memory.h"
#include "error.h"
#include "atom_masks.h"
using namespace LAMMPS_NS;
using namespace MathConst;
#define KOKKOS_CUDA_MAX_THREADS 256
#define KOKKOS_CUDA_MIN_BLOCKS 8
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
/* ---------------------------------------------------------------------- */
template<class DeviceType>
PairLJCutCoulLongKokkos<DeviceType>::PairLJCutCoulLongKokkos(LAMMPS *lmp):PairLJCutCoulLong(lmp)
{
respa_enable = 0;
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
cutsq = NULL;
cut_ljsq = NULL;
cut_coulsq = NULL;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
PairLJCutCoulLongKokkos<DeviceType>::~PairLJCutCoulLongKokkos()
{
if (allocated){
memory->destroy_kokkos(k_cutsq, cutsq);
memory->destroy_kokkos(k_cut_ljsq, cut_ljsq);
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairLJCutCoulLongKokkos<DeviceType>::cleanup_copy() {
// WHY needed: this prevents parent copy from deallocating any arrays
allocated = 0;
cutsq = NULL;
cut_ljsq = NULL;
eatom = NULL;
vatom = NULL;
ftable = NULL;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
{
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
double evdwl = 0.0;
double ecoul = 0.0;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
atomKK->sync(execution_space,datamask_read);
k_cutsq.template sync<DeviceType>();
k_cut_ljsq.template sync<DeviceType>();
k_cut_coulsq.template sync<DeviceType>();
k_params.template sync<DeviceType>();
if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
else atomKK->modified(execution_space,F_MASK);
x = atomKK->k_x.view<DeviceType>();
c_x = atomKK->k_x.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
q = atomKK->k_q.view<DeviceType>();
type = atomKK->k_type.view<DeviceType>();
nlocal = atom->nlocal;
nall = atom->nlocal + atom->nghost;
special_lj[0] = force->special_lj[0];
special_lj[1] = force->special_lj[1];
special_lj[2] = force->special_lj[2];
special_lj[3] = force->special_lj[3];
special_coul[0] = force->special_coul[0];
special_coul[1] = force->special_coul[1];
special_coul[2] = force->special_coul[2];
special_coul[3] = force->special_coul[3];
qqrd2e = force->qqrd2e;
newton_pair = force->newton_pair;
// loop over neighbors of my atoms
EV_FLOAT ev;
if(ncoultablebits)
ev = pair_compute<PairLJCutCoulLongKokkos<DeviceType>,CoulLongTable<1> >
(this,(NeighListKokkos<DeviceType>*)list);
else
ev = pair_compute<PairLJCutCoulLongKokkos<DeviceType>,CoulLongTable<0> >
(this,(NeighListKokkos<DeviceType>*)list);
DeviceType::fence();
if (eflag) {
eng_vdwl += ev.evdwl;
eng_coul += ev.ecoul;
}
if (vflag_global) {
virial[0] += ev.v[0];
virial[1] += ev.v[1];
virial[2] += ev.v[2];
virial[3] += ev.v[3];
virial[4] += ev.v[4];
virial[5] += ev.v[5];
}
if (vflag_fdotr) pair_virial_fdotr_compute(this);
}
/* ----------------------------------------------------------------------
compute LJ 12-6 pair force between atoms i and j
---------------------------------------------------------------------- */
template<class DeviceType>
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT PairLJCutCoulLongKokkos<DeviceType>::
compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype) const {
const F_FLOAT r2inv = 1.0/rsq;
const F_FLOAT r6inv = r2inv*r2inv*r2inv;
F_FLOAT forcelj;
forcelj = r6inv *
((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv -
(STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2));
return forcelj*r2inv;
}
/* ----------------------------------------------------------------------
compute coulomb pair force between atoms i and j
---------------------------------------------------------------------- */
template<class DeviceType>
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT PairLJCutCoulLongKokkos<DeviceType>::
compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
if(Specialisation::DoTable && rsq > tabinnersq) {
union_int_float_t rsq_lookup;
rsq_lookup.f = rsq;
const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits;
const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable];
const F_FLOAT table = d_ftable[itable] + fraction*d_dftable[itable];
F_FLOAT forcecoul = qtmp*q[j] * table;
if (factor_coul < 1.0) {
const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable];
const F_FLOAT prefactor = qtmp*q[j] * table;
forcecoul -= (1.0-factor_coul)*prefactor;
}
return forcecoul/rsq;
} else {
const F_FLOAT r = sqrt(rsq);
const F_FLOAT grij = g_ewald * r;
const F_FLOAT expm2 = exp(-grij*grij);
const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij);
const F_FLOAT rinv = 1.0/r;
const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
const F_FLOAT prefactor = qqrd2e * qtmp*q[j]*rinv;
F_FLOAT forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
return forcecoul*rinv*rinv;
}
}
/* ----------------------------------------------------------------------
compute LJ 12-6 pair potential energy between atoms i and j
---------------------------------------------------------------------- */
template<class DeviceType>
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT PairLJCutCoulLongKokkos<DeviceType>::
compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype) const {
const F_FLOAT r2inv = 1.0/rsq;
const F_FLOAT r6inv = r2inv*r2inv*r2inv;
return r6inv*
((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv
- (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4))
- (STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset);
}
/* ----------------------------------------------------------------------
compute coulomb pair potential energy between atoms i and j
---------------------------------------------------------------------- */
template<class DeviceType>
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT PairLJCutCoulLongKokkos<DeviceType>::
compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
if(Specialisation::DoTable) {
union_int_float_t rsq_lookup;
rsq_lookup.f = rsq;
const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits;
const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable];
const F_FLOAT table = d_etable[itable] + fraction*d_detable[itable];
F_FLOAT ecoul = qtmp*q[j] * table;
if (factor_coul < 1.0) {
const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable];
const F_FLOAT prefactor = qtmp*q[j] * table;
ecoul -= (1.0-factor_coul)*prefactor;
}
return ecoul;
} else {
const F_FLOAT r = sqrt(rsq);
const F_FLOAT grij = g_ewald * r;
const F_FLOAT expm2 = exp(-grij*grij);
const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij);
const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
const F_FLOAT prefactor = qqrd2e * qtmp*q[j]/r;
F_FLOAT ecoul = prefactor * erfc;
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
return ecoul;
}
}
/* ----------------------------------------------------------------------
allocate all arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void PairLJCutCoulLongKokkos<DeviceType>::allocate()
{
PairLJCutCoulLong::allocate();
int n = atom->ntypes;
memory->destroy(cutsq);
memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
d_cutsq = k_cutsq.template view<DeviceType>();
memory->destroy(cut_ljsq);
memory->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq");
d_cut_ljsq = k_cut_ljsq.template view<DeviceType>();
memory->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq");
d_cut_coulsq = k_cut_coulsq.template view<DeviceType>();
k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCutCoulLong::params",n+1,n+1);
params = k_params.d_view;
}
template<class DeviceType>
void PairLJCutCoulLongKokkos<DeviceType>::init_tables(double cut_coul, double *cut_respa)
{
Pair::init_tables(cut_coul,cut_respa);
typedef typename ArrayTypes<DeviceType>::t_ffloat_1d table_type;
typedef typename ArrayTypes<LMPHostType>::t_ffloat_1d host_table_type;
int ntable = 1;
for (int i = 0; i < ncoultablebits; i++) ntable *= 2;
// Copy rtable and drtable
{
host_table_type h_table("HostTable",ntable);
table_type d_table("DeviceTable",ntable);
for(int i = 0; i < ntable; i++) {
h_table(i) = rtable[i];
}
Kokkos::deep_copy(d_table,h_table);
d_rtable = d_table;
}
{
host_table_type h_table("HostTable",ntable);
table_type d_table("DeviceTable",ntable);
for(int i = 0; i < ntable; i++) {
h_table(i) = drtable[i];
}
Kokkos::deep_copy(d_table,h_table);
d_drtable = d_table;
}
{
host_table_type h_table("HostTable",ntable);
table_type d_table("DeviceTable",ntable);
// Copy ftable and dftable
for(int i = 0; i < ntable; i++) {
h_table(i) = ftable[i];
}
Kokkos::deep_copy(d_table,h_table);
d_ftable = d_table;
}
{
host_table_type h_table("HostTable",ntable);
table_type d_table("DeviceTable",ntable);
for(int i = 0; i < ntable; i++) {
h_table(i) = dftable[i];
}
Kokkos::deep_copy(d_table,h_table);
d_dftable = d_table;
}
{
host_table_type h_table("HostTable",ntable);
table_type d_table("DeviceTable",ntable);
// Copy ctable and dctable
for(int i = 0; i < ntable; i++) {
h_table(i) = ctable[i];
}
Kokkos::deep_copy(d_table,h_table);
d_ctable = d_table;
}
{
host_table_type h_table("HostTable",ntable);
table_type d_table("DeviceTable",ntable);
for(int i = 0; i < ntable; i++) {
h_table(i) = dctable[i];
}
Kokkos::deep_copy(d_table,h_table);
d_dctable = d_table;
}
{
host_table_type h_table("HostTable",ntable);
table_type d_table("DeviceTable",ntable);
// Copy etable and detable
for(int i = 0; i < ntable; i++) {
h_table(i) = etable[i];
}
Kokkos::deep_copy(d_table,h_table);
d_etable = d_table;
}
{
host_table_type h_table("HostTable",ntable);
table_type d_table("DeviceTable",ntable);
for(int i = 0; i < ntable; i++) {
h_table(i) = detable[i];
}
Kokkos::deep_copy(d_table,h_table);
d_detable = d_table;
}
}
/* ----------------------------------------------------------------------
global settings
------------------------------------------------------------------------- */
template<class DeviceType>
void PairLJCutCoulLongKokkos<DeviceType>::settings(int narg, char **arg)
{
if (narg > 2) error->all(FLERR,"Illegal pair_style command");
PairLJCutCoulLong::settings(narg,arg);
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
template<class DeviceType>
void PairLJCutCoulLongKokkos<DeviceType>::init_style()
{
PairLJCutCoulLong::init_style();
// error if rRESPA with inner levels
if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
int respa = 0;
if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
if (respa)
error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
}
// irequest = neigh request made by parent class
neighflag = lmp->kokkos->neighflag;
int irequest = neighbor->nrequest - 1;
neighbor->requests[irequest]->
kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
!Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
neighbor->requests[irequest]->
kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/long/kk");
}
}
/* ----------------------------------------------------------------------
init for one type pair i,j and corresponding j,i
------------------------------------------------------------------------- */
template<class DeviceType>
double PairLJCutCoulLongKokkos<DeviceType>::init_one(int i, int j)
{
double cutone = PairLJCutCoulLong::init_one(i,j);
double cut_ljsqm = cut_ljsq[i][j];
double cut_coulsqm = cut_coulsq;
k_params.h_view(i,j).lj1 = lj1[i][j];
k_params.h_view(i,j).lj2 = lj2[i][j];
k_params.h_view(i,j).lj3 = lj3[i][j];
k_params.h_view(i,j).lj4 = lj4[i][j];
k_params.h_view(i,j).offset = offset[i][j];
k_params.h_view(i,j).cut_ljsq = cut_ljsqm;
k_params.h_view(i,j).cut_coulsq = cut_coulsqm;
k_params.h_view(j,i) = k_params.h_view(i,j);
if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cut_ljsqm;
m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cut_coulsqm;
}
k_cutsq.h_view(i,j) = cutone*cutone;
k_cutsq.template modify<LMPHostType>();
k_cut_ljsq.h_view(i,j) = cut_ljsqm;
k_cut_ljsq.template modify<LMPHostType>();
k_cut_coulsq.h_view(i,j) = cut_coulsqm;
k_cut_coulsq.template modify<LMPHostType>();
k_params.template modify<LMPHostType>();
return cutone;
}
template class PairLJCutCoulLongKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class PairLJCutCoulLongKokkos<LMPHostType>;
#endif

View File

@ -0,0 +1,147 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/cut/coul/long/kk,PairLJCutCoulLongKokkos<LMPDeviceType>)
PairStyle(lj/cut/coul/long/kk/device,PairLJCutCoulLongKokkos<LMPDeviceType>)
PairStyle(lj/cut/coul/long/kk/host,PairLJCutCoulLongKokkos<LMPHostType>)
#else
#ifndef LMP_PAIR_LJ_CUT_COUL_LONG_KOKKOS_H
#define LMP_PAIR_LJ_CUT_COUL_LONG_KOKKOS_H
#include "pair_kokkos.h"
#include "pair_lj_cut_coul_long.h"
#include "neigh_list_kokkos.h"
namespace LAMMPS_NS {
template<class DeviceType>
class PairLJCutCoulLongKokkos : public PairLJCutCoulLong {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};
enum {COUL_FLAG=1};
typedef DeviceType device_type;
PairLJCutCoulLongKokkos(class LAMMPS *);
~PairLJCutCoulLongKokkos();
void compute(int, int);
void settings(int, char **);
void init_tables(double cut_coul, double *cut_respa);
void init_style();
double init_one(int, int);
struct params_lj_coul{
params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
params_lj_coul(int i){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
};
protected:
void cleanup_copy();
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype) const;
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype,
const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype) const;
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType> k_params;
typename Kokkos::DualView<params_lj_coul**,
Kokkos::LayoutRight,DeviceType>::t_dev_const params;
// hardwired to space for 15 atom types
params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
typename ArrayTypes<DeviceType>::t_x_array_randomread x;
typename ArrayTypes<DeviceType>::t_x_array c_x;
typename ArrayTypes<DeviceType>::t_f_array f;
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
typename ArrayTypes<DeviceType>::t_float_1d_randomread q;
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
int newton_pair;
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_ljsq;
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_ljsq;
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cut_coulsq;
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cut_coulsq;
typename ArrayTypes<DeviceType>::t_ffloat_1d_randomread
d_rtable, d_drtable, d_ftable, d_dftable,
d_ctable, d_dctable, d_etable, d_detable;
class AtomKokkos *atomKK;
int neighflag;
int nlocal,nall,eflag,vflag;
double special_coul[4];
double special_lj[4];
double qqrd2e;
void allocate();
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,CoulLongTable<1> >;
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,true,CoulLongTable<1> >;
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,CoulLongTable<1> >;
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,false,CoulLongTable<1> >;
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALF,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCutCoulLongKokkos,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,
NeighListKokkos<DeviceType>*);
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,CoulLongTable<0> >;
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,true,CoulLongTable<0> >;
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,CoulLongTable<0> >;
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,false,CoulLongTable<0> >;
friend class PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALF,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCutCoulLongKokkos,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,
NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJCutCoulLongKokkos>(PairLJCutCoulLongKokkos*);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -99,18 +99,18 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
c_x = atomKK->k_x.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
type = atomKK->k_type.view<DeviceType>();
tag = atomKK->k_tag.view<DeviceType>();
nlocal = atom->nlocal;
nall = atom->nlocal + atom->nghost;
newton_pair = force->newton_pair;
special_lj[0] = force->special_lj[0];
special_lj[1] = force->special_lj[1];
special_lj[2] = force->special_lj[2];
special_lj[3] = force->special_lj[3];
newton_pair = force->newton_pair;
// loop over neighbors of my atoms
EV_FLOAT ev = pair_compute<PairLJCutKokkos<DeviceType>,void >(this,(NeighListKokkos<DeviceType>*)list);
DeviceType::fence();
if (eflag) eng_vdwl += ev.evdwl;
@ -123,7 +123,7 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
virial[5] += ev.v[5];
}
if (vflag_fdotr) virial_fdotr_compute();
if (vflag_fdotr) pair_virial_fdotr_compute(this);
}
template<class DeviceType>
@ -131,12 +131,15 @@ template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT PairLJCutKokkos<DeviceType>::
compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
(void) i;
(void) j;
const F_FLOAT r2inv = 1.0/rsq;
const F_FLOAT r6inv = r2inv*r2inv*r2inv;
const F_FLOAT forcelj = r6inv *
((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv -
(STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2));
return forcelj*r2inv;
}
@ -145,8 +148,11 @@ template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT PairLJCutKokkos<DeviceType>::
compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
(void) i;
(void) j;
const F_FLOAT r2inv = 1.0/rsq;
const F_FLOAT r6inv = r2inv*r2inv*r2inv;
return r6inv*((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv -
(STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)) -
(STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset);
@ -262,6 +268,6 @@ double PairLJCutKokkos<DeviceType>::init_one(int i, int j)
template class PairLJCutKokkos<LMPDeviceType>;
#if DEVICE==2
#ifdef KOKKOS_HAVE_CUDA
template class PairLJCutKokkos<LMPHostType>;
#endif

View File

@ -31,6 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairLJCutKokkos : public PairLJCut {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJCutKokkos(class LAMMPS *);
@ -76,9 +77,10 @@ class PairLJCutKokkos : public PairLJCut {
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
//typename ArrayTypes<DeviceType>::t_ffloat_1d special_lj;
typename ArrayTypes<DeviceType>::t_tagint_1d tag;
int newton_pair;
double special_lj[4];
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
@ -98,8 +100,13 @@ class PairLJCutKokkos : public PairLJCut {
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJCutKokkos,N2,false>;
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,FULL,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALF,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALFTHREAD,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,N2,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJCutKokkos>(PairLJCutKokkos*);
};
}

View File

@ -153,8 +153,9 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
const int teamsize = 1;
#endif
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
if (eflag || vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),f,ev);
else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),f);
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize);
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
else Kokkos::parallel_for(config,f);
}
} else {
if (neighflag == FULL) {
@ -187,8 +188,9 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
const int teamsize = 1;
#endif
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
if (eflag || vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),f,ev);
else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),f);
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize);
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
else Kokkos::parallel_for(config,f);
}
}
DeviceType::fence();
@ -203,7 +205,7 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
virial[5] += ev.v[5];
}
if (vflag_fdotr) virial_fdotr_compute();
if (vflag_fdotr) pair_virial_fdotr_compute(this);
}
template<class DeviceType>
@ -211,6 +213,8 @@ template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT PairTableKokkos<DeviceType>::
compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
(void) i;
(void) j;
union_int_float_t rsq_lookup;
double fpair;
const int tidx = d_table_const.tabindex(itype,jtype);
@ -254,6 +258,8 @@ template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT PairTableKokkos<DeviceType>::
compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
(void) i;
(void) j;
double evdwl;
union_int_float_t rsq_lookup;
const int tidx = d_table_const.tabindex(itype,jtype);
@ -292,128 +298,6 @@ compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, c
return evdwl;
}
/*
template<class DeviceType>
template<int EVFLAG, int NEIGHFLAG, int NEWTON_PAIR,int TABSTYLE>
KOKKOS_FUNCTION
EV_FLOAT PairTableKokkos<DeviceType>::
compute_item(const int &ii, const NeighListKokkos<DeviceType> &list) const
{
EV_FLOAT ev;
const int tlm1 = tablength - 1;
union_int_float_t rsq_lookup;
const int i = list.d_ilist[ii];
const X_FLOAT xtmp = x(i,0);
const X_FLOAT ytmp = x(i,1);
const X_FLOAT ztmp = x(i,2);
const int itype = type(i);
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
const int jnum = list.d_numneigh[i];
F_FLOAT fxtmp = 0.0;
F_FLOAT fytmp = 0.0;
F_FLOAT fztmp = 0.0;
for (int jj = 0; jj < jnum; jj++) {
int j = neighbors_i(jj);
const F_FLOAT factor_lj = 1.0; //special_lj[sbmask(j)];
j &= NEIGHMASK;
const X_FLOAT delx = xtmp - x(j,0);
const X_FLOAT dely = ytmp - x(j,1);
const X_FLOAT delz = ztmp - x(j,2);
const int jtype = type(j);
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
if (rsq < d_table_const.cutsq(itype,jtype)) {
double fpair;
const int tidx = d_table_const.tabindex(itype,jtype);
//const Table* const tb = &tables[tabindex[itype][jtype]];
//if (rsq < d_table_const.innersq(tidx))
// error->one(FLERR,"Pair distance < table inner cutoff");
if (TABSTYLE == LOOKUP) {
const int itable = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
//if (itable >= tlm1)
// error->one(FLERR,"Pair distance > table outer cutoff");
fpair = factor_lj * d_table_const.f(tidx,itable);
if (EVFLAG)
ev.evdwl = d_table_const.e(tidx,itable);
} else if (TABSTYLE == LINEAR) {
const int itable = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
//if (itable >= tlm1)
// error->one(FLERR,"Pair distance > table outer cutoff");
const double fraction = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx);
const double value = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable);
fpair = factor_lj * value;
if (EVFLAG)
ev.evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable);
} else if (TABSTYLE == SPLINE) {
const int itable = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
//if (itable >= tlm1)
// error->one(FLERR,"Pair distance > table outer cutoff");
const double b = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx);
const double a = 1.0 - b;
const double value = a * d_table_const.f(tidx,itable) + b * d_table_const.f(tidx,itable+1) +
((a*a*a-a)*d_table_const.f2(tidx,itable) + (b*b*b-b)*d_table_const.f2(tidx,itable+1)) *
d_table_const.deltasq6(tidx);
fpair = factor_lj * value;
if (EVFLAG)
ev.evdwl = a * d_table_const.e(tidx,itable) + b * d_table_const.e(tidx,itable+1) +
((a*a*a-a)*d_table_const.e2(tidx,itable) + (b*b*b-b)*d_table_const.e2(tidx,itable+1)) *
d_table_const.deltasq6(tidx);
} else {
rsq_lookup.f = rsq;
int itable = rsq_lookup.i & d_table_const.nmask(tidx);
itable >>= d_table_const.nshiftbits(tidx);
const double fraction = (rsq_lookup.f - d_table_const.rsq(tidx,itable)) * d_table_const.drsq(tidx,itable);
const double value = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable);
fpair = factor_lj * value;
if (EVFLAG)
ev.evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable);
}
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
if ((NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
Kokkos::atomic_fetch_add(&f(j,0),-delx*fpair);
Kokkos::atomic_fetch_add(&f(j,1),-dely*fpair);
Kokkos::atomic_fetch_add(&f(j,2),-delz*fpair);
}
if ((NEIGHFLAG==HALF) && (NEWTON_PAIR || j < nlocal)) {
f(j,0) -= delx*fpair;
f(j,1) -= dely*fpair;
f(j,2) -= delz*fpair;
}
if(EVFLAG) {
if (eflag) {
ev.evdwl *= factor_lj;
}
if (evflag) ev_tally<NEIGHFLAG>(ev,i,j
,fpair,delx,dely,delz);
}
}
}
if (NEIGHFLAG == HALFTHREAD) {
Kokkos::atomic_fetch_add(&f(i,0),fxtmp);
Kokkos::atomic_fetch_add(&f(i,1),fytmp);
Kokkos::atomic_fetch_add(&f(i,2),fztmp);
} else {
f(i,0) += fxtmp;
f(i,1) += fytmp;
f(i,2) += fztmp;
}
return ev;
}
*/
template<class DeviceType>
void PairTableKokkos<DeviceType>::create_kokkos_tables()
{
@ -880,7 +764,6 @@ void PairTableKokkos<DeviceType>::param_extract(Table *tb, char *line)
word = strtok(NULL," \t\n\r\f");
tb->fphi = atof(word);
} else {
printf("WORD: %s\n",word);
error->one(FLERR,"Invalid keyword in pair table parameters");
}
word = strtok(NULL," \t\n\r\f");
@ -1494,7 +1377,7 @@ void PairTableKokkos<DeviceType>::cleanup_copy() {
}
template class PairTableKokkos<LMPDeviceType>;
#if DEVICE==2
#ifdef KOKKOS_HAVE_CUDA
template class PairTableKokkos<LMPHostType>;
#endif

View File

@ -41,6 +41,7 @@ template<class DeviceType>
class PairTableKokkos : public Pair {
public:
enum {EnabledNeighFlags=FULL&HALFTHREAD&HALF&N2&FULLCLUSTER};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
@ -208,67 +209,10 @@ class PairTableKokkos : public Pair {
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,BITMAP> >;
/*template<int FULL_NEIGH>
KOKKOS_INLINE_FUNCTION
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
const F_FLOAT &fpair, const F_FLOAT &delx,
const F_FLOAT &dely, const F_FLOAT &delz) const;
*/
};
/*
template <class DeviceType, int NEIGHFLAG, int TABSTYLE>
struct PairTableComputeFunctor {
typedef DeviceType device_type ;
typedef EV_FLOAT value_type;
PairTableKokkos<DeviceType> c;
NeighListKokkos<DeviceType> list;
PairTableComputeFunctor(PairTableKokkos<DeviceType>* c_ptr,
NeighListKokkos<DeviceType>* list_ptr):
c(*c_ptr),list(*list_ptr) {};
~PairTableComputeFunctor() {c.cleanup_copy();list.clean_copy();};
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
if (c.newton_pair) c.template compute_item<0,NEIGHFLAG,1,TABSTYLE>(i,list);
else c.template compute_item<0,NEIGHFLAG,0,TABSTYLE>(i,list);
}
KOKKOS_INLINE_FUNCTION
void operator()(const int i, value_type &energy_virial) const {
if (c.newton_pair)
energy_virial += c.template compute_item<1,NEIGHFLAG,1,TABSTYLE>(i,list);
else
energy_virial += c.template compute_item<1,NEIGHFLAG,0,TABSTYLE>(i,list);
}
KOKKOS_INLINE_FUNCTION
static void init(volatile value_type &update) {
update.evdwl = 0;
update.ecoul = 0;
update.v[0] = 0;
update.v[1] = 0;
update.v[2] = 0;
update.v[3] = 0;
update.v[4] = 0;
update.v[5] = 0;
}
KOKKOS_INLINE_FUNCTION
static void join(volatile value_type &update,
const volatile value_type &source) {
update.evdwl += source.evdwl;
update.ecoul += source.ecoul;
update.v[0] += source.v[0];
update.v[1] += source.v[1];
update.v[2] += source.v[2];
update.v[3] += source.v[3];
update.v[4] += source.v[4];
update.v[5] += source.v[5];
}
friend void pair_virial_fdotr_compute<PairTableKokkos>(PairTableKokkos*);
};
*/

View File

@ -53,6 +53,7 @@ VerletKokkos::VerletKokkos(LAMMPS *lmp, int narg, char **arg) :
void VerletKokkos::setup()
{
if (comm->me == 0 && screen) fprintf(screen,"Setting up run ...\n");
update->setupflag = 1;
@ -65,19 +66,24 @@ void VerletKokkos::setup()
atomKK->setup();
modify->setup_pre_exchange();
// debug
atomKK->sync(Host,ALL_MASK);
atomKK->modified(Host,ALL_MASK);
atomKK->sync(Host,ALL_MASK);
atomKK->modified(Host,ALL_MASK);
if (triclinic) domain->x2lamda(atomKK->nlocal);
domain->pbc();
atomKK->sync(Host,ALL_MASK);
domain->reset_box();
comm->setup();
if (neighbor->style) neighbor->setup_bins();
comm->exchange();
if (atomKK->sortfreq > 0) atomKK->sort();
comm->borders();
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
atomKK->sync(Host,ALL_MASK);
@ -97,20 +103,47 @@ void VerletKokkos::setup()
force_clear();
modify->setup_pre_force(vflag);
if (pair_compute_flag) force->pair->compute(eflag,vflag);
if (pair_compute_flag) {
atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
force->pair->compute(eflag,vflag);
timer->stamp(TIME_PAIR);
}
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
if (atomKK->molecular) {
if (force->bond) force->bond->compute(eflag,vflag);
if (force->angle) force->angle->compute(eflag,vflag);
if (force->dihedral) force->dihedral->compute(eflag,vflag);
if (force->improper) force->improper->compute(eflag,vflag);
if (force->bond) {
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
force->bond->compute(eflag,vflag);
}
if (force->angle) {
atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
force->angle->compute(eflag,vflag);
}
if (force->dihedral) {
atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
force->dihedral->compute(eflag,vflag);
}
if (force->improper) {
atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
force->improper->compute(eflag,vflag);
}
timer->stamp(TIME_BOND);
}
if (force->kspace) {
if(force->kspace) {
force->kspace->setup();
if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
else force->kspace->compute_dummy(eflag,vflag);
if (kspace_compute_flag) {
atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
force->kspace->compute(eflag,vflag);
timer->stamp(TIME_KSPACE);
} else force->kspace->compute_dummy(eflag,vflag);
}
if (force->newton) comm->reverse_comm();
@ -172,20 +205,47 @@ void VerletKokkos::setup_minimal(int flag)
force_clear();
modify->setup_pre_force(vflag);
if (pair_compute_flag) force->pair->compute(eflag,vflag);
if (pair_compute_flag) {
atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
force->pair->compute(eflag,vflag);
timer->stamp(TIME_PAIR);
}
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
if (atomKK->molecular) {
if (force->bond) force->bond->compute(eflag,vflag);
if (force->angle) force->angle->compute(eflag,vflag);
if (force->dihedral) force->dihedral->compute(eflag,vflag);
if (force->improper) force->improper->compute(eflag,vflag);
if (force->bond) {
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
force->bond->compute(eflag,vflag);
}
if (force->angle) {
atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
force->angle->compute(eflag,vflag);
}
if (force->dihedral) {
atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
force->dihedral->compute(eflag,vflag);
}
if (force->improper) {
atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
force->improper->compute(eflag,vflag);
}
timer->stamp(TIME_BOND);
}
if (force->kspace) {
if(force->kspace) {
force->kspace->setup();
if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
else force->kspace->compute_dummy(eflag,vflag);
if (kspace_compute_flag) {
atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
force->kspace->compute(eflag,vflag);
timer->stamp(TIME_KSPACE);
} else force->kspace->compute_dummy(eflag,vflag);
}
if (force->newton) comm->reverse_comm();
@ -291,31 +351,47 @@ void VerletKokkos::run(int n)
timer->stamp();
if (pair_compute_flag) {
atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
force->pair->compute(eflag,vflag);
timer->stamp(TIME_PAIR);
}
if (atomKK->molecular) {
if (force->bond) force->bond->compute(eflag,vflag);
if (force->angle) force->angle->compute(eflag,vflag);
if (force->dihedral) force->dihedral->compute(eflag,vflag);
if (force->improper) force->improper->compute(eflag,vflag);
if (force->bond) {
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
force->bond->compute(eflag,vflag);
}
if (force->angle) {
atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
force->angle->compute(eflag,vflag);
}
if (force->dihedral) {
atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
force->dihedral->compute(eflag,vflag);
}
if (force->improper) {
atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
force->improper->compute(eflag,vflag);
}
timer->stamp(TIME_BOND);
}
if (kspace_compute_flag) {
atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
force->kspace->compute(eflag,vflag);
timer->stamp(TIME_KSPACE);
}
// reverse communication of forces
if (force->newton) {
atomKK->sync(Host,F_MASK);
comm->reverse_comm();
atomKK->modified(Host,F_MASK);
timer->stamp(TIME_COMM);
}
if (force->newton) comm->reverse_comm();
timer->stamp(TIME_COMM);
// force modifications, final time integration, diagnostics