git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12040 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2014-05-29 22:51:58 +00:00
parent 8ac75c42a4
commit 3545d44d0d
35 changed files with 9835 additions and 4 deletions

71
src/KOKKOS/Install.sh Normal file
View File

@ -0,0 +1,71 @@
# Install/unInstall package files in LAMMPS
# mode = 0/1/2 for uninstall/install/update
mode=$1
# arg1 = file, arg2 = file it depends on
action () {
if (test $mode = 0) then
rm -f ../$1
elif (! cmp -s $1 ../$1) then
if (test -z "$2" || test -e ../$2) then
cp $1 ..
if (test $mode = 2) then
echo " updating src/$1"
fi
fi
elif (test -n "$2") then
if (test ! -e ../$2) then
rm -f ../$1
fi
fi
}
# force rebuild of files with LMP_KOKKOS switch
touch ../accelerator_kokkos.h
touch ../memory.h
# all package files with no dependencies
for file in *.cpp *.h; do
action $file
done
# edit 2 Makefile.package files to include/exclude package info
if (test $1 = 1) then
if (test -e ../Makefile.package) then
sed -i -e 's/[^ \t]*kokkos[^ \t]* //g' ../Makefile.package
sed -i -e 's/[^ \t]*KOKKOS[^ \t]* //g' ../Makefile.package
sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/kokkos\/core\/src -I../../lib/kokkos/containers/src -DLMP_KOKKOS |' ../Makefile.package
sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/kokkos\/core\/src |' ../Makefile.package
sed -i -e 's|^PKG_LIB =[ \t]*|&-lkokkoscore |' ../Makefile.package
sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(kokkos_SYSINC) |' ../Makefile.package
sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(kokkos_SYSLIB) |' ../Makefile.package
sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(kokkos_SYSPATH) |' ../Makefile.package
fi
if (test -e ../Makefile.package.settings) then
sed -i -e '/^include.*kokkos.*$/d' ../Makefile.package.settings
# multiline form needed for BSD sed on Macs
sed -i -e '4 i \
include ..\/..\/lib\/kokkos\/Makefile.lammps
' ../Makefile.package.settings
fi
elif (test $1 = 0) then
if (test -e ../Makefile.package) then
sed -i -e 's/[^ \t]*kokkos[^ \t]* //g' ../Makefile.package
sed -i -e 's/[^ \t]*KOKKOS[^ \t]* //g' ../Makefile.package
fi
if (test -e ../Makefile.package.settings) then
sed -i -e '/^include.*kokkos.*$/d' ../Makefile.package.settings
fi
fi

190
src/KOKKOS/atom_kokkos.cpp Normal file
View File

@ -0,0 +1,190 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "mpi.h"
#include "atom_kokkos.h"
#include "atom_vec.h"
#include "atom_vec_kokkos.h"
#include "comm_kokkos.h"
#include "update.h"
#include "domain.h"
#include "atom_masks.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp)
{
// set CommKokkos pointer to Atom class, since CommKokkos allocated first
((CommKokkos *) comm)->atomKK = this;
}
/* ---------------------------------------------------------------------- */
AtomKokkos::~AtomKokkos()
{
k_tag = DAT::tdual_int_1d();
k_mask = DAT::tdual_int_1d();
k_type = DAT::tdual_int_1d();
k_image = DAT::tdual_int_1d();
k_molecule = DAT::tdual_int_1d();
k_x = DAT::tdual_x_array();
k_v = DAT::tdual_v_array();
k_f = DAT::tdual_f_array();
k_mass = DAT::tdual_float_1d();
tag = NULL;
mask = NULL;
type = NULL;
image = NULL;
molecule = NULL;
mass = NULL;
memory->sfree(x);
memory->sfree(v);
memory->sfree(f);
x = NULL;
v = NULL;
f = NULL;
}
/* ---------------------------------------------------------------------- */
void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask)
{
((AtomVecKokkos *) avec)->sync(space,mask);
}
/* ---------------------------------------------------------------------- */
void AtomKokkos::modified(const ExecutionSpace space, unsigned int mask)
{
((AtomVecKokkos *) avec)->modified(space,mask);
}
/* ---------------------------------------------------------------------- */
void AtomKokkos::allocate_type_arrays()
{
if (avec->mass_type) {
k_mass = DAT::tdual_float_1d("Mass",ntypes+1);
mass = k_mass.h_view.ptr_on_device();
mass_setflag = new int[ntypes+1];
for (int itype = 1; itype <= ntypes; itype++) mass_setflag[itype] = 0;
}
}
/* ---------------------------------------------------------------------- */
void AtomKokkos::sort()
{
int i,m,n,ix,iy,iz,ibin,empty;
sync(Host,ALL_MASK);
modified(Host,ALL_MASK);
// set next timestep for sorting to take place
nextsort = (update->ntimestep/sortfreq)*sortfreq + sortfreq;
// re-setup sort bins if needed
if (domain->box_change) setup_sort_bins();
if (nbins == 1) return;
// reallocate per-atom vectors if needed
if (nlocal > maxnext) {
memory->destroy(next);
memory->destroy(permute);
maxnext = atom->nmax;
memory->create(next,maxnext,"atom:next");
memory->create(permute,maxnext,"atom:permute");
}
// insure there is one extra atom location at end of arrays for swaps
if (nlocal == nmax) avec->grow(0);
// bin atoms in reverse order so linked list will be in forward order
for (i = 0; i < nbins; i++) binhead[i] = -1;
HAT::t_x_array_const h_x = k_x.view<LMPHostType>();
for (i = nlocal-1; i >= 0; i--) {
ix = static_cast<int> ((h_x(i,0)-bboxlo[0])*bininvx);
iy = static_cast<int> ((h_x(i,1)-bboxlo[1])*bininvy);
iz = static_cast<int> ((h_x(i,2)-bboxlo[2])*bininvz);
ix = MAX(ix,0);
iy = MAX(iy,0);
iz = MAX(iz,0);
ix = MIN(ix,nbinx-1);
iy = MIN(iy,nbiny-1);
iz = MIN(iz,nbinz-1);
ibin = iz*nbiny*nbinx + iy*nbinx + ix;
next[i] = binhead[ibin];
binhead[ibin] = i;
}
// permute = desired permutation of atoms
// permute[I] = J means Ith new atom will be Jth old atom
n = 0;
for (m = 0; m < nbins; m++) {
i = binhead[m];
while (i >= 0) {
permute[n++] = i;
i = next[i];
}
}
// current = current permutation, just reuse next vector
// current[I] = J means Ith current atom is Jth old atom
int *current = next;
for (i = 0; i < nlocal; i++) current[i] = i;
// reorder local atom list, when done, current = permute
// perform "in place" using copy() to extra atom location at end of list
// inner while loop processes one cycle of the permutation
// copy before inner-loop moves an atom to end of atom list
// copy after inner-loop moves atom at end of list back into list
// empty = location in atom list that is currently empty
for (i = 0; i < nlocal; i++) {
if (current[i] == permute[i]) continue;
avec->copy(i,nlocal,0);
empty = i;
while (permute[empty] != i) {
avec->copy(permute[empty],empty,0);
empty = current[empty] = permute[empty];
}
avec->copy(nlocal,empty,0);
current[empty] = permute[empty];
}
// sanity check that current = permute
//int flag = 0;
//for (i = 0; i < nlocal; i++)
// if (current[i] != permute[i]) flag = 1;
//int flagall;
//MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
//if (flagall) error->all(FLERR,"Atom sort did not operate correctly");
}

86
src/KOKKOS/atom_kokkos.h Normal file
View File

@ -0,0 +1,86 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "atom.h"
#include "kokkos_type.h"
#ifndef LMP_ATOM_KOKKOS_H
#define LMP_ATOM_KOKKOS_H
namespace LAMMPS_NS {
class AtomKokkos : public Atom {
public:
DAT::tdual_int_1d k_tag, k_type, k_mask, k_molecule;
DAT::tdual_tagint_1d k_image;
DAT::tdual_x_array k_x;
DAT::tdual_v_array k_v;
DAT::tdual_f_array k_f;
DAT::tdual_float_1d k_mass;
AtomKokkos(class LAMMPS *);
~AtomKokkos();
virtual void allocate_type_arrays();
void sync(const ExecutionSpace space, unsigned int mask);
void modified(const ExecutionSpace space, unsigned int mask);
virtual void sort();
};
template<class ViewType, class IndexView>
class SortFunctor {
typedef typename ViewType::device_type device_type;
ViewType source;
Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type> dest;
IndexView index;
SortFunctor(ViewType src, typename Kokkos::Impl::enable_if<ViewType::dynamic_rank==1,IndexView>::type ind):source(src),index(ind){
dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.dimension_0());
}
SortFunctor(ViewType src, typename Kokkos::Impl::enable_if<ViewType::dynamic_rank==2,IndexView>::type ind):source(src),index(ind){
dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.dimension_0(),src.dimension_1());
}
SortFunctor(ViewType src, typename Kokkos::Impl::enable_if<ViewType::dynamic_rank==3,IndexView>::type ind):source(src),index(ind){
dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.dimension_0(),src.dimension_1(),src.dimension_2());
}
SortFunctor(ViewType src, typename Kokkos::Impl::enable_if<ViewType::dynamic_rank==4,IndexView>::type ind):source(src),index(ind){
dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.dimension_0(),src.dimension_1(),src.dimension_2(),src.dimension_3());
}
KOKKOS_INLINE_FUNCTION
void operator()(const typename Kokkos::Impl::enable_if<ViewType::rank==1, int>::type& i) {
dest(i) = source(index(i));
}
void operator()(const typename Kokkos::Impl::enable_if<ViewType::rank==2, int>::type& i) {
for(int j=0;j<source.dimension_1();j++)
dest(i,j) = source(index(i),j);
}
void operator()(const typename Kokkos::Impl::enable_if<ViewType::rank==3, int>::type& i) {
for(int j=0;j<source.dimension_1();j++)
for(int k=0;k<source.dimension_2();k++)
dest(i,j,k) = source(index(i),j,k);
}
void operator()(const typename Kokkos::Impl::enable_if<ViewType::rank==4, int>::type& i) {
for(int j=0;j<source.dimension_1();j++)
for(int k=0;k<source.dimension_2();k++)
for(int l=0;l<source.dimension_3();l++)
dest(i,j,k,l) = source(index(i),j,k,l);
}
};
}
#endif
/* ERROR/WARNING messages:
*/

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,111 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale AtomicKokkos/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef ATOM_CLASS
AtomStyle(atomic/kk,AtomVecAtomicKokkos)
#else
#ifndef LMP_ATOM_VEC_ATOMIC_KOKKOS_H
#define LMP_ATOM_VEC_ATOMIC_KOKKOS_H
#include "atom_vec_kokkos.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
class AtomVecAtomicKokkos : public AtomVecKokkos {
public:
AtomVecAtomicKokkos(class LAMMPS *);
virtual ~AtomVecAtomicKokkos() {}
void grow(int);
void copy(int, int, int);
int pack_comm(int, int *, double *, int, int *);
int pack_comm_vel(int, int *, double *, int, int *);
void unpack_comm(int, int, double *);
void unpack_comm_vel(int, int, double *);
int pack_reverse(int, int, double *);
void unpack_reverse(int, int *, double *);
int pack_border(int, int *, double *, int, int *);
int pack_border_vel(int, int *, double *, int, int *);
void unpack_border(int, int, double *);
void unpack_border_vel(int, int, double *);
int pack_exchange(int, double *);
int unpack_exchange(double *);
int size_restart();
int pack_restart(int, double *);
int unpack_restart(double *);
void create_atom(int, double *);
void data_atom(double *, tagint, char **);
void pack_data(double **);
void write_data(FILE *, int, double **);
bigint memory_usage();
void grow_reset();
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag, const int pbc[]);
void unpack_comm_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf);
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const int nfirst,
const int &pbc_flag, const int pbc[]);
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space);
void unpack_border_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf,
ExecutionSpace space);
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi);
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space);
void sync(ExecutionSpace space, unsigned int mask);
void modified(ExecutionSpace space, unsigned int mask);
protected:
int *tag,*type,*mask;
tagint *image;
double **x,**v,**f;
DAT::t_int_1d d_tag, d_type, d_mask;
HAT::t_int_1d h_tag, h_type, h_mask;
DAT::t_tagint_1d d_image;
HAT::t_tagint_1d h_image;
DAT::t_x_array d_x;
DAT::t_v_array d_v;
DAT::t_f_array d_f;
HAT::t_x_array h_x;
HAT::t_v_array h_v;
HAT::t_f_array h_f;
DAT::tdual_int_1d k_count;
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,23 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "atom_vec_kokkos.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp)
{
kokkosable = 1;
}

View File

@ -0,0 +1,76 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_ATOM_VEC_KOKKOS_H
#define LMP_ATOM_VEC_KOKKOS_H
#include "atom_vec.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
class AtomVecKokkos : public AtomVec {
public:
AtomVecKokkos(class LAMMPS *);
virtual ~AtomVecKokkos() {}
virtual void sync(ExecutionSpace space, unsigned int mask) {};
virtual void modified(ExecutionSpace space, unsigned int mask) {};
virtual int
pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const int nfirst,
const int &pbc_flag, const int pbc[])
{return 0;}
virtual int
pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag, const int pbc[])
{return 0;}
virtual void
unpack_comm_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf) {};
virtual int
pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space)
{return 0;};
virtual void
unpack_border_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf,
ExecutionSpace space) {};
virtual int
pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi)
{return 0;};
virtual int
unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space)
{return 0;};
protected:
class AtomKokkos *atomKK;
class CommKokkos *commKK;
};
}
#endif
/* ERROR/WARNING messages:
*/

820
src/KOKKOS/comm_kokkos.cpp Normal file
View File

@ -0,0 +1,820 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "comm_kokkos.h"
#include "kokkos.h"
#include "atom.h"
#include "atom_kokkos.h"
#include "atom_vec.h"
#include "atom_vec_kokkos.h"
#include "domain.h"
#include "atom_masks.h"
#include "error.h"
#include "memory.h"
using namespace LAMMPS_NS;
#define BUFFACTOR 1.5
#define BUFMIN 10000
#define BUFEXTRA 1000
enum{SINGLE,MULTI};
/* ----------------------------------------------------------------------
setup MPI and allocate buffer space
------------------------------------------------------------------------- */
CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
{
sendlist = NULL; // need to free this since parent allocated?
k_sendlist = ArrayTypes<LMPDeviceType>::tdual_int_2d();
// error check for disallow of OpenMP threads?
// initialize comm buffers & exchange memory
maxsend = BUFMIN;
k_buf_send = ArrayTypes<LMPDeviceType>::
tdual_xfloat_2d("comm:k_buf_send",(maxsend+BUFEXTRA+5)/6,6);
buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
maxrecv = BUFMIN;
k_buf_recv = ArrayTypes<LMPDeviceType>::
tdual_xfloat_2d("comm:k_buf_recv",(maxrecv+5)/6,6);
buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device();
k_exchange_sendlist = ArrayTypes<LMPDeviceType>::
tdual_int_1d("comm:k_exchange_sendlist",100);
k_exchange_copylist = ArrayTypes<LMPDeviceType>::
tdual_int_1d("comm:k_exchange_copylist",100);
k_count = ArrayTypes<LMPDeviceType>::tdual_int_1d("comm:k_count",1);
k_sendflag = ArrayTypes<LMPDeviceType>::tdual_int_1d("comm:k_sendflag",100);
// next line is bogus?
memory->create(maxsendlist,maxswap,"comm:maxsendlist");
for (int i = 0; i < maxswap; i++) {
maxsendlist[i] = BUFMIN;
}
memory->create_kokkos(k_sendlist,sendlist,maxswap,BUFMIN,"comm:sendlist");
}
/* ---------------------------------------------------------------------- */
CommKokkos::~CommKokkos()
{
memory->destroy_kokkos(k_sendlist,sendlist);
memory->destroy_kokkos(k_buf_send,buf_send);
memory->destroy_kokkos(k_buf_recv,buf_recv);
}
/* ---------------------------------------------------------------------- */
void CommKokkos::init()
{
atomKK = (AtomKokkos *) atom;
exchange_comm_classic = lmp->kokkos->exchange_comm_classic;
forward_comm_classic = lmp->kokkos->forward_comm_classic;
exchange_comm_on_host = lmp->kokkos->exchange_comm_on_host;
forward_comm_on_host = lmp->kokkos->forward_comm_on_host;
CommBrick::init();
}
/* ----------------------------------------------------------------------
forward communication of atom coords every timestep
other per-atom attributes may also be sent via pack/unpack routines
------------------------------------------------------------------------- */
void CommKokkos::forward_comm(int dummy)
{
if (!forward_comm_classic) {
if (forward_comm_on_host) forward_comm_device<LMPHostType>(dummy);
else forward_comm_device<LMPDeviceType>(dummy);
return;
}
k_sendlist.sync<LMPHostType>();
if (comm_x_only) {
atomKK->sync(Host,X_MASK);
atomKK->modified(Host,X_MASK);
} else if (ghost_velocity) {
atomKK->sync(Host,X_MASK | V_MASK);
atomKK->modified(Host,X_MASK | V_MASK);
} else {
atomKK->sync(Host,ALL_MASK);
atomKK->modified(Host,ALL_MASK);
}
CommBrick::forward_comm(dummy);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void CommKokkos::forward_comm_device(int dummy)
{
int n;
MPI_Request request;
MPI_Status status;
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
double **x = atom->x;
double *buf;
// exchange data with another proc
// if other proc is self, just copy
// if comm_x_only set, exchange or copy directly to x, don't unpack
k_sendlist.sync<DeviceType>();
for (int iswap = 0; iswap < nswap; iswap++) {
if (sendproc[iswap] != me) {
if (comm_x_only) {
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]];
else buf = NULL;
if (size_forward_recv[iswap]) {
buf = atomKK->k_x.view<DeviceType>().ptr_on_device() +
firstrecv[iswap]*atomKK->k_x.view<DeviceType>().dimension_1();
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
}
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
if (n) {
MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),
n,MPI_DOUBLE,sendproc[iswap],0,world);
}
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
space,X_MASK);
} else if (ghost_velocity) {
error->all(FLERR,"Ghost velocity forward comm not yet "
"implemented with Kokkos");
if (size_forward_recv[iswap])
MPI_Irecv(k_buf_recv.view<LMPHostType>().ptr_on_device(),
size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
buf_send,pbc_flag[iswap],pbc[iswap]);
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv);
} else {
if (size_forward_recv[iswap])
MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),
size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
k_buf_send,pbc_flag[iswap],pbc[iswap]);
if (n)
MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n,
MPI_DOUBLE,sendproc[iswap],0,world);
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
}
} else {
if (!ghost_velocity) {
if (sendnum[iswap])
n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap,
firstrecv[iswap],pbc_flag[iswap],pbc[iswap]);
} else if (ghost_velocity) {
error->all(FLERR,"Ghost velocity forward comm not yet "
"implemented with Kokkos");
n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
buf_send,pbc_flag[iswap],pbc[iswap]);
avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send);
}
}
}
}
/* ----------------------------------------------------------------------
exchange: move atoms to correct processors
atoms exchanged with all 6 stencil neighbors
send out atoms that have left my box, receive ones entering my box
atoms will be lost if not inside some proc's box
can happen if atom moves outside of non-periodic bounary
or if atom moves more than one proc away
this routine called before every reneighboring
for triclinic, atoms must be in lamda coords (0-1) before exchange is called
------------------------------------------------------------------------- */
void CommKokkos::exchange()
{
if (!exchange_comm_classic) {
if (exchange_comm_on_host) exchange_device<LMPHostType>();
else exchange_device<LMPDeviceType>();
return;
}
atomKK->sync(Host,ALL_MASK);
atomKK->modified(Host,ALL_MASK);
CommBrick::exchange();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
struct BuildExchangeListFunctor {
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
X_FLOAT _lo,_hi;
typename AT::t_x_array _x;
int _nlocal,_dim;
typename AT::t_int_1d _nsend;
typename AT::t_int_1d _sendlist;
typename AT::t_int_1d _sendflag;
BuildExchangeListFunctor(
const typename AT::tdual_x_array x,
const typename AT::tdual_int_1d sendlist,
typename AT::tdual_int_1d nsend,
typename AT::tdual_int_1d sendflag,int nlocal, int dim,
X_FLOAT lo, X_FLOAT hi):
_x(x.template view<DeviceType>()),
_sendlist(sendlist.template view<DeviceType>()),
_nsend(nsend.template view<DeviceType>()),
_sendflag(sendflag.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi){
}
KOKKOS_INLINE_FUNCTION
void operator() (int i) const {
if (_x(i,_dim) < _lo || _x(i,_dim) >= _hi) {
const int mysend=Kokkos::atomic_fetch_add(&_nsend(0),1);
if(mysend<_sendlist.dimension_0()) {
_sendlist(mysend) = i;
_sendflag(i) = 1;
}
} else
_sendflag(i) = 0;
}
};
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void CommKokkos::exchange_device()
{
int i,m,nsend,nrecv,nrecv1,nrecv2,nlocal;
double lo,hi,value;
double **x;
double *sublo,*subhi,*buf;
MPI_Request request;
MPI_Status status;
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
// clear global->local map for owned and ghost atoms
// b/c atoms migrate to new procs in exchange() and
// new ghosts are created in borders()
// map_set() is done at end of borders()
// clear ghost count and any ghost bonus data internal to AtomVec
if (map_style) atom->map_clear();
atom->nghost = 0;
atom->avec->clear_bonus();
// subbox bounds for orthogonal or triclinic
if (triclinic == 0) {
sublo = domain->sublo;
subhi = domain->subhi;
} else {
sublo = domain->sublo_lamda;
subhi = domain->subhi_lamda;
}
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
// loop over dimensions
for (int dim = 0; dim < 3; dim++) {
// fill buffer with atoms leaving my box, using < and >=
// when atom is deleted, fill it in with last atom
x = atom->x;
lo = sublo[dim];
hi = subhi[dim];
nlocal = atom->nlocal;
i = nsend = 0;
if (true) {
if (k_sendflag.h_view.dimension_0()<nlocal) k_sendflag.resize(nlocal);
k_count.h_view(0) = k_exchange_sendlist.h_view.dimension_0();
while (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) {
k_count.h_view(0) = 0;
k_count.modify<LMPHostType>();
k_count.sync<DeviceType>();
BuildExchangeListFunctor<DeviceType>
f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag,
nlocal,dim,lo,hi);
Kokkos::parallel_for(nlocal,f);
DeviceType::fence();
k_exchange_sendlist.modify<DeviceType>();
k_sendflag.modify<DeviceType>();
k_count.modify<DeviceType>();
k_count.sync<LMPHostType>();
if (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) {
k_exchange_sendlist.resize(k_count.h_view(0)*1.1);
k_exchange_copylist.resize(k_count.h_view(0)*1.1);
k_count.h_view(0)=k_exchange_sendlist.h_view.dimension_0();
}
}
k_exchange_sendlist.sync<LMPHostType>();
k_sendflag.sync<LMPHostType>();
int sendpos = nlocal-1;
nlocal -= k_count.h_view(0);
for(int i = 0; i < k_count.h_view(0); i++) {
if (k_exchange_sendlist.h_view(i)<nlocal) {
while (k_sendflag.h_view(sendpos)) sendpos--;
k_exchange_copylist.h_view(i) = sendpos;
sendpos--;
} else
k_exchange_copylist.h_view(i) = -1;
}
k_exchange_copylist.modify<LMPHostType>();
k_exchange_copylist.sync<DeviceType>();
nsend =
avec->pack_exchange_kokkos(k_count.h_view(0),k_buf_send,
k_exchange_sendlist,k_exchange_copylist,
ExecutionSpaceFromDevice<DeviceType>::
space,dim,lo,hi);
DeviceType::fence();
} else {
while (i < nlocal) {
if (x[i][dim] < lo || x[i][dim] >= hi) {
if (nsend > maxsend) grow_send_kokkos(nsend,1);
nsend += avec->pack_exchange(i,&buf_send[nsend]);
avec->copy(nlocal-1,i,1);
nlocal--;
} else i++;
}
}
atom->nlocal = nlocal;
// send/recv atoms in both directions
// if 1 proc in dimension, no send/recv, set recv buf to send buf
// if 2 procs in dimension, single send/recv
// if more than 2 procs in dimension, send/recv to both neighbors
if (procgrid[dim] == 1) {
nrecv = nsend;
buf = buf_send;
if (nrecv) {
atom->nlocal=avec->
unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi,
ExecutionSpaceFromDevice<DeviceType>::space);
DeviceType::fence();
}
} else {
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
&nrecv1,1,MPI_INT,procneigh[dim][1],0,world,&status);
nrecv = nrecv1;
if (procgrid[dim] > 2) {
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0,
&nrecv2,1,MPI_INT,procneigh[dim][0],0,world,&status);
nrecv += nrecv2;
}
if (nrecv > maxrecv) grow_recv_kokkos(nrecv);
MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),nrecv1,
MPI_DOUBLE,procneigh[dim][1],0,
world,&request);
MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),nsend,
MPI_DOUBLE,procneigh[dim][0],0,world);
MPI_Wait(&request,&status);
if (procgrid[dim] > 2) {
MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device()+nrecv1,
nrecv2,MPI_DOUBLE,procneigh[dim][0],0,
world,&request);
MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),nsend,
MPI_DOUBLE,procneigh[dim][1],0,world);
MPI_Wait(&request,&status);
}
buf = buf_recv;
if (nrecv) {
atom->nlocal = avec->
unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi,
ExecutionSpaceFromDevice<DeviceType>::space);
DeviceType::fence();
}
}
// check incoming atoms to see if they are in my box
// if so, add to my list
}
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
if (atom->firstgroupname) {
/* this is not yet implemented with Kokkos */
atomKK->sync(Host,ALL_MASK);
atom->first_reorder();
atomKK->modified(Host,ALL_MASK);
}
}
/* ----------------------------------------------------------------------
borders: list nearby atoms to send to neighboring procs at every timestep
one list is created for every swap that will be made
as list is made, actually do swaps
this does equivalent of a communicate, so don't need to explicitly
call communicate routine on reneighboring timestep
this routine is called before every reneighboring
for triclinic, atoms must be in lamda coords (0-1) before borders is called
------------------------------------------------------------------------- */
void CommKokkos::borders()
{
if (!exchange_comm_classic) {
if (exchange_comm_on_host) borders_device<LMPHostType>();
else borders_device<LMPDeviceType>();
return;
}
atomKK->sync(Host,ALL_MASK);
k_sendlist.modify<LMPHostType>();
atomKK->modified(Host,ALL_MASK);
CommBrick::borders();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
struct BuildBorderListFunctor {
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
X_FLOAT lo,hi;
typename AT::t_x_array x;
int iswap,maxsendlist;
int nfirst,nlast,dim;
typename AT::t_int_2d sendlist;
typename AT::t_int_1d nsend;
BuildBorderListFunctor(typename AT::tdual_x_array _x,
typename AT::tdual_int_2d _sendlist,
typename AT::tdual_int_1d _nsend,int _nfirst,
int _nlast, int _dim,
X_FLOAT _lo, X_FLOAT _hi, int _iswap,
int _maxsendlist):
x(_x.template view<DeviceType>()),
sendlist(_sendlist.template view<DeviceType>()),
nsend(_nsend.template view<DeviceType>()),
nfirst(_nfirst),nlast(_nlast),dim(_dim),
lo(_lo),hi(_hi),iswap(_iswap),maxsendlist(_maxsendlist){}
KOKKOS_INLINE_FUNCTION
void operator() (DeviceType dev) const {
const int chunk = ((nlast - nfirst + dev.league_size() - 1 ) /
dev.league_size());
const int teamstart = chunk*dev.league_rank() + nfirst;
const int teamend = (teamstart + chunk) < nlast?(teamstart + chunk):nlast;
int mysend = 0;
for (int i=teamstart + dev.team_rank(); i<teamend; i+=dev.team_size()) {
if (x(i,dim) >= lo && x(i,dim) <= hi) mysend++;
}
const int my_store_pos = dev.team_scan(mysend,&nsend(0));
if (my_store_pos+mysend < maxsendlist) {
mysend = my_store_pos;
for(int i=teamstart + dev.team_rank(); i<teamend; i+=dev.team_size()){
if (x(i,dim) >= lo && x(i,dim) <= hi) {
sendlist(iswap,mysend++) = i;
}
}
}
}
size_t shmem_size() const { return 1000u;}
};
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void CommKokkos::borders_device() {
int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax;
int nsend,nrecv,sendflag,nfirst,nlast,ngroup;
double lo,hi;
int *type;
double **x;
double *buf,*mlo,*mhi;
MPI_Request request;
MPI_Status status;
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space;
k_sendlist.modify<DeviceType>();
atomKK->sync(exec_space,ALL_MASK);
// do swaps over all 3 dimensions
iswap = 0;
smax = rmax = 0;
for (dim = 0; dim < 3; dim++) {
nlast = 0;
twoneed = 2*maxneed[dim];
for (ineed = 0; ineed < twoneed; ineed++) {
// find atoms within slab boundaries lo/hi using <= and >=
// check atoms between nfirst and nlast
// for first swaps in a dim, check owned and ghost
// for later swaps in a dim, only check newly arrived ghosts
// store sent atom indices in list for use in future timesteps
x = atom->x;
if (style == SINGLE) {
lo = slablo[iswap];
hi = slabhi[iswap];
} else {
type = atom->type;
mlo = multilo[iswap];
mhi = multihi[iswap];
}
if (ineed % 2 == 0) {
nfirst = nlast;
nlast = atom->nlocal + atom->nghost;
}
nsend = 0;
// sendflag = 0 if I do not send on this swap
// sendneed test indicates receiver no longer requires data
// e.g. due to non-PBC or non-uniform sub-domains
if (ineed/2 >= sendneed[dim][ineed % 2]) sendflag = 0;
else sendflag = 1;
// find send atoms according to SINGLE vs MULTI
// all atoms eligible versus atoms in bordergroup
// only need to limit loop to bordergroup for first sends (ineed < 2)
// on these sends, break loop in two: owned (in group) and ghost
if (sendflag) {
if (!bordergroup || ineed >= 2) {
if (style == SINGLE) {
typename ArrayTypes<DeviceType>::tdual_int_1d total_send("TS",1);
total_send.h_view(0) = 0;
if(exec_space == Device) {
total_send.template modify<DeviceType>();
total_send.template sync<LMPDeviceType>();
}
BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
Kokkos::parallel_for(config,f);
DeviceType::fence();
total_send.template modify<DeviceType>();
total_send.template sync<LMPHostType>();
if(total_send.h_view(0) >= maxsendlist[iswap]) {
grow_list(iswap,total_send.h_view(0));
total_send.h_view(0) = 0;
if(exec_space == Device) {
total_send.template modify<LMPHostType>();
total_send.template sync<LMPDeviceType>();
}
BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
Kokkos::parallel_for(config,f);
DeviceType::fence();
total_send.template modify<DeviceType>();
total_send.template sync<LMPHostType>();
}
nsend = total_send.h_view(0);
} else {
error->all(FLERR,"Required border comm not yet "
"implemented with Kokkos\n");
for (i = nfirst; i < nlast; i++) {
itype = type[i];
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
}
}
} else {
error->all(FLERR,"Required border comm not yet "
"implemented with Kokkos\n");
if (style == SINGLE) {
ngroup = atom->nfirst;
for (i = 0; i < ngroup; i++)
if (x[i][dim] >= lo && x[i][dim] <= hi) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
for (i = atom->nlocal; i < nlast; i++)
if (x[i][dim] >= lo && x[i][dim] <= hi) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
} else {
ngroup = atom->nfirst;
for (i = 0; i < ngroup; i++) {
itype = type[i];
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
}
for (i = atom->nlocal; i < nlast; i++) {
itype = type[i];
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
}
}
}
}
// pack up list of border atoms
if (nsend*size_border > maxsend)
grow_send_kokkos(nsend*size_border,0);
if (ghost_velocity) {
error->all(FLERR,"Required border comm not yet "
"implemented with Kokkos\n");
n = avec->pack_border_vel(nsend,sendlist[iswap],buf_send,
pbc_flag[iswap],pbc[iswap]);
}
else
n = avec->
pack_border_kokkos(nsend,k_sendlist,k_buf_send,iswap,
pbc_flag[iswap],pbc[iswap],exec_space);
// swap atoms with other proc
// no MPI calls except SendRecv if nsend/nrecv = 0
// put incoming ghosts at end of my atom arrays
// if swapping with self, simply copy, no messages
if (sendproc[iswap] != me) {
MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0,
&nrecv,1,MPI_INT,recvproc[iswap],0,world,&status);
if (nrecv*size_border > maxrecv) grow_recv_kokkos(nrecv*size_border);
if (nrecv) MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),
nrecv*size_border,MPI_DOUBLE,
recvproc[iswap],0,world,&request);
if (n) MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n,
MPI_DOUBLE,sendproc[iswap],0,world);
if (nrecv) MPI_Wait(&request,&status);
buf = buf_recv;
} else {
nrecv = nsend;
buf = buf_send;
}
// unpack buffer
if (ghost_velocity) {
error->all(FLERR,"Required border comm not yet "
"implemented with Kokkos\n");
avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf);
}
else
if (sendproc[iswap] != me)
avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost,
k_buf_recv,exec_space);
else
avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost,
k_buf_send,exec_space);
// set all pointers & counters
smax = MAX(smax,nsend);
rmax = MAX(rmax,nrecv);
sendnum[iswap] = nsend;
recvnum[iswap] = nrecv;
size_forward_recv[iswap] = nrecv*size_forward;
size_reverse_send[iswap] = nrecv*size_reverse;
size_reverse_recv[iswap] = nsend*size_reverse;
firstrecv[iswap] = atom->nlocal + atom->nghost;
atom->nghost += nrecv;
iswap++;
}
}
// insure send/recv buffers are long enough for all forward & reverse comm
int max = MAX(maxforward*smax,maxreverse*rmax);
if (max > maxsend) grow_send_kokkos(max,0);
max = MAX(maxforward*rmax,maxreverse*smax);
if (max > maxrecv) grow_recv_kokkos(max);
// reset global->local map
if (map_style) atom->map_set();
if (exec_space == Host) k_sendlist.sync<LMPDeviceType>();
atomKK->modified(exec_space,ALL_MASK);
DeviceType::fence();
}
/* ----------------------------------------------------------------------
realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA
if flag = 1, realloc
if flag = 0, don't need to realloc with copy, just free/malloc
------------------------------------------------------------------------- */
void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
{
maxsend = static_cast<int> (BUFFACTOR * n);
int maxsend_border = (maxsend+BUFEXTRA+5)/atom->avec->size_border + 2;
if (flag) {
if(space == Device)
k_buf_send.modify<LMPDeviceType>();
else
k_buf_send.modify<LMPHostType>();
k_buf_send.resize(maxsend_border,atom->avec->size_border);
buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
}
else {
k_buf_send = ArrayTypes<LMPDeviceType>::
tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atom->avec->size_border);
buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
}
}
/* ----------------------------------------------------------------------
free/malloc the size of the recv buffer as needed with BUFFACTOR
------------------------------------------------------------------------- */
void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace space)
{
maxrecv = static_cast<int> (BUFFACTOR * n);
int maxrecv_border = (maxrecv+BUFEXTRA+5)/atom->avec->size_border + 2;
k_buf_recv = ArrayTypes<LMPDeviceType>::
tdual_xfloat_2d("comm:k_buf_recv",maxrecv_border,atom->avec->size_border);
buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device();
}
/* ----------------------------------------------------------------------
realloc the size of the iswap sendlist as needed with BUFFACTOR
------------------------------------------------------------------------- */
void CommKokkos::grow_list(int iswap, int n)
{
int size = static_cast<int> (BUFFACTOR * n);
memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist");
for(int i=0;i<maxswap;i++) {
maxsendlist[i]=size; sendlist[i]=&k_sendlist.view<LMPHostType>()(i,0);
}
}
/* ----------------------------------------------------------------------
realloc the buffers needed for swaps
------------------------------------------------------------------------- */
void CommKokkos::grow_swap(int n)
{
free_swap();
allocate_swap(n);
if (style == MULTI) {
free_multi();
allocate_multi(n);
}
maxswap = n;
int size = MAX(k_sendlist.d_view.dimension_1(),BUFMIN);
memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist");
memory->grow(maxsendlist,n,"comm:maxsendlist");
for (int i=0;i<maxswap;i++) maxsendlist[i]=size;
}

63
src/KOKKOS/comm_kokkos.h Normal file
View File

@ -0,0 +1,63 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_COMM_KOKKOS_H
#define LMP_COMM_KOKKOS_H
#include "comm_brick.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
class CommKokkos : public CommBrick {
public:
class AtomKokkos *atomKK;
bool exchange_comm_classic;
bool forward_comm_classic;
bool exchange_comm_on_host;
bool forward_comm_on_host;
CommKokkos(class LAMMPS *);
~CommKokkos();
void init();
void forward_comm(int dummy = 0); // forward comm of atom coords
void exchange(); // move atoms to new procs
void borders(); // setup list of atoms to comm
template<class DeviceType> void forward_comm_device(int dummy);
template<class DeviceType> void exchange_device();
template<class DeviceType> void borders_device();
protected:
DAT::tdual_int_2d k_sendlist;
DAT::tdual_xfloat_2d k_buf_send,k_buf_recv;
DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag;
DAT::tdual_int_1d k_count;
//double *buf_send; // send buffer for all comm
//double *buf_recv; // recv buffer for all comm
void grow_send_kokkos(int, int, ExecutionSpace space = Host);
void grow_recv_kokkos(int, ExecutionSpace space = Host);
void grow_list(int, int);
void grow_swap(int);
};
}
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,207 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "domain_kokkos.h"
#include "atom_kokkos.h"
#include "atom_masks.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
DomainKokkos::DomainKokkos(LAMMPS *lmp) : Domain(lmp) {}
/* ---------------------------------------------------------------------- */
void DomainKokkos::init()
{
atomKK = (AtomKokkos *) atom;
Domain::init();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType, int PERIODIC, int DEFORM_VREMAP>
struct DomainPBCFunctor {
typedef DeviceType device_type;
double lo[3],hi[3],period[3];
typename ArrayTypes<DeviceType>::t_x_array x;
typename ArrayTypes<DeviceType>::t_v_array v;
typename ArrayTypes<DeviceType>::t_int_1d mask;
typename ArrayTypes<DeviceType>::t_int_1d image;
int deform_groupbit;
double h_rate[6];
int xperiodic,yperiodic,zperiodic;
DomainPBCFunctor(double* _lo, double* _hi, double* _period,
DAT::tdual_x_array _x, DAT::tdual_v_array _v,
DAT::tdual_int_1d _mask, DAT::tdual_int_1d _image,
int _deform_groupbit, double* _h_rate,
int _xperiodic, int _yperiodic, int _zperiodic):
x(_x.view<DeviceType>()), v(_v.view<DeviceType>()),
mask(_mask.view<DeviceType>()), image(_image.view<DeviceType>()),
deform_groupbit(_deform_groupbit),
xperiodic(_xperiodic), yperiodic(_yperiodic), zperiodic(_zperiodic){
lo[0]=_lo[0]; lo[1]=_lo[1]; lo[2]=_lo[2];
hi[0]=_hi[0]; hi[1]=_hi[1]; hi[2]=_hi[2];
period[0]=_period[0]; period[1]=_period[1]; period[2]=_period[2];
h_rate[0]=_h_rate[0]; h_rate[1]=_h_rate[1]; h_rate[2]=_h_rate[2];
h_rate[3]=_h_rate[3]; h_rate[4]=_h_rate[4]; h_rate[5]=_h_rate[5];
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const {
if (PERIODIC && xperiodic) {
if (x(i,0) < lo[0]) {
x(i,0) += period[0];
if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) v(i,0) += h_rate[0];
int idim = image[i] & IMGMASK;
const int otherdims = image[i] ^ idim;
idim--;
idim &= IMGMASK;
image[i] = otherdims | idim;
}
if (x(i,0) >= hi[0]) {
x(i,0) -= period[0];
x(i,0) = MAX(x(i,0),lo[0]);
if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) v(i,0) -= h_rate[0];
int idim = image[i] & IMGMASK;
const int otherdims = image[i] ^ idim;
idim++;
idim &= IMGMASK;
image[i] = otherdims | idim;
}
}
if (PERIODIC && yperiodic) {
if (x(i,1) < lo[1]) {
x(i,1) += period[1];
if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) {
v(i,0) += h_rate[5];
v(i,1) += h_rate[1];
}
int idim = (image[i] >> IMGBITS) & IMGMASK;
const int otherdims = image[i] ^ (idim << IMGBITS);
idim--;
idim &= IMGMASK;
image[i] = otherdims | (idim << IMGBITS);
}
if (x(i,1) >= hi[1]) {
x(i,1) -= period[1];
x(i,1) = MAX(x(i,1),lo[1]);
if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) {
v(i,0) -= h_rate[5];
v(i,1) -= h_rate[1];
}
int idim = (image[i] >> IMGBITS) & IMGMASK;
const int otherdims = image[i] ^ (idim << IMGBITS);
idim++;
idim &= IMGMASK;
image[i] = otherdims | (idim << IMGBITS);
}
}
if (PERIODIC && zperiodic) {
if (x(i,2) < lo[2]) {
x(i,2) += period[2];
if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) {
v(i,0) += h_rate[4];
v(i,1) += h_rate[3];
v(i,2) += h_rate[2];
}
int idim = image[i] >> IMG2BITS;
const int otherdims = image[i] ^ (idim << IMG2BITS);
idim--;
idim &= IMGMASK;
image[i] = otherdims | (idim << IMG2BITS);
}
if (x(i,2) >= hi[2]) {
x(i,2) -= period[2];
x(i,2) = MAX(x(i,2),lo[2]);
if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) {
v(i,0) -= h_rate[4];
v(i,1) -= h_rate[3];
v(i,2) -= h_rate[2];
}
int idim = image[i] >> IMG2BITS;
const int otherdims = image[i] ^ (idim << IMG2BITS);
idim++;
idim &= IMGMASK;
image[i] = otherdims | (idim << IMG2BITS);
}
}
}
};
/* ----------------------------------------------------------------------
enforce PBC and modify box image flags for each atom
called every reneighboring and by other commands that change atoms
resulting coord must satisfy lo <= coord < hi
MAX is important since coord - prd < lo can happen when coord = hi
if fix deform, remap velocity of fix group atoms by box edge velocities
for triclinic, atoms must be in lamda coords (0-1) before pbc is called
image = 10 bits for each dimension
increment/decrement in wrap-around fashion
------------------------------------------------------------------------- */
void DomainKokkos::pbc()
{
double *lo,*hi,*period;
int nlocal = atomKK->nlocal;
if (triclinic == 0) {
lo = boxlo;
hi = boxhi;
period = prd;
} else {
lo = boxlo_lamda;
hi = boxhi_lamda;
period = prd_lamda;
}
atomKK->sync(Device,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK);
atomKK->modified(Device,X_MASK|V_MASK);
if (xperiodic || yperiodic || zperiodic) {
if (deform_vremap) {
DomainPBCFunctor<LMPDeviceType,1,1>
f(lo,hi,period,
atomKK->k_x,atomKK->k_v,atomKK->k_mask,atomKK->k_image,
deform_groupbit,h_rate,xperiodic,yperiodic,zperiodic);
Kokkos::parallel_for(nlocal,f);
} else {
DomainPBCFunctor<LMPDeviceType,1,0>
f(lo,hi,period,
atomKK->k_x,atomKK->k_v,atomKK->k_mask,atomKK->k_image,
deform_groupbit,h_rate,xperiodic,yperiodic,zperiodic);
Kokkos::parallel_for(nlocal,f);
}
} else {
if (deform_vremap) {
DomainPBCFunctor<LMPDeviceType,0,1>
f(lo,hi,period,
atomKK->k_x,atomKK->k_v,atomKK->k_mask,atomKK->k_image,
deform_groupbit,h_rate,xperiodic,yperiodic,zperiodic);
Kokkos::parallel_for(nlocal,f);
} else {
DomainPBCFunctor<LMPDeviceType,0,0>
f(lo,hi,period,
atomKK->k_x,atomKK->k_v,atomKK->k_mask,atomKK->k_image,
deform_groupbit,h_rate,xperiodic,yperiodic,zperiodic);
Kokkos::parallel_for(nlocal,f);
}
}
LMPDeviceType::fence();
}

View File

@ -0,0 +1,38 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_DOMAIN_KOKKOS_H
#define LMP_DOMAIN_KOKKOS_H
#include "domain.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
class DomainKokkos : public Domain {
public:
class AtomKokkos *atomKK;
DomainKokkos(class LAMMPS *);
~DomainKokkos() {}
void init();
void pbc();
};
}
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,177 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "stdio.h"
#include "string.h"
#include "fix_nve_kokkos.h"
#include "atom_masks.h"
#include "atom_kokkos.h"
#include "force.h"
#include "update.h"
#include "respa.h"
#include "error.h"
using namespace LAMMPS_NS;
using namespace FixConst;
/* ---------------------------------------------------------------------- */
template<class DeviceType>
FixNVEKokkos<DeviceType>::FixNVEKokkos(LAMMPS *lmp, int narg, char **arg) :
FixNVE(lmp, narg, arg)
{
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK;
datamask_modify = X_MASK | V_MASK | F_MASK;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixNVEKokkos<DeviceType>::init()
{
FixNVE::init();
atomKK->k_mass.modify<LMPHostType>();
atomKK->k_mass.sync<LMPDeviceType>();
}
/* ----------------------------------------------------------------------
allow for both per-type and per-atom mass
------------------------------------------------------------------------- */
template<class DeviceType>
void FixNVEKokkos<DeviceType>::initial_integrate(int vflag)
{
atomKK->sync(execution_space,datamask_read);
atomKK->modified(execution_space,datamask_modify);
x = atomKK->k_x.view<DeviceType>();
v = atomKK->k_v.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
rmass = atomKK->rmass;
mass = atomKK->k_mass.view<DeviceType>();
type = atomKK->k_type.view<DeviceType>();
mask = atomKK->k_mask.view<DeviceType>();
int nlocal = atomKK->nlocal;
if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst;
if (rmass) {
FixNVEKokkosInitialIntegrateFunctor<DeviceType,1> functor(this);
Kokkos::parallel_for(nlocal,functor);
} else {
FixNVEKokkosInitialIntegrateFunctor<DeviceType,0> functor(this);
Kokkos::parallel_for(nlocal,functor);
}
DeviceType::fence();
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixNVEKokkos<DeviceType>::initial_integrate_item(int i) const
{
if (mask[i] & groupbit) {
const double dtfm = dtf / mass[type[i]];
v(i,0) += dtfm * f(i,0);
v(i,1) += dtfm * f(i,1);
v(i,2) += dtfm * f(i,2);
x(i,0) += dtv * v(i,0);
x(i,1) += dtv * v(i,1);
x(i,2) += dtv * v(i,2);
}
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixNVEKokkos<DeviceType>::initial_integrate_rmass_item(int i) const
{
if (mask[i] & groupbit) {
const double dtfm = dtf / rmass[type[i]];
v(i,0) += dtfm * f(i,0);
v(i,1) += dtfm * f(i,1);
v(i,2) += dtfm * f(i,2);
x(i,0) += dtv * v(i,0);
x(i,1) += dtv * v(i,1);
x(i,2) += dtv * v(i,2);
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixNVEKokkos<DeviceType>::final_integrate()
{
atomKK->sync(execution_space,datamask_read);
atomKK->modified(execution_space,datamask_modify);
v = atomKK->k_v.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
rmass = atomKK->rmass;
mass = atomKK->k_mass.view<DeviceType>();
type = atomKK->k_type.view<DeviceType>();
mask = atomKK->k_mask.view<DeviceType>();
int nlocal = atomKK->nlocal;
if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst;
if (rmass) {
FixNVEKokkosFinalIntegrateFunctor<DeviceType,1> functor(this);
Kokkos::parallel_for(nlocal,functor);
} else {
FixNVEKokkosFinalIntegrateFunctor<DeviceType,0> functor(this);
Kokkos::parallel_for(nlocal,functor);
}
DeviceType::fence();
// debug
//atomKK->sync(Host,datamask_read);
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixNVEKokkos<DeviceType>::final_integrate_item(int i) const
{
if (mask[i] & groupbit) {
const double dtfm = dtf / mass[type[i]];
v(i,0) += dtfm * f(i,0);
v(i,1) += dtfm * f(i,1);
v(i,2) += dtfm * f(i,2);
}
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixNVEKokkos<DeviceType>::final_integrate_rmass_item(int i) const
{
if (mask[i] & groupbit) {
const double dtfm = dtf / rmass[type[i]];
v(i,0) += dtfm * f(i,0);
v(i,1) += dtfm * f(i,1);
v(i,2) += dtfm * f(i,2);
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixNVEKokkos<DeviceType>::cleanup_copy()
{
id = style = NULL;
vatom = NULL;
}
template class FixNVEKokkos<LMPDeviceType>;
#if DEVICE==2
template class FixNVEKokkos<LMPHostType>;
#endif

110
src/KOKKOS/fix_nve_kokkos.h Normal file
View File

@ -0,0 +1,110 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(nve/kk,FixNVEKokkos<LMPDeviceType>)
FixStyle(nve/kk/device,FixNVEKokkos<LMPDeviceType>)
FixStyle(nve/kk/host,FixNVEKokkos<LMPHostType>)
#else
#ifndef LMP_FIX_NVE_KOKKOS_H
#define LMP_FIX_NVE_KOKKOS_H
#include "fix_nve.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
template<class DeviceType>
class FixNVEKokkos;
template <class DeviceType, int RMass>
class FixNVEKokkosInitialIntegrateFunctor;
template <class DeviceType, int RMass>
class FixNVEKokkosFinalIntegrateFunctor;
template<class DeviceType>
class FixNVEKokkos : public FixNVE {
public:
FixNVEKokkos(class LAMMPS *, int, char **);
~FixNVEKokkos() {}
void cleanup_copy();
void init();
void initial_integrate(int);
void final_integrate();
KOKKOS_INLINE_FUNCTION
void initial_integrate_item(int) const;
KOKKOS_INLINE_FUNCTION
void initial_integrate_rmass_item(int) const;
KOKKOS_INLINE_FUNCTION
void final_integrate_item(int) const;
KOKKOS_INLINE_FUNCTION
void final_integrate_rmass_item(int) const;
private:
class AtomKokkos *atomKK;
typename ArrayTypes<DeviceType>::t_x_array x;
typename ArrayTypes<DeviceType>::t_v_array v;
typename ArrayTypes<DeviceType>::t_f_array_const f;
double *rmass;
typename ArrayTypes<DeviceType>::t_float_1d_randomread mass;
typename ArrayTypes<DeviceType>::t_int_1d type;
typename ArrayTypes<DeviceType>::t_int_1d mask;
};
template <class DeviceType, int RMass>
struct FixNVEKokkosInitialIntegrateFunctor {
typedef DeviceType device_type ;
FixNVEKokkos<DeviceType> c;
FixNVEKokkosInitialIntegrateFunctor(FixNVEKokkos<DeviceType>* c_ptr):
c(*c_ptr) {c.cleanup_copy();};
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
if (RMass) c.initial_integrate_rmass_item(i);
else c.initial_integrate_item(i);
}
};
template <class DeviceType, int RMass>
struct FixNVEKokkosFinalIntegrateFunctor {
typedef DeviceType device_type ;
FixNVEKokkos<DeviceType> c;
FixNVEKokkosFinalIntegrateFunctor(FixNVEKokkos<DeviceType>* c_ptr):
c(*c_ptr) {c.cleanup_copy();};
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
if (RMass) c.final_integrate_rmass_item(i);
else c.final_integrate_item(i);
}
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Illegal ... command
Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
*/

220
src/KOKKOS/kokkos.cpp Normal file
View File

@ -0,0 +1,220 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "stdio.h"
#include "string.h"
#include "stdlib.h"
#include "ctype.h"
#include "kokkos.h"
#include "lammps.h"
#include "neighbor_kokkos.h"
#include "neigh_list_kokkos.h"
#include "error.h"
using namespace LAMMPS_NS;
enum{FULL,HALFTHREAD,HALF};
/* ---------------------------------------------------------------------- */
KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
{
kokkos_exists = 1;
lmp->kokkos = this;
// process any command-line args that invoke Kokkos settings
int device = 0;
int num_threads = 1;
int numa = 1;
int iarg = 0;
while (iarg < narg) {
if (strcmp(arg[iarg],"d") == 0 || strcmp(arg[iarg],"device") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
device = atoi(arg[iarg+1]);
iarg += 2;
} else if (strcmp(arg[iarg],"g") == 0 ||
strcmp(arg[iarg],"gpus") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
int ngpu = atoi(arg[iarg+1]);
iarg += 2;
int skip_gpu = 9999;
if (iarg+2 < narg && isdigit(arg[iarg+2][0])) {
skip_gpu = atoi(arg[iarg+2]);
iarg++;
}
char *str;
if (str = getenv("SLURM_LOCALID")) {
int local_rank = atoi(str);
device = local_rank % ngpu;
if (device >= skip_gpu) device++;
}
if (str = getenv("MV2_COMM_WORLD_LOCAL_RANK")) {
int local_rank = atoi(str);
device = local_rank % ngpu;
if (device >= skip_gpu) device++;
}
if (str = getenv("OMPI_COMM_WORLD_LOCAL_RANK")) {
int local_rank = atoi(str);
device = local_rank % ngpu;
if (device >= skip_gpu) device++;
}
} else if (strcmp(arg[iarg],"t") == 0 ||
strcmp(arg[iarg],"threads") == 0) {
num_threads = atoi(arg[iarg+1]);
iarg += 2;
} else if (strcmp(arg[iarg],"n") == 0 ||
strcmp(arg[iarg],"numa") == 0) {
numa = atoi(arg[iarg+1]);
iarg += 2;
} else error->all(FLERR,"Invalid Kokkos command-line args");
}
// initialize Kokkos
#if DEVICE==2
Kokkos::Cuda::host_mirror_device_type::initialize(num_threads,numa);
Kokkos::Cuda::SelectDevice select_device(device);
Kokkos::Cuda::initialize(select_device);
#else
LMPHostType::initialize(num_threads,numa);
#endif
// default settings for package kokkos command
neighflag = FULL;
exchange_comm_classic = 0;
forward_comm_classic = 0;
exchange_comm_on_host = 1;
forward_comm_on_host = 1;
}
/* ---------------------------------------------------------------------- */
KokkosLMP::~KokkosLMP()
{
// finalize Kokkos
#if DEVICE==2
Kokkos::Cuda::finalize();
Kokkos::Cuda::host_mirror_device_type::finalize();
#else
LMPHostType::finalize();
#endif
}
/* ----------------------------------------------------------------------
invoked by package kokkos command
------------------------------------------------------------------------- */
void KokkosLMP::accelerator(int narg, char **arg)
{
int iarg = 0;
while (iarg < narg) {
if (strcmp(arg[iarg],"neigh") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package command");
if (strcmp(arg[iarg+1],"full") == 0) neighflag = FULL;
else if (strcmp(arg[iarg+1],"half/thread") == 0) neighflag = HALFTHREAD;
else if (strcmp(arg[iarg+1],"half") == 0) neighflag = HALF;
else if (strcmp(arg[iarg+1],"n2") == 0) neighflag = N2;
else if (strcmp(arg[iarg+1],"full/cluster") == 0) neighflag = FULLCLUSTER;
else error->all(FLERR,"Illegal package command");
iarg += 2;
} else if (strcmp(arg[iarg],"comm/exchange") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package command");
if (strcmp(arg[iarg+1],"no") == 0) exchange_comm_classic = 1;
else if (strcmp(arg[iarg+1],"host") == 0) {
exchange_comm_classic = 0;
exchange_comm_on_host = 1;
} else if (strcmp(arg[iarg+1],"device") == 0) {
exchange_comm_classic = 0;
exchange_comm_on_host = 0;
} else error->all(FLERR,"Illegal package command");
iarg += 2;
} else if (strcmp(arg[iarg],"comm/forward") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package command");
if (strcmp(arg[iarg+1],"no") == 0) forward_comm_classic = 1;
else if (strcmp(arg[iarg+1],"host") == 0) {
forward_comm_classic = 0;
forward_comm_on_host = 1;
} else if (strcmp(arg[iarg+1],"device") == 0) {
forward_comm_classic = 0;
forward_comm_on_host = 0;
} else error->all(FLERR,"Illegal package command");
iarg += 2;
} else error->all(FLERR,"Illegal package command");
}
}
/* ----------------------------------------------------------------------
called by Finish
------------------------------------------------------------------------- */
int KokkosLMP::neigh_list_kokkos(int m)
{
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
if (nk->lists_host[m] && nk->lists_host[m]->d_numneigh.dimension_0())
return 1;
if (nk->lists_device[m] && nk->lists_device[m]->d_numneigh.dimension_0())
return 1;
return 0;
}
/* ----------------------------------------------------------------------
called by Finish
------------------------------------------------------------------------- */
int KokkosLMP::neigh_count(int m)
{
int inum;
int nneigh = 0;
ArrayTypes<LMPHostType>::t_int_1d h_ilist;
ArrayTypes<LMPHostType>::t_int_1d h_numneigh;
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
if (nk->lists_host[m]) {
inum = nk->lists_host[m]->inum;
#ifndef KOKKOS_USE_UVM
h_ilist = Kokkos::create_mirror_view(nk->lists_host[m]->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nk->lists_host[m]->d_numneigh);
#else
h_ilist = nk->lists_host[m]->d_ilist;
h_numneigh = nk->lists_host[m]->d_numneigh;
#endif
Kokkos::deep_copy(h_ilist,nk->lists_host[m]->d_ilist);
Kokkos::deep_copy(h_numneigh,nk->lists_host[m]->d_numneigh);
} else if (nk->lists_device[m]) {
inum = nk->lists_device[m]->inum;
#ifndef KOKKOS_USE_UVM
h_ilist = Kokkos::create_mirror_view(nk->lists_device[m]->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nk->lists_device[m]->d_numneigh);
#else
h_ilist = nk->lists_device[m]->d_ilist;
h_numneigh = nk->lists_device[m]->d_numneigh;
#endif
Kokkos::deep_copy(h_ilist,nk->lists_device[m]->d_ilist);
Kokkos::deep_copy(h_numneigh,nk->lists_device[m]->d_numneigh);
}
for (int i = 0; i < inum; i++) nneigh += h_numneigh[h_ilist[i]];
return nneigh;
}

40
src/KOKKOS/kokkos.h Normal file
View File

@ -0,0 +1,40 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef KOKKOS_LMP_H
#define KOKKOS_LMP_H
#include "pointers.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
class KokkosLMP : protected Pointers {
public:
int kokkos_exists;
int neighflag;
int exchange_comm_classic;
int forward_comm_classic;
int exchange_comm_on_host;
int forward_comm_on_host;
KokkosLMP(class LAMMPS *, int, char **);
~KokkosLMP();
void accelerator(int, char **);
int neigh_list_kokkos(int);
int neigh_count(int);
};
}
#endif

617
src/KOKKOS/kokkos_type.h Normal file
View File

@ -0,0 +1,617 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_LMPTYPE_KOKKOS_H
#define LMP_LMPTYPE_KOKKOS_H
#include <Kokkos_View.hpp>
#include <Kokkos_Macros.hpp>
#include <Kokkos_Atomic.hpp>
#include <Kokkos_DualView.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Vectorization.hpp>
#define MAX_TYPES_STACKPARAMS 12
#define NeighClusterSize 8
// set LMPHostype and LMPDeviceType
#ifndef DEVICE
#define DEVICE 1
#endif
#if DEVICE==1
#ifdef KOKKOS_HAVE_OPENMP
#include "Kokkos_OpenMP.hpp"
typedef Kokkos::OpenMP LMPDeviceType;
typedef Kokkos::OpenMP LMPHostType;
#else
#include "Kokkos_Threads.hpp"
typedef Kokkos::Threads LMPDeviceType;
typedef Kokkos::Threads LMPHostType;
#endif
#ifndef __CUDACC__
struct double2 {
double x, y;
};
struct float2 {
float x, y;
};
struct double4 {
double x, y, z, w;
};
struct float4 {
float x, y, z, w;
};
#endif
#else
#include "cuda.h"
#include "cuda_runtime.h"
#include "Kokkos_Cuda.hpp"
#include "Kokkos_Threads.hpp"
typedef Kokkos::Cuda LMPDeviceType;
typedef Kokkos::Cuda::host_mirror_device_type LMPHostType;
#endif
// set ExecutionSpace stuct with variable "space"
template<class Device>
struct ExecutionSpaceFromDevice;
#ifdef KOKKOS_HAVE_OPENMP
template<>
struct ExecutionSpaceFromDevice<Kokkos::OpenMP> {
static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Host;
};
#else
template<>
struct ExecutionSpaceFromDevice<Kokkos::Threads> {
static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Host;
};
#endif
#if DEVICE==2
template<>
struct ExecutionSpaceFromDevice<Kokkos::Cuda> {
static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Device;
};
#endif
// define precision
// handle global precision, force, energy, positions, kspace separately
#ifndef PRECISION
#define PRECISION 2
#endif
#if PRECISION==1
typedef float LMP_FLOAT;
typedef float2 LMP_FLOAT2;
typedef float4 LMP_FLOAT4;
#else
typedef double LMP_FLOAT;
typedef double2 LMP_FLOAT2;
typedef double4 LMP_FLOAT4;
#endif
#ifndef PREC_FORCE
#define PREC_FORCE PRECISION
#endif
#if PREC_FORCE==1
typedef float F_FLOAT;
typedef float2 F_FLOAT2;
typedef float4 F_FLOAT4;
#else
typedef double F_FLOAT;
typedef double2 F_FLOAT2;
typedef double4 F_FLOAT4;
#endif
#ifndef PREC_ENERGY
#define PREC_ENERGY PRECISION
#endif
#if PREC_ENERGY==1
typedef float E_FLOAT;
typedef float2 E_FLOAT2;
typedef float4 E_FLOAT4;
#else
typedef double E_FLOAT;
typedef double2 E_FLOAT2;
typedef double4 E_FLOAT4;
#endif
struct s_EV_FLOAT {
E_FLOAT evdwl;
E_FLOAT ecoul;
E_FLOAT v[6];
KOKKOS_INLINE_FUNCTION
s_EV_FLOAT() {
evdwl = 0;
ecoul = 0;
v[0] = 0; v[1] = 0; v[2] = 0;
v[3] = 0; v[4] = 0; v[5] = 0;
}
KOKKOS_INLINE_FUNCTION
s_EV_FLOAT& operator+=(const s_EV_FLOAT &rhs) {
evdwl += rhs.evdwl;
ecoul += rhs.ecoul;
v[0] += rhs.v[0];
v[1] += rhs.v[1];
v[2] += rhs.v[2];
v[3] += rhs.v[3];
v[4] += rhs.v[4];
v[5] += rhs.v[5];
return *this;
}
};
typedef struct s_EV_FLOAT EV_FLOAT;
#ifndef PREC_POS
#define PREC_POS PRECISION
#endif
#if PREC_POS==1
typedef float X_FLOAT;
typedef float2 X_FLOAT2;
typedef float4 X_FLOAT4;
#else
typedef double X_FLOAT;
typedef double2 X_FLOAT2;
typedef double4 X_FLOAT4;
#endif
#ifndef PREC_VELOCITIES
#define PREC_VELOCITIES PRECISION
#endif
#if PREC_VELOCITIES==1
typedef float V_FLOAT;
typedef float2 V_FLOAT2;
typedef float4 V_FLOAT4;
#else
typedef double V_FLOAT;
typedef double2 V_FLOAT2;
typedef double4 V_FLOAT4;
#endif
#if PREC_KSPACE==1
typedef float K_FLOAT;
typedef float2 K_FLOAT2;
typedef float4 K_FLOAT4;
#else
typedef double K_FLOAT;
typedef double2 K_FLOAT2;
typedef double4 K_FLOAT4;
#endif
// ------------------------------------------------------------------------
// LAMMPS types
template <class DeviceType>
struct ArrayTypes;
template <>
struct ArrayTypes<LMPDeviceType> {
// scalar types
typedef Kokkos::
DualView<int, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_scalar;
typedef tdual_int_scalar::t_dev t_int_scalar;
typedef tdual_int_scalar::t_dev_const t_int_scalar_const;
typedef tdual_int_scalar::t_dev_um t_int_scalar_um;
typedef tdual_int_scalar::t_dev_const_um t_int_scalar_const_um;
typedef Kokkos::
DualView<LMP_FLOAT, LMPDeviceType::array_layout, LMPDeviceType>
tdual_float_scalar;
typedef tdual_float_scalar::t_dev t_float_scalar;
typedef tdual_float_scalar::t_dev_const t_float_scalar_const;
typedef tdual_float_scalar::t_dev_um t_float_scalar_um;
typedef tdual_float_scalar::t_dev_const_um t_float_scalar_const_um;
// generic array types
typedef Kokkos::
DualView<int*, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_1d;
typedef tdual_int_1d::t_dev t_int_1d;
typedef tdual_int_1d::t_dev_const t_int_1d_const;
typedef tdual_int_1d::t_dev_um t_int_1d_um;
typedef tdual_int_1d::t_dev_const_um t_int_1d_const_um;
typedef tdual_int_1d::t_dev_const_randomread t_int_1d_randomread;
typedef Kokkos::
DualView<int**, Kokkos::LayoutRight, LMPDeviceType> tdual_int_2d;
typedef tdual_int_2d::t_dev t_int_2d;
typedef tdual_int_2d::t_dev_const t_int_2d_const;
typedef tdual_int_2d::t_dev_um t_int_2d_um;
typedef tdual_int_2d::t_dev_const_um t_int_2d_const_um;
typedef tdual_int_2d::t_dev_const_randomread t_int_2d_randomread;
typedef Kokkos::
DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, LMPDeviceType>
tdual_tagint_1d;
typedef tdual_tagint_1d::t_dev t_tagint_1d;
typedef tdual_tagint_1d::t_dev_const t_tagint_1d_const;
typedef tdual_tagint_1d::t_dev_um t_tagint_1d_um;
typedef tdual_tagint_1d::t_dev_const_um t_tagint_1d_const_um;
typedef tdual_tagint_1d::t_dev_const_randomread t_tagint_1d_randomread;
// 1d float array n
typedef Kokkos::DualView<LMP_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_1d;
typedef tdual_float_1d::t_dev t_float_1d;
typedef tdual_float_1d::t_dev_const t_float_1d_const;
typedef tdual_float_1d::t_dev_um t_float_1d_um;
typedef tdual_float_1d::t_dev_const_um t_float_1d_const_um;
typedef tdual_float_1d::t_dev_const_randomread t_float_1d_randomread;
//2d float array n
typedef Kokkos::DualView<LMP_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_float_2d;
typedef tdual_float_2d::t_dev t_float_2d;
typedef tdual_float_2d::t_dev_const t_float_2d_const;
typedef tdual_float_2d::t_dev_um t_float_2d_um;
typedef tdual_float_2d::t_dev_const_um t_float_2d_const_um;
typedef tdual_float_2d::t_dev_const_randomread t_float_2d_randomread;
//Position Types
//1d X_FLOAT array n
typedef Kokkos::DualView<X_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_xfloat_1d;
typedef tdual_xfloat_1d::t_dev t_xfloat_1d;
typedef tdual_xfloat_1d::t_dev_const t_xfloat_1d_const;
typedef tdual_xfloat_1d::t_dev_um t_xfloat_1d_um;
typedef tdual_xfloat_1d::t_dev_const_um t_xfloat_1d_const_um;
typedef tdual_xfloat_1d::t_dev_const_randomread t_xfloat_1d_randomread;
//2d X_FLOAT array n*m
typedef Kokkos::DualView<X_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_xfloat_2d;
typedef tdual_xfloat_2d::t_dev t_xfloat_2d;
typedef tdual_xfloat_2d::t_dev_const t_xfloat_2d_const;
typedef tdual_xfloat_2d::t_dev_um t_xfloat_2d_um;
typedef tdual_xfloat_2d::t_dev_const_um t_xfloat_2d_const_um;
typedef tdual_xfloat_2d::t_dev_const_randomread t_xfloat_2d_randomread;
//2d X_FLOAT array n*4
#ifdef LMP_KOKKOS_NO_LEGACY
typedef Kokkos::DualView<X_FLOAT*[3], Kokkos::LayoutLeft, LMPDeviceType> tdual_x_array;
#else
typedef Kokkos::DualView<X_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_x_array;
#endif
typedef tdual_x_array::t_dev t_x_array;
typedef tdual_x_array::t_dev_const t_x_array_const;
typedef tdual_x_array::t_dev_um t_x_array_um;
typedef tdual_x_array::t_dev_const_um t_x_array_const_um;
typedef tdual_x_array::t_dev_const_randomread t_x_array_randomread;
//Velocity Types
//1d V_FLOAT array n
typedef Kokkos::DualView<V_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_vfloat_1d;
typedef tdual_vfloat_1d::t_dev t_vfloat_1d;
typedef tdual_vfloat_1d::t_dev_const t_vfloat_1d_const;
typedef tdual_vfloat_1d::t_dev_um t_vfloat_1d_um;
typedef tdual_vfloat_1d::t_dev_const_um t_vfloat_1d_const_um;
typedef tdual_vfloat_1d::t_dev_const_randomread t_vfloat_1d_randomread;
//2d V_FLOAT array n*m
typedef Kokkos::DualView<V_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_vfloat_2d;
typedef tdual_vfloat_2d::t_dev t_vfloat_2d;
typedef tdual_vfloat_2d::t_dev_const t_vfloat_2d_const;
typedef tdual_vfloat_2d::t_dev_um t_vfloat_2d_um;
typedef tdual_vfloat_2d::t_dev_const_um t_vfloat_2d_const_um;
typedef tdual_vfloat_2d::t_dev_const_randomread t_vfloat_2d_randomread;
//2d V_FLOAT array n*3
typedef Kokkos::DualView<V_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_v_array;
//typedef Kokkos::DualView<V_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_v_array;
typedef tdual_v_array::t_dev t_v_array;
typedef tdual_v_array::t_dev_const t_v_array_const;
typedef tdual_v_array::t_dev_um t_v_array_um;
typedef tdual_v_array::t_dev_const_um t_v_array_const_um;
typedef tdual_v_array::t_dev_const_randomread t_v_array_randomread;
//Force Types
//1d F_FLOAT array n
typedef Kokkos::DualView<F_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_ffloat_1d;
typedef tdual_ffloat_1d::t_dev t_ffloat_1d;
typedef tdual_ffloat_1d::t_dev_const t_ffloat_1d_const;
typedef tdual_ffloat_1d::t_dev_um t_ffloat_1d_um;
typedef tdual_ffloat_1d::t_dev_const_um t_ffloat_1d_const_um;
typedef tdual_ffloat_1d::t_dev_const_randomread t_ffloat_1d_randomread;
//2d F_FLOAT array n*m
typedef Kokkos::DualView<F_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_ffloat_2d;
typedef tdual_ffloat_2d::t_dev t_ffloat_2d;
typedef tdual_ffloat_2d::t_dev_const t_ffloat_2d_const;
typedef tdual_ffloat_2d::t_dev_um t_ffloat_2d_um;
typedef tdual_ffloat_2d::t_dev_const_um t_ffloat_2d_const_um;
typedef tdual_ffloat_2d::t_dev_const_randomread t_ffloat_2d_randomread;
//2d F_FLOAT array n*3
typedef Kokkos::DualView<F_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_f_array;
//typedef Kokkos::DualView<F_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_f_array;
typedef tdual_f_array::t_dev t_f_array;
typedef tdual_f_array::t_dev_const t_f_array_const;
typedef tdual_f_array::t_dev_um t_f_array_um;
typedef tdual_f_array::t_dev_const_um t_f_array_const_um;
typedef tdual_f_array::t_dev_const_randomread t_f_array_randomread;
//2d F_FLOAT array n*6 (for virial)
typedef Kokkos::DualView<F_FLOAT*[6], Kokkos::LayoutRight, LMPDeviceType> tdual_virial_array;
typedef tdual_virial_array::t_dev t_virial_array;
typedef tdual_virial_array::t_dev_const t_virial_array_const;
typedef tdual_virial_array::t_dev_um t_virial_array_um;
typedef tdual_virial_array::t_dev_const_um t_virial_array_const_um;
typedef tdual_virial_array::t_dev_const_randomread t_virial_array_randomread;
//Energy Types
//1d E_FLOAT array n
typedef Kokkos::DualView<E_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_efloat_1d;
typedef tdual_efloat_1d::t_dev t_efloat_1d;
typedef tdual_efloat_1d::t_dev_const t_efloat_1d_const;
typedef tdual_efloat_1d::t_dev_um t_efloat_1d_um;
typedef tdual_efloat_1d::t_dev_const_um t_efloat_1d_const_um;
typedef tdual_efloat_1d::t_dev_const_randomread t_efloat_1d_randomread;
//2d E_FLOAT array n*m
typedef Kokkos::DualView<E_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_efloat_2d;
typedef tdual_efloat_2d::t_dev t_efloat_2d;
typedef tdual_efloat_2d::t_dev_const t_efloat_2d_const;
typedef tdual_efloat_2d::t_dev_um t_efloat_2d_um;
typedef tdual_efloat_2d::t_dev_const_um t_efloat_2d_const_um;
typedef tdual_efloat_2d::t_dev_const_randomread t_efloat_2d_randomread;
//2d E_FLOAT array n*3
typedef Kokkos::DualView<E_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_e_array;
typedef tdual_e_array::t_dev t_e_array;
typedef tdual_e_array::t_dev_const t_e_array_const;
typedef tdual_e_array::t_dev_um t_e_array_um;
typedef tdual_e_array::t_dev_const_um t_e_array_const_um;
typedef tdual_e_array::t_dev_const_randomread t_e_array_randomread;
//Neighbor Types
typedef Kokkos::DualView<int**, LMPDeviceType::array_layout, LMPDeviceType> tdual_neighbors_2d;
typedef tdual_neighbors_2d::t_dev t_neighbors_2d;
typedef tdual_neighbors_2d::t_dev_const t_neighbors_2d_const;
typedef tdual_neighbors_2d::t_dev_um t_neighbors_2d_um;
typedef tdual_neighbors_2d::t_dev_const_um t_neighbors_2d_const_um;
typedef tdual_neighbors_2d::t_dev_const_randomread t_neighbors_2d_randomread;
};
#if DEVICE==2
template <>
struct ArrayTypes<LMPHostType> {
//Scalar Types
typedef Kokkos::DualView<int, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_scalar;
typedef tdual_int_scalar::t_host t_int_scalar;
typedef tdual_int_scalar::t_host_const t_int_scalar_const;
typedef tdual_int_scalar::t_host_um t_int_scalar_um;
typedef tdual_int_scalar::t_host_const_um t_int_scalar_const_um;
typedef Kokkos::DualView<LMP_FLOAT, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_scalar;
typedef tdual_float_scalar::t_host t_float_scalar;
typedef tdual_float_scalar::t_host_const t_float_scalar_const;
typedef tdual_float_scalar::t_host_um t_float_scalar_um;
typedef tdual_float_scalar::t_host_const_um t_float_scalar_const_um;
//Generic ArrayTypes
typedef Kokkos::DualView<int*, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_1d;
typedef tdual_int_1d::t_host t_int_1d;
typedef tdual_int_1d::t_host_const t_int_1d_const;
typedef tdual_int_1d::t_host_um t_int_1d_um;
typedef tdual_int_1d::t_host_const_um t_int_1d_const_um;
typedef tdual_int_1d::t_host_const_randomread t_int_1d_randomread;
typedef Kokkos::DualView<int**, Kokkos::LayoutRight, LMPDeviceType> tdual_int_2d;
typedef tdual_int_2d::t_host t_int_2d;
typedef tdual_int_2d::t_host_const t_int_2d_const;
typedef tdual_int_2d::t_host_um t_int_2d_um;
typedef tdual_int_2d::t_host_const_um t_int_2d_const_um;
typedef tdual_int_2d::t_host_const_randomread t_int_2d_randomread;
typedef Kokkos::DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, LMPDeviceType> tdual_tagint_1d;
typedef tdual_tagint_1d::t_host t_tagint_1d;
typedef tdual_tagint_1d::t_host_const t_tagint_1d_const;
typedef tdual_tagint_1d::t_host_um t_tagint_1d_um;
typedef tdual_tagint_1d::t_host_const_um t_tagint_1d_const_um;
typedef tdual_tagint_1d::t_host_const_randomread t_tagint_1d_randomread;
//1d float array n
typedef Kokkos::DualView<LMP_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_1d;
typedef tdual_float_1d::t_host t_float_1d;
typedef tdual_float_1d::t_host_const t_float_1d_const;
typedef tdual_float_1d::t_host_um t_float_1d_um;
typedef tdual_float_1d::t_host_const_um t_float_1d_const_um;
typedef tdual_float_1d::t_host_const_randomread t_float_1d_randomread;
//2d float array n
typedef Kokkos::DualView<LMP_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_float_2d;
typedef tdual_float_2d::t_host t_float_2d;
typedef tdual_float_2d::t_host_const t_float_2d_const;
typedef tdual_float_2d::t_host_um t_float_2d_um;
typedef tdual_float_2d::t_host_const_um t_float_2d_const_um;
typedef tdual_float_2d::t_host_const_randomread t_float_2d_randomread;
//Position Types
//1d X_FLOAT array n
typedef Kokkos::DualView<X_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_xfloat_1d;
typedef tdual_xfloat_1d::t_host t_xfloat_1d;
typedef tdual_xfloat_1d::t_host_const t_xfloat_1d_const;
typedef tdual_xfloat_1d::t_host_um t_xfloat_1d_um;
typedef tdual_xfloat_1d::t_host_const_um t_xfloat_1d_const_um;
typedef tdual_xfloat_1d::t_host_const_randomread t_xfloat_1d_randomread;
//2d X_FLOAT array n*m
typedef Kokkos::DualView<X_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_xfloat_2d;
typedef tdual_xfloat_2d::t_host t_xfloat_2d;
typedef tdual_xfloat_2d::t_host_const t_xfloat_2d_const;
typedef tdual_xfloat_2d::t_host_um t_xfloat_2d_um;
typedef tdual_xfloat_2d::t_host_const_um t_xfloat_2d_const_um;
typedef tdual_xfloat_2d::t_host_const_randomread t_xfloat_2d_randomread;
//2d X_FLOAT array n*3
typedef Kokkos::DualView<X_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_x_array;
typedef tdual_x_array::t_host t_x_array;
typedef tdual_x_array::t_host_const t_x_array_const;
typedef tdual_x_array::t_host_um t_x_array_um;
typedef tdual_x_array::t_host_const_um t_x_array_const_um;
typedef tdual_x_array::t_host_const_randomread t_x_array_randomread;
//Velocity Types
//1d V_FLOAT array n
typedef Kokkos::DualView<V_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_vfloat_1d;
typedef tdual_vfloat_1d::t_host t_vfloat_1d;
typedef tdual_vfloat_1d::t_host_const t_vfloat_1d_const;
typedef tdual_vfloat_1d::t_host_um t_vfloat_1d_um;
typedef tdual_vfloat_1d::t_host_const_um t_vfloat_1d_const_um;
typedef tdual_vfloat_1d::t_host_const_randomread t_vfloat_1d_randomread;
//2d V_FLOAT array n*m
typedef Kokkos::DualView<V_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_vfloat_2d;
typedef tdual_vfloat_2d::t_host t_vfloat_2d;
typedef tdual_vfloat_2d::t_host_const t_vfloat_2d_const;
typedef tdual_vfloat_2d::t_host_um t_vfloat_2d_um;
typedef tdual_vfloat_2d::t_host_const_um t_vfloat_2d_const_um;
typedef tdual_vfloat_2d::t_host_const_randomread t_vfloat_2d_randomread;
//2d V_FLOAT array n*3
typedef Kokkos::DualView<V_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_v_array;
//typedef Kokkos::DualView<V_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_v_array;
typedef tdual_v_array::t_host t_v_array;
typedef tdual_v_array::t_host_const t_v_array_const;
typedef tdual_v_array::t_host_um t_v_array_um;
typedef tdual_v_array::t_host_const_um t_v_array_const_um;
typedef tdual_v_array::t_host_const_randomread t_v_array_randomread;
//Force Types
//1d F_FLOAT array n
typedef Kokkos::DualView<F_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_ffloat_1d;
typedef tdual_ffloat_1d::t_host t_ffloat_1d;
typedef tdual_ffloat_1d::t_host_const t_ffloat_1d_const;
typedef tdual_ffloat_1d::t_host_um t_ffloat_1d_um;
typedef tdual_ffloat_1d::t_host_const_um t_ffloat_1d_const_um;
typedef tdual_ffloat_1d::t_host_const_randomread t_ffloat_1d_randomread;
//2d F_FLOAT array n*m
typedef Kokkos::DualView<F_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_ffloat_2d;
typedef tdual_ffloat_2d::t_host t_ffloat_2d;
typedef tdual_ffloat_2d::t_host_const t_ffloat_2d_const;
typedef tdual_ffloat_2d::t_host_um t_ffloat_2d_um;
typedef tdual_ffloat_2d::t_host_const_um t_ffloat_2d_const_um;
typedef tdual_ffloat_2d::t_host_const_randomread t_ffloat_2d_randomread;
//2d F_FLOAT array n*3
typedef Kokkos::DualView<F_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_f_array;
//typedef Kokkos::DualView<F_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_f_array;
typedef tdual_f_array::t_host t_f_array;
typedef tdual_f_array::t_host_const t_f_array_const;
typedef tdual_f_array::t_host_um t_f_array_um;
typedef tdual_f_array::t_host_const_um t_f_array_const_um;
typedef tdual_f_array::t_host_const_randomread t_f_array_randomread;
//2d F_FLOAT array n*6 (for virial)
typedef Kokkos::DualView<F_FLOAT*[6], Kokkos::LayoutRight, LMPDeviceType> tdual_virial_array;
typedef tdual_virial_array::t_host t_virial_array;
typedef tdual_virial_array::t_host_const t_virial_array_const;
typedef tdual_virial_array::t_host_um t_virial_array_um;
typedef tdual_virial_array::t_host_const_um t_virial_array_const_um;
typedef tdual_virial_array::t_host_const_randomread t_virial_array_randomread;
//Energy Types
//1d E_FLOAT array n
typedef Kokkos::DualView<E_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_efloat_1d;
typedef tdual_efloat_1d::t_host t_efloat_1d;
typedef tdual_efloat_1d::t_host_const t_efloat_1d_const;
typedef tdual_efloat_1d::t_host_um t_efloat_1d_um;
typedef tdual_efloat_1d::t_host_const_um t_efloat_1d_const_um;
typedef tdual_efloat_1d::t_host_const_randomread t_efloat_1d_randomread;
//2d E_FLOAT array n*m
typedef Kokkos::DualView<E_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_efloat_2d;
typedef tdual_efloat_2d::t_host t_efloat_2d;
typedef tdual_efloat_2d::t_host_const t_efloat_2d_const;
typedef tdual_efloat_2d::t_host_um t_efloat_2d_um;
typedef tdual_efloat_2d::t_host_const_um t_efloat_2d_const_um;
typedef tdual_efloat_2d::t_host_const_randomread t_efloat_2d_randomread;
//2d E_FLOAT array n*3
typedef Kokkos::DualView<E_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_e_array;
typedef tdual_e_array::t_host t_e_array;
typedef tdual_e_array::t_host_const t_e_array_const;
typedef tdual_e_array::t_host_um t_e_array_um;
typedef tdual_e_array::t_host_const_um t_e_array_const_um;
typedef tdual_e_array::t_host_const_randomread t_e_array_randomread;
//Neighbor Types
typedef Kokkos::DualView<int**, LMPDeviceType::array_layout, LMPDeviceType> tdual_neighbors_2d;
typedef tdual_neighbors_2d::t_host t_neighbors_2d;
typedef tdual_neighbors_2d::t_host_const t_neighbors_2d_const;
typedef tdual_neighbors_2d::t_host_um t_neighbors_2d_um;
typedef tdual_neighbors_2d::t_host_const_um t_neighbors_2d_const_um;
typedef tdual_neighbors_2d::t_host_const_randomread t_neighbors_2d_randomread;
};
#endif
//default LAMMPS Types
typedef struct ArrayTypes<LMPDeviceType> DAT;
typedef struct ArrayTypes<LMPHostType> HAT;
template<class DeviceType, class BufferView, class DualView>
void buffer_view(BufferView &buf, DualView &view,
const size_t n0,
const size_t n1 = 0,
const size_t n2 = 0,
const size_t n3 = 0,
const size_t n4 = 0,
const size_t n5 = 0,
const size_t n6 = 0,
const size_t n7 = 0) {
buf = BufferView(
view.template view<DeviceType>().ptr_on_device(),
n0,n1,n2,n3,n4,n5,n6,n7);
}
template<class DeviceType>
struct MemsetZeroFunctor {
typedef DeviceType device_type ;
void* ptr;
KOKKOS_INLINE_FUNCTION void operator()(const int i) const {
((int*)ptr)[i] = 0;
}
};
template<class ViewType>
void memset_kokkos (ViewType &view) {
static MemsetZeroFunctor<typename ViewType::device_type> f;
f.ptr = view.ptr_on_device();
Kokkos::parallel_for(view.capacity()*sizeof(typename ViewType::value_type)/4, f);
ViewType::device_type::fence();
}
#endif

208
src/KOKKOS/memory_kokkos.h Normal file
View File

@ -0,0 +1,208 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Kokkos versions of create/grow/destroy multi-dimensional arrays
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
create a 1d array
------------------------------------------------------------------------- */
template <typename TYPE>
TYPE create_kokkos(TYPE &data, typename TYPE::value_type *&array,
int n1, const char *name)
{
data = TYPE(name,n1);
array = data.h_view.ptr_on_device();
return data;
}
template <typename TYPE, typename HTYPE>
TYPE create_kokkos(TYPE &data, HTYPE &h_data,
typename TYPE::value_type *&array, int n1,
const char *name)
{
data = TYPE(std::string(name),n1);
#ifndef KOKKOS_USE_UVM
h_data = Kokkos::create_mirror_view(data);
#else
h_data = data;
#endif
array = h_data.ptr_on_device();
return data;
}
template <typename TYPE, typename HTYPE>
TYPE create_kokkos(TYPE &data, HTYPE &h_data,
int n1, const char *name)
{
data = TYPE(std::string(name),n1);
#ifndef KOKKOS_USE_UVM
h_data = Kokkos::create_mirror_view(data);
#else
h_data = data;
#endif
return data;
}
/* ----------------------------------------------------------------------
grow or shrink 1st dim of a 1d array
last dim must stay the same
------------------------------------------------------------------------- */
template <typename TYPE>
TYPE grow_kokkos(TYPE &data, typename TYPE::value_type *&array,
int n1, const char *name)
{
if (array == NULL) return create_kokkos(data,array,n1,name);
data.resize(n1);
array = data.h_view.ptr_on_device();
return data;
}
template <typename TYPE>
void destroy_kokkos(TYPE data, typename TYPE::value_type* &array)
{
if (array == NULL) return;
data = TYPE();
array = NULL;
}
/* ----------------------------------------------------------------------
create a 2d array
------------------------------------------------------------------------- */
template <typename TYPE>
TYPE create_kokkos(TYPE &data, int n1, int n2, const char *name)
{
data = TYPE(name,n1,n2);
return data;
}
template <typename TYPE, typename HTYPE>
TYPE create_kokkos(TYPE &data, HTYPE &h_data, int n1, int n2,
const char *name)
{
data = TYPE(std::string(name),n1,n2);
#ifndef KOKKOS_USE_UVM
h_data = Kokkos::create_mirror_view(data);
#else
h_data = data;
#endif
return data;
}
template <typename TYPE>
TYPE create_kokkos(TYPE &data, typename TYPE::value_type **&array,
int n1, int n2, const char *name)
{
data = TYPE(std::string(name),n1,n2);
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
array = (typename TYPE::value_type **) smalloc(nbytes,name);
bigint n = 0;
for (int i = 0; i < n1; i++) {
array[i] = &data.h_view(i,0);
n += n2;
}
return data;
}
template <typename TYPE, typename HTYPE>
TYPE create_kokkos(TYPE &data, HTYPE &h_data,
typename TYPE::value_type **&array, int n1, int n2,
const char *name)
{
data = TYPE(std::string(name),n1,n2);
#ifndef KOKKOS_USE_UVM
h_data = Kokkos::create_mirror_view(data);
#else
h_data = data;
#endif
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
array = (typename TYPE::value_type **) smalloc(nbytes,name);
bigint n = 0;
for (int i = 0; i < n1; i++) {
array[i] = &h_data(i,0);
n += n2;
}
return data;
}
/* ----------------------------------------------------------------------
grow or shrink 1st dim of a 2d array
last dim must stay the same
------------------------------------------------------------------------- */
template <typename TYPE>
TYPE grow_kokkos(TYPE &data, typename TYPE::value_type **&array,
int n1, int n2, const char *name)
{
if (array == NULL) return create_kokkos(data,array,n1,n2,name);
data.resize(n1,n2);
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
array = (typename TYPE::value_type**) srealloc(array,nbytes,name);
for (int i = 0; i < n1; i++)
array[i] = &data.h_view(i,0);
return data;
}
template <typename TYPE>
TYPE create_kokkos(TYPE &data, typename TYPE::value_type **&array,
int n1, const char *name)
{
data = TYPE(std::string(name),n1);
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
array = (typename TYPE::value_type **) smalloc(nbytes,name);
for (int i = 0; i < n1; i++)
array[i] = &data.h_view(i,0);
return data;
}
template <typename TYPE>
TYPE grow_kokkos(TYPE &data, typename TYPE::value_type **&array,
int n1, const char *name)
{
if (array == NULL) return create_kokkos(data,array,n1,name);
data.resize(n1);
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
array = (typename TYPE::value_type **) smalloc(nbytes,name);
for (int i = 0; i < n1; i++)
array[i] = &data.h_view(i,0);
return data;
}
/* ----------------------------------------------------------------------
destroy a 2d array
------------------------------------------------------------------------- */
template <typename TYPE>
void destroy_kokkos(TYPE data, typename TYPE::value_type** &array)
{
if (array == NULL) return;
data = TYPE();
sfree(array);
array = NULL;
}

View File

@ -0,0 +1,585 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "modify_kokkos.h"
#include "atom_kokkos.h"
#include "update.h"
#include "fix.h"
#include "compute.h"
using namespace LAMMPS_NS;
#define BIG 1.0e20
/* ---------------------------------------------------------------------- */
ModifyKokkos::ModifyKokkos(LAMMPS *lmp) : Modify(lmp)
{
atomKK = (AtomKokkos *) atom;
}
/* ----------------------------------------------------------------------
setup for run, calls setup() of all fixes and computes
called from Verlet, RESPA, Min
------------------------------------------------------------------------- */
void ModifyKokkos::setup(int vflag)
{
// compute setup needs to come before fix setup
// b/c NH fixes need use DOF of temperature computes
for (int i = 0; i < ncompute; i++) compute[i]->setup();
if (update->whichflag == 1)
for (int i = 0; i < nfix; i++) {
atomKK->sync(fix[i]->execution_space,fix[i]->datamask_read);
atomKK->modified(fix[i]->execution_space,fix[i]->datamask_modify);
fix[i]->setup(vflag);
}
else if (update->whichflag == 2)
for (int i = 0; i < nfix; i++) {
atomKK->sync(fix[i]->execution_space,fix[i]->datamask_read);
atomKK->modified(fix[i]->execution_space,fix[i]->datamask_modify);
fix[i]->min_setup(vflag);
}
}
/* ----------------------------------------------------------------------
setup pre_exchange call, only for fixes that define pre_exchange
called from Verlet, RESPA, Min, and WriteRestart with whichflag = 0
------------------------------------------------------------------------- */
void ModifyKokkos::setup_pre_exchange()
{
if (update->whichflag <= 1)
for (int i = 0; i < n_pre_exchange; i++) {
atomKK->sync(fix[list_pre_exchange[i]]->execution_space,
fix[list_pre_exchange[i]]->datamask_read);
atomKK->modified(fix[list_pre_exchange[i]]->execution_space,
fix[list_pre_exchange[i]]->datamask_modify);
fix[list_pre_exchange[i]]->setup_pre_exchange();
}
else if (update->whichflag == 2)
for (int i = 0; i < n_min_pre_exchange; i++) {
atomKK->sync(fix[list_min_pre_exchange[i]]->execution_space,
fix[list_min_pre_exchange[i]]->datamask_read);
atomKK->modified(fix[list_min_pre_exchange[i]]->execution_space,
fix[list_min_pre_exchange[i]]->datamask_modify);
fix[list_min_pre_exchange[i]]->min_setup_pre_exchange();
}
}
/* ----------------------------------------------------------------------
setup pre_neighbor call, only for fixes that define pre_neighbor
called from Verlet, RESPA
------------------------------------------------------------------------- */
void ModifyKokkos::setup_pre_neighbor()
{
if (update->whichflag == 1)
for (int i = 0; i < n_pre_neighbor; i++) {
atomKK->sync(fix[list_pre_neighbor[i]]->execution_space,
fix[list_pre_neighbor[i]]->datamask_read);
atomKK->modified(fix[list_pre_neighbor[i]]->execution_space,
fix[list_pre_neighbor[i]]->datamask_modify);
fix[list_pre_neighbor[i]]->setup_pre_neighbor();
}
else if (update->whichflag == 2)
for (int i = 0; i < n_min_pre_neighbor; i++) {
atomKK->sync(fix[list_min_pre_neighbor[i]]->execution_space,
fix[list_min_pre_neighbor[i]]->datamask_read);
atomKK->modified(fix[list_min_pre_neighbor[i]]->execution_space,
fix[list_min_pre_neighbor[i]]->datamask_modify);
fix[list_min_pre_neighbor[i]]->min_setup_pre_neighbor();
}
}
/* ----------------------------------------------------------------------
setup pre_force call, only for fixes that define pre_force
called from Verlet, RESPA, Min
------------------------------------------------------------------------- */
void ModifyKokkos::setup_pre_force(int vflag)
{
if (update->whichflag == 1)
for (int i = 0; i < n_pre_force; i++) {
atomKK->sync(fix[list_pre_force[i]]->execution_space,
fix[list_pre_force[i]]->datamask_read);
atomKK->modified(fix[list_pre_force[i]]->execution_space,
fix[list_pre_force[i]]->datamask_modify);
fix[list_pre_force[i]]->setup_pre_force(vflag);
}
else if (update->whichflag == 2)
for (int i = 0; i < n_min_pre_force; i++) {
atomKK->sync(fix[list_min_pre_force[i]]->execution_space,
fix[list_min_pre_force[i]]->datamask_read);
atomKK->modified(fix[list_min_pre_force[i]]->execution_space,
fix[list_min_pre_force[i]]->datamask_modify);
fix[list_min_pre_force[i]]->min_setup_pre_force(vflag);
}
}
/* ----------------------------------------------------------------------
1st half of integrate call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::initial_integrate(int vflag)
{
for (int i = 0; i < n_initial_integrate; i++) {
atomKK->sync(fix[list_initial_integrate[i]]->execution_space,
fix[list_initial_integrate[i]]->datamask_read);
atomKK->modified(fix[list_initial_integrate[i]]->execution_space,
fix[list_initial_integrate[i]]->datamask_modify);
fix[list_initial_integrate[i]]->initial_integrate(vflag);
}
}
/* ----------------------------------------------------------------------
post_integrate call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::post_integrate()
{
for (int i = 0; i < n_post_integrate; i++) {
atomKK->sync(fix[list_post_integrate[i]]->execution_space,
fix[list_post_integrate[i]]->datamask_read);
atomKK->modified(fix[list_post_integrate[i]]->execution_space,
fix[list_post_integrate[i]]->datamask_modify);
fix[list_post_integrate[i]]->post_integrate();
}
}
/* ----------------------------------------------------------------------
pre_exchange call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::pre_exchange()
{
for (int i = 0; i < n_pre_exchange; i++) {
atomKK->sync(fix[list_pre_exchange[i]]->execution_space,
fix[list_pre_exchange[i]]->datamask_read);
atomKK->modified(fix[list_pre_exchange[i]]->execution_space,
fix[list_pre_exchange[i]]->datamask_modify);
fix[list_pre_exchange[i]]->pre_exchange();
}
}
/* ----------------------------------------------------------------------
pre_neighbor call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::pre_neighbor()
{
for (int i = 0; i < n_pre_neighbor; i++) {
atomKK->sync(fix[list_pre_neighbor[i]]->execution_space,
fix[list_pre_neighbor[i]]->datamask_read);
atomKK->modified(fix[list_pre_neighbor[i]]->execution_space,
fix[list_pre_neighbor[i]]->datamask_modify);
fix[list_pre_neighbor[i]]->pre_neighbor();
}
}
/* ----------------------------------------------------------------------
pre_force call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::pre_force(int vflag)
{
for (int i = 0; i < n_pre_force; i++) {
atomKK->sync(fix[list_pre_force[i]]->execution_space,
fix[list_pre_force[i]]->datamask_read);
atomKK->modified(fix[list_pre_force[i]]->execution_space,
fix[list_pre_force[i]]->datamask_modify);
fix[list_pre_force[i]]->pre_force(vflag);
}
}
/* ----------------------------------------------------------------------
post_force call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::post_force(int vflag)
{
for (int i = 0; i < n_post_force; i++) {
atomKK->sync(fix[list_post_force[i]]->execution_space,
fix[list_post_force[i]]->datamask_read);
atomKK->modified(fix[list_post_force[i]]->execution_space,
fix[list_post_force[i]]->datamask_modify);
fix[list_post_force[i]]->post_force(vflag);
}
}
/* ----------------------------------------------------------------------
2nd half of integrate call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::final_integrate()
{
for (int i = 0; i < n_final_integrate; i++) {
atomKK->sync(fix[list_final_integrate[i]]->execution_space,
fix[list_final_integrate[i]]->datamask_read);
atomKK->modified(fix[list_final_integrate[i]]->execution_space,
fix[list_final_integrate[i]]->datamask_modify);
fix[list_final_integrate[i]]->final_integrate();
}
}
/* ----------------------------------------------------------------------
end-of-timestep call, only for relevant fixes
only call fix->end_of_step() on timesteps that are multiples of nevery
------------------------------------------------------------------------- */
void ModifyKokkos::end_of_step()
{
for (int i = 0; i < n_end_of_step; i++)
if (update->ntimestep % end_of_step_every[i] == 0) {
atomKK->sync(fix[list_end_of_step[i]]->execution_space,
fix[list_end_of_step[i]]->datamask_read);
atomKK->modified(fix[list_end_of_step[i]]->execution_space,
fix[list_end_of_step[i]]->datamask_modify);
fix[list_end_of_step[i]]->end_of_step();
}
}
/* ----------------------------------------------------------------------
thermo energy call, only for relevant fixes
called by Thermo class
compute_scalar() is fix call to return energy
------------------------------------------------------------------------- */
double ModifyKokkos::thermo_energy()
{
double energy = 0.0;
for (int i = 0; i < n_thermo_energy; i++) {
atomKK->sync(fix[list_thermo_energy[i]]->execution_space,
fix[list_thermo_energy[i]]->datamask_read);
atomKK->modified(fix[list_thermo_energy[i]]->execution_space,
fix[list_thermo_energy[i]]->datamask_modify);
energy += fix[list_thermo_energy[i]]->compute_scalar();
}
return energy;
}
/* ----------------------------------------------------------------------
post_run call
------------------------------------------------------------------------- */
void ModifyKokkos::post_run()
{
for (int i = 0; i < nfix; i++) {
atomKK->sync(fix[i]->execution_space,
fix[i]->datamask_read);
atomKK->modified(fix[i]->execution_space,
fix[i]->datamask_modify);
fix[i]->post_run();
}
}
/* ----------------------------------------------------------------------
setup rRESPA pre_force call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::setup_pre_force_respa(int vflag, int ilevel)
{
for (int i = 0; i < n_pre_force; i++) {
atomKK->sync(fix[list_pre_force[i]]->execution_space,
fix[list_pre_force[i]]->datamask_read);
atomKK->modified(fix[list_pre_force[i]]->execution_space,
fix[list_pre_force[i]]->datamask_modify);
fix[list_pre_force[i]]->setup_pre_force_respa(vflag,ilevel);
}
}
/* ----------------------------------------------------------------------
1st half of rRESPA integrate call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::initial_integrate_respa(int vflag, int ilevel, int iloop)
{
for (int i = 0; i < n_initial_integrate_respa; i++) {
atomKK->sync(fix[list_initial_integrate_respa[i]]->execution_space,
fix[list_initial_integrate_respa[i]]->datamask_read);
atomKK->modified(fix[list_initial_integrate_respa[i]]->execution_space,
fix[list_initial_integrate_respa[i]]->datamask_modify);
fix[list_initial_integrate_respa[i]]->
initial_integrate_respa(vflag,ilevel,iloop);
}
}
/* ----------------------------------------------------------------------
rRESPA post_integrate call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::post_integrate_respa(int ilevel, int iloop)
{
for (int i = 0; i < n_post_integrate_respa; i++) {
atomKK->sync(fix[list_post_integrate_respa[i]]->execution_space,
fix[list_post_integrate_respa[i]]->datamask_read);
atomKK->modified(fix[list_post_integrate_respa[i]]->execution_space,
fix[list_post_integrate_respa[i]]->datamask_modify);
fix[list_post_integrate_respa[i]]->post_integrate_respa(ilevel,iloop);
}
}
/* ----------------------------------------------------------------------
rRESPA pre_force call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::pre_force_respa(int vflag, int ilevel, int iloop)
{
for (int i = 0; i < n_pre_force_respa; i++) {
atomKK->sync(fix[list_pre_force_respa[i]]->execution_space,
fix[list_pre_force_respa[i]]->datamask_read);
atomKK->modified(fix[list_pre_force_respa[i]]->execution_space,
fix[list_pre_force_respa[i]]->datamask_modify);
fix[list_pre_force_respa[i]]->pre_force_respa(vflag,ilevel,iloop);
}
}
/* ----------------------------------------------------------------------
rRESPA post_force call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::post_force_respa(int vflag, int ilevel, int iloop)
{
for (int i = 0; i < n_post_force_respa; i++) {
atomKK->sync(fix[list_post_force_respa[i]]->execution_space,
fix[list_post_force_respa[i]]->datamask_read);
atomKK->modified(fix[list_post_force_respa[i]]->execution_space,
fix[list_post_force_respa[i]]->datamask_modify);
fix[list_post_force_respa[i]]->post_force_respa(vflag,ilevel,iloop);
}
}
/* ----------------------------------------------------------------------
2nd half of rRESPA integrate call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::final_integrate_respa(int ilevel, int iloop)
{
for (int i = 0; i < n_final_integrate_respa; i++) {
atomKK->sync(fix[list_final_integrate_respa[i]]->execution_space,
fix[list_final_integrate_respa[i]]->datamask_read);
atomKK->modified(fix[list_final_integrate_respa[i]]->execution_space,
fix[list_final_integrate_respa[i]]->datamask_modify);
fix[list_final_integrate_respa[i]]->final_integrate_respa(ilevel,iloop);
}
}
/* ----------------------------------------------------------------------
minimizer pre-exchange call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::min_pre_exchange()
{
for (int i = 0; i < n_min_pre_exchange; i++) {
atomKK->sync(fix[list_min_pre_exchange[i]]->execution_space,
fix[list_min_pre_exchange[i]]->datamask_read);
atomKK->modified(fix[list_min_pre_exchange[i]]->execution_space,
fix[list_min_pre_exchange[i]]->datamask_modify);
fix[list_min_pre_exchange[i]]->min_pre_exchange();
}
}
/* ----------------------------------------------------------------------
minimizer pre-neighbor call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::min_pre_neighbor()
{
for (int i = 0; i < n_min_pre_neighbor; i++) {
atomKK->sync(fix[list_min_pre_neighbor[i]]->execution_space,
fix[list_min_pre_neighbor[i]]->datamask_read);
atomKK->modified(fix[list_min_pre_neighbor[i]]->execution_space,
fix[list_min_pre_neighbor[i]]->datamask_modify);
fix[list_min_pre_neighbor[i]]->min_pre_neighbor();
}
}
/* ----------------------------------------------------------------------
minimizer pre-force call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::min_pre_force(int vflag)
{
for (int i = 0; i < n_min_pre_force; i++) {
atomKK->sync(fix[list_min_pre_force[i]]->execution_space,
fix[list_min_pre_force[i]]->datamask_read);
atomKK->modified(fix[list_min_pre_force[i]]->execution_space,
fix[list_min_pre_force[i]]->datamask_modify);
fix[list_min_pre_force[i]]->min_pre_force(vflag);
}
}
/* ----------------------------------------------------------------------
minimizer force adjustment call, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::min_post_force(int vflag)
{
for (int i = 0; i < n_min_post_force; i++) {
atomKK->sync(fix[list_min_post_force[i]]->execution_space,
fix[list_min_post_force[i]]->datamask_read);
atomKK->modified(fix[list_min_post_force[i]]->execution_space,
fix[list_min_post_force[i]]->datamask_modify);
fix[list_min_post_force[i]]->min_post_force(vflag);
}
}
/* ----------------------------------------------------------------------
minimizer energy/force evaluation, only for relevant fixes
return energy and forces on extra degrees of freedom
------------------------------------------------------------------------- */
double ModifyKokkos::min_energy(double *fextra)
{
int ifix,index;
index = 0;
double eng = 0.0;
for (int i = 0; i < n_min_energy; i++) {
ifix = list_min_energy[i];
atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
eng += fix[ifix]->min_energy(&fextra[index]);
index += fix[ifix]->min_dof();
}
return eng;
}
/* ----------------------------------------------------------------------
store current state of extra dof, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::min_store()
{
for (int i = 0; i < n_min_energy; i++) {
atomKK->sync(fix[list_min_energy[i]]->execution_space,
fix[list_min_energy[i]]->datamask_read);
atomKK->modified(fix[list_min_energy[i]]->execution_space,
fix[list_min_energy[i]]->datamask_modify);
fix[list_min_energy[i]]->min_store();
}
}
/* ----------------------------------------------------------------------
mange state of extra dof on a stack, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::min_clearstore()
{
for (int i = 0; i < n_min_energy; i++) {
atomKK->sync(fix[list_min_energy[i]]->execution_space,
fix[list_min_energy[i]]->datamask_read);
atomKK->modified(fix[list_min_energy[i]]->execution_space,
fix[list_min_energy[i]]->datamask_modify);
fix[list_min_energy[i]]->min_clearstore();
}
}
void ModifyKokkos::min_pushstore()
{
for (int i = 0; i < n_min_energy; i++) {
atomKK->sync(fix[list_min_energy[i]]->execution_space,
fix[list_min_energy[i]]->datamask_read);
atomKK->modified(fix[list_min_energy[i]]->execution_space,
fix[list_min_energy[i]]->datamask_modify);
fix[list_min_energy[i]]->min_pushstore();
}
}
void ModifyKokkos::min_popstore()
{
for (int i = 0; i < n_min_energy; i++) {
atomKK->sync(fix[list_min_energy[i]]->execution_space,
fix[list_min_energy[i]]->datamask_read);
atomKK->modified(fix[list_min_energy[i]]->execution_space,
fix[list_min_energy[i]]->datamask_modify);
fix[list_min_energy[i]]->min_popstore();
}
}
/* ----------------------------------------------------------------------
displace extra dof along vector hextra, only for relevant fixes
------------------------------------------------------------------------- */
void ModifyKokkos::min_step(double alpha, double *hextra)
{
int ifix,index;
index = 0;
for (int i = 0; i < n_min_energy; i++) {
ifix = list_min_energy[i];
atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
fix[ifix]->min_step(alpha,&hextra[index]);
index += fix[ifix]->min_dof();
}
}
/* ----------------------------------------------------------------------
compute max allowed step size along vector hextra, only for relevant fixes
------------------------------------------------------------------------- */
double ModifyKokkos::max_alpha(double *hextra)
{
int ifix,index;
double alpha = BIG;
index = 0;
for (int i = 0; i < n_min_energy; i++) {
ifix = list_min_energy[i];
atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
double alpha_one = fix[ifix]->max_alpha(&hextra[index]);
alpha = MIN(alpha,alpha_one);
index += fix[ifix]->min_dof();
}
return alpha;
}
/* ----------------------------------------------------------------------
extract extra dof for minimization, only for relevant fixes
------------------------------------------------------------------------- */
int ModifyKokkos::min_dof()
{
int ndof = 0;
for (int i = 0; i < n_min_energy; i++) {
atomKK->sync(fix[list_min_energy[i]]->execution_space,
fix[list_min_energy[i]]->datamask_read);
atomKK->modified(fix[list_min_energy[i]]->execution_space,
fix[list_min_energy[i]]->datamask_modify);
ndof += fix[list_min_energy[i]]->min_dof();
}
return ndof;
}
/* ----------------------------------------------------------------------
reset reference state of fix, only for relevant fixes
------------------------------------------------------------------------- */
int ModifyKokkos::min_reset_ref()
{
int itmp,itmpall;
itmpall = 0;
for (int i = 0; i < n_min_energy; i++) {
atomKK->sync(fix[list_min_energy[i]]->execution_space,
fix[list_min_energy[i]]->datamask_read);
atomKK->modified(fix[list_min_energy[i]]->execution_space,
fix[list_min_energy[i]]->datamask_modify);
itmp = fix[list_min_energy[i]]->min_reset_ref();
if (itmp) itmpall = 1;
}
return itmpall;
}

View File

@ -0,0 +1,73 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_MODIFY_KOKKOS_H
#define LMP_MODIFY_KOKKOS_H
#include "modify.h"
namespace LAMMPS_NS {
class ModifyKokkos : public Modify {
public:
ModifyKokkos(class LAMMPS *);
~ModifyKokkos() {}
void setup(int);
void setup_pre_exchange();
void setup_pre_neighbor();
void setup_pre_force(int);
void initial_integrate(int);
void post_integrate();
void pre_decide();
void pre_exchange();
void pre_neighbor();
void pre_force(int);
void post_force(int);
void final_integrate();
void end_of_step();
double thermo_energy();
void post_run();
void setup_pre_force_respa(int, int);
void initial_integrate_respa(int, int, int);
void post_integrate_respa(int, int);
void pre_force_respa(int, int, int);
void post_force_respa(int, int, int);
void final_integrate_respa(int, int);
void min_pre_exchange();
void min_pre_neighbor();
void min_pre_force(int);
void min_post_force(int);
double min_energy(double *);
void min_store();
void min_step(double, double *);
void min_clearstore();
void min_pushstore();
void min_popstore();
double max_alpha(double *);
int min_dof();
int min_reset_ref();
protected:
class AtomKokkos *atomKK;
};
}
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,507 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "atom_kokkos.h"
#include "atom_masks.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH>
void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
{
const int nall = includegroup?atom->nfirst:atom->nlocal;
list->grow(nall);
NeighborKokkosExecute<DeviceType>
data(*list,
k_cutneighsq.view<DeviceType>(),
k_bincount.view<DeviceType>(),
k_bins.view<DeviceType>(),nall,
atomKK->k_x.view<DeviceType>(),
atomKK->k_type.view<DeviceType>(),
atomKK->k_mask.view<DeviceType>(),
atomKK->k_molecule.view<DeviceType>(),
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
bininvx,bininvy,bininvz,
bboxhi,bboxlo);
k_cutneighsq.sync<DeviceType>();
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK);
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
while(data.h_resize() > 0) {
data.h_resize() = 0;
deep_copy(data.resize, data.h_resize);
MemsetZeroFunctor<DeviceType> f_zero;
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
Kokkos::parallel_for(mbins, f_zero);
DeviceType::fence();
NeighborKokkosBinAtomsFunctor<DeviceType> f(data);
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
DeviceType::fence();
deep_copy(data.h_resize, data.resize);
if(data.h_resize()) {
atoms_per_bin += 16;
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
data.bins = k_bins.view<DeviceType>();
data.c_bins = data.bins;
}
}
if(list->d_neighbors.dimension_0()<nall) {
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
data.neigh_list.d_neighbors = list->d_neighbors;
data.neigh_list.d_numneigh = list->d_numneigh;
}
data.h_resize()=1;
while(data.h_resize()) {
data.h_new_maxneighs() = list->maxneighs;
data.h_resize() = 0;
Kokkos::deep_copy(data.resize, data.h_resize);
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
#if DEVICE==2
#define BINS_PER_BLOCK 2
const int factor = atoms_per_bin<64?2:1;
Kokkos::ParallelWorkRequest config((mbins+factor-1)/factor,atoms_per_bin*factor);
#else
const int factor = 1;
#endif
if(newton_pair) {
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#if DEVICE==2
Kokkos::parallel_for(config, f);
#else
Kokkos::parallel_for(nall, f);
#endif
} else {
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#if DEVICE==2
Kokkos::parallel_for(config, f);
#else
Kokkos::parallel_for(nall, f);
#endif
}
DeviceType::fence();
deep_copy(data.h_resize, data.resize);
if(data.h_resize()) {
deep_copy(data.h_new_maxneighs, data.new_maxneighs);
list->maxneighs = data.h_new_maxneighs() * 1.2;
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs);
data.neigh_list.d_neighbors = list->d_neighbors;
data.neigh_list.maxneighs = list->maxneighs;
}
}
list->inum = nall;
list->gnum = 0;
}
/* ---------------------------------------------------------------------- */
template<class Device>
KOKKOS_INLINE_FUNCTION
void NeighborKokkosExecute<Device>::binatomsItem(const int &i) const
{
const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2));
const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1);
if(ac < bins.dimension_1()) {
bins(ibin, ac) = i;
} else {
resize() = 1;
}
}
/* ---------------------------------------------------------------------- */
template<class Device> template<int HalfNeigh,int GhostNewton>
void NeighborKokkosExecute<Device>::
build_Item(const int &i) const
{
/* if necessary, goto next page and add pages */
int n = 0;
// get subview of neighbors of i
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
const X_FLOAT xtmp = x(i, 0);
const X_FLOAT ytmp = x(i, 1);
const X_FLOAT ztmp = x(i, 2);
const int itype = type(i);
const int ibin = coord2bin(xtmp, ytmp, ztmp);
const int nstencil = neigh_list.nstencil;
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
= neigh_list.d_stencil;
// loop over all bins in neighborhood (includes ibin)
if(HalfNeigh)
for(int m = 0; m < c_bincount(ibin); m++) {
const int j = c_bins(ibin,m);
// printf("%i %i %i\n",i,ibin,m,c_bincount(ibin),j);
const int jtype = type(j);
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
if((j == i) || (HalfNeigh && !GhostNewton && (j < i)) ||
(HalfNeigh && GhostNewton && ((j < i) || ((j >= nlocal) &&
((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
) continue;
//if(Exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq <= cutneighsq(itype,jtype)) {
if(n<neigh_list.maxneighs) neighbors_i(n) = j;
n++;
}
}
for(int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
// get subview of jbin
if(!GhostNewton&&HalfNeigh&&(ibin==jbin)) continue;
//const ArrayTypes<Device>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
for(int m = 0; m < c_bincount(jbin); m++) {
const int j = c_bins(jbin,m);
//if(i==0)
//printf("%i %i %i %i %i %i %i\n",i,jbin,m,c_bincount(jbin),j,k,stencil[k]);
const int jtype = type(j);
if(HalfNeigh && !GhostNewton && (j < i)) continue;
if(!HalfNeigh && j==i) continue;
//if(Exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
//if(i==0)
//printf("%i %i %lf %lf NEIGHS\n",i,j,rsq,cutneighsq(itype,jtype));
if(rsq <= cutneighsq(itype,jtype)) {
if(n<neigh_list.maxneighs) neighbors_i(n) = j;
n++;
}
}
}
neigh_list.d_numneigh(i) = n;
if(n >= neigh_list.maxneighs) {
resize() = 1;
if(n >= new_maxneighs()) new_maxneighs() = n;
}
neigh_list.d_ilist(i) = i;
}
#if DEVICE==2
extern __shared__ X_FLOAT sharedmem[];
/* ---------------------------------------------------------------------- */
template<class DeviceType> template<int HalfNeigh>
__device__ inline
void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
{
/* loop over atoms in i's bin,
*/
const int atoms_per_bin = c_bins.dimension_1();
const int BINS_PER_TEAM = blockDim.x/atoms_per_bin;
const int MY_BIN = threadIdx.x/atoms_per_bin;
const int MY_II = threadIdx.x%atoms_per_bin;
const int ibin = (blockIdx.x)*BINS_PER_TEAM+MY_BIN;
if(ibin >=c_bincount.dimension_0()) return;
X_FLOAT* other_x = sharedmem;
other_x = other_x + 5*atoms_per_bin*MY_BIN;
int* other_id = (int*) &other_x[4 * atoms_per_bin];
int bincount_current = c_bincount[ibin];
const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
/* if necessary, goto next page and add pages */
int n = 0;
X_FLOAT xtmp;
X_FLOAT ytmp;
X_FLOAT ztmp;
int itype;
const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i<nlocal)?i:0);
if(i >= 0) {
xtmp = x(i, 0);
ytmp = x(i, 1);
ztmp = x(i, 2);
itype = type(i);
other_x[MY_II] = xtmp;
other_x[MY_II + atoms_per_bin] = ytmp;
other_x[MY_II + 2 * atoms_per_bin] = ztmp;
other_x[MY_II + 3 * atoms_per_bin] = itype;
}
other_id[MY_II] = i;
int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0);
if(test) return;
if(i >= 0 && i < nlocal) {
#pragma unroll 4
for(int m = 0; m < bincount_current; m++) {
int j = other_id[m];
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
//if(j==i) continue;
if((j == i) || (HalfNeigh && (j < i))) continue;
const X_FLOAT delx = xtmp - other_x[m];
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
const int jtype = other_x[m + 3 * atoms_per_bin];
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if((rsq <= cutneighsq(itype,jtype)) && (n < neigh_list.maxneighs)) neighbors_i(n++) = j;
}
}
__syncthreads();
const int nstencil = neigh_list.nstencil;
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= neigh_list.d_stencil;
for(int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
if(ibin == jbin) continue;
bincount_current = c_bincount[jbin];
int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1;
if(j >= 0) {
other_x[MY_II] = x(j, 0);
other_x[MY_II + atoms_per_bin] = x(j, 1);
other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
other_x[MY_II + 3 * atoms_per_bin] = type(j);
}
other_id[MY_II] = j;
__syncthreads();
if(i >= 0 && i < nlocal) {
#pragma unroll 8
for(int m = 0; m < bincount_current; m++) {
const int j = other_id[m];
if(HalfNeigh && (j < i)) continue;
const X_FLOAT delx = xtmp - other_x[m];
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
const int jtype = other_x[m + 3 * atoms_per_bin];
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if((rsq <= cutneighsq(itype,jtype)) && (n < neigh_list.maxneighs)) neighbors_i(n++) = j;
}
}
__syncthreads();
}
if(i >= 0 && i < nlocal) {
neigh_list.d_numneigh(i) = n;
neigh_list.d_ilist(i) = i;
}
if(n >= neigh_list.maxneighs) {
resize() = 1;
if(n >= new_maxneighs()) new_maxneighs() = n;
}
}
#endif
template<class DeviceType>
void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list)
{
const int nall = includegroup?atom->nfirst:atom->nlocal;
list->grow(nall);
NeighborKokkosExecute<DeviceType>
data(*list,
k_cutneighsq.view<DeviceType>(),
k_bincount.view<DeviceType>(),
k_bins.view<DeviceType>(),nall,
atomKK->k_x.view<DeviceType>(),
atomKK->k_type.view<DeviceType>(),
atomKK->k_mask.view<DeviceType>(),
atomKK->k_molecule.view<DeviceType>(),
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
bininvx,bininvy,bininvz,
bboxhi,bboxlo);
k_cutneighsq.sync<DeviceType>();
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK);
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
DeviceType::fence();
while(data.h_resize() > 0) {
data.h_resize() = 0;
deep_copy(data.resize, data.h_resize);
MemsetZeroFunctor<DeviceType> f_zero;
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
Kokkos::parallel_for(mbins, f_zero);
DeviceType::fence();
NeighborKokkosBinAtomsFunctor<DeviceType> f(data);
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
DeviceType::fence();
deep_copy(data.h_resize, data.resize);
if(data.h_resize()) {
atoms_per_bin += 16;
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
data.bins = k_bins.view<DeviceType>();
data.c_bins = data.bins;
}
}
if(list->d_neighbors.dimension_0()<nall) {
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
data.neigh_list.d_neighbors = list->d_neighbors;
data.neigh_list.d_numneigh = list->d_numneigh;
}
data.h_resize()=1;
while(data.h_resize()) {
data.h_new_maxneighs() = list->maxneighs;
data.h_resize() = 0;
Kokkos::deep_copy(data.resize, data.h_resize);
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
#if DEVICE==2
#define BINS_PER_BLOCK 2
const int factor = atoms_per_bin<64?2:1;
Kokkos::ParallelWorkRequest config((mbins+factor-1)/factor,atoms_per_bin*factor);
#else
const int factor = 1;
#endif
if(newton_pair) {
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
//#if DEVICE==2
// Kokkos::parallel_for(config, f);
//#else
Kokkos::parallel_for(nall, f);
//#endif
} else {
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
//#if DEVICE==2
// Kokkos::parallel_for(config, f);
//#else
Kokkos::parallel_for(nall, f);
//#endif
}
DeviceType::fence();
deep_copy(data.h_resize, data.resize);
if(data.h_resize()) {
deep_copy(data.h_new_maxneighs, data.new_maxneighs);
list->maxneighs = data.h_new_maxneighs() * 1.2;
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs);
data.neigh_list.d_neighbors = list->d_neighbors;
data.neigh_list.maxneighs = list->maxneighs;
}
}
list->inum = nall;
list->gnum = 0;
}
/* ---------------------------------------------------------------------- */
template<class Device> template<int ClusterSize>
void NeighborKokkosExecute<Device>::
build_cluster_Item(const int &i) const
{
/* if necessary, goto next page and add pages */
int n = 0;
// get subview of neighbors of i
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
const X_FLOAT xtmp = x(i, 0);
const X_FLOAT ytmp = x(i, 1);
const X_FLOAT ztmp = x(i, 2);
const int itype = type(i);
const int ibin = coord2bin(xtmp, ytmp, ztmp);
const int nstencil = neigh_list.nstencil;
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
= neigh_list.d_stencil;
for(int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
for(int m = 0; m < c_bincount(jbin); m++) {
const int j = c_bins(jbin,m);
bool skip = i == j;
for(int k = 0; k< (n<neigh_list.maxneighs?n:neigh_list.maxneighs); k++)
if((j-(j%ClusterSize)) == neighbors_i(k)) {skip=true;};//{m += ClusterSize - j&(ClusterSize-1)-1; skip=true;}
if(!skip) {
const int jtype = type(j);
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq <= cutneighsq(itype,jtype)) {
if(n<neigh_list.maxneighs) neighbors_i(n) = (j-(j%ClusterSize));
n++;
//m += ClusterSize - j&(ClusterSize-1)-1;
}
}
}
}
neigh_list.d_numneigh(i) = n;
if(n >= neigh_list.maxneighs) {
resize() = 1;
if(n >= new_maxneighs()) new_maxneighs() = n;
}
neigh_list.d_ilist(i) = i;
}

View File

@ -0,0 +1,118 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "neigh_list_kokkos.h"
#include "atom.h"
#include "memory.h"
using namespace LAMMPS_NS;
enum{NSQ,BIN,MULTI};
/* ---------------------------------------------------------------------- */
template<class Device>
void NeighListKokkos<Device>::clean_copy()
{
ilist = NULL;
numneigh = NULL;
firstneigh = NULL;
firstdouble = NULL;
dnum = 0;
iskip = NULL;
ijskip = NULL;
ipage = NULL;
dpage = NULL;
maxstencil = 0;
ghostflag = 0;
maxstencil_multi = 0;
}
/* ---------------------------------------------------------------------- */
template<class Device>
void NeighListKokkos<Device>::grow(int nmax)
{
// skip if this list is already long enough to store nmax atoms
if (nmax <= maxatoms) return;
maxatoms = nmax;
d_ilist =
typename ArrayTypes<Device>::t_int_1d("neighlist:ilist",maxatoms);
d_numneigh =
typename ArrayTypes<Device>::t_int_1d("neighlist:numneigh",maxatoms);
d_neighbors =
typename ArrayTypes<Device>::t_neighbors_2d("neighlist:neighbors",
maxatoms,maxneighs);
memory->sfree(firstneigh);
memory->sfree(firstdouble);
firstneigh = (int **) memory->smalloc(maxatoms*sizeof(int *),
"neighlist:firstneigh");
if (dnum)
firstdouble = (double **) memory->smalloc(maxatoms*sizeof(double *),
"neighlist:firstdouble");
}
/* ---------------------------------------------------------------------- */
template<class Device>
void NeighListKokkos<Device>::stencil_allocate(int smax, int style)
{
int i;
if (style == BIN) {
if (smax > maxstencil) {
maxstencil = smax;
d_stencil =
memory->create_kokkos(d_stencil,h_stencil,stencil,maxstencil,
"neighlist:stencil");
if (ghostflag) {
memory->destroy(stencilxyz);
memory->create(stencilxyz,maxstencil,3,"neighlist:stencilxyz");
}
}
} else {
int n = atom->ntypes;
if (maxstencil_multi == 0) {
nstencil_multi = new int[n+1];
stencil_multi = new int*[n+1];
distsq_multi = new double*[n+1];
for (i = 1; i <= n; i++) {
nstencil_multi[i] = 0;
stencil_multi[i] = NULL;
distsq_multi[i] = NULL;
}
}
if (smax > maxstencil_multi) {
maxstencil_multi = smax;
for (i = 1; i <= n; i++) {
memory->destroy(stencil_multi[i]);
memory->destroy(distsq_multi[i]);
memory->create(stencil_multi[i],maxstencil_multi,
"neighlist:stencil_multi");
memory->create(distsq_multi[i],maxstencil_multi,
"neighlist:distsq_multi");
}
}
}
}
template class NeighListKokkos<LMPDeviceType>;
#if DEVICE==2
template class NeighListKokkos<LMPHostType>;
#endif

View File

@ -0,0 +1,104 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_NEIGH_LIST_KOKKOS_H
#define LMP_NEIGH_LIST_KOKKOS_H
#include "pointers.h"
#include "neigh_list.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
enum{FULL,HALFTHREAD,HALF,N2,FULLCLUSTER};
class AtomNeighbors
{
public:
const int num_neighs;
KOKKOS_INLINE_FUNCTION
AtomNeighbors(int* const & firstneigh, const int & _num_neighs,
const int & stride):
_firstneigh(firstneigh), _stride(stride), num_neighs(_num_neighs) {};
KOKKOS_INLINE_FUNCTION
int& operator()(const int &i) const {
return _firstneigh[i*_stride];
}
private:
int* const _firstneigh;
const int _stride;
};
class AtomNeighborsConst
{
public:
const int* const _firstneigh;
const int numneigh;
KOKKOS_INLINE_FUNCTION
AtomNeighborsConst(int* const & firstneigh, const int & _numneigh,
const int & stride):
_firstneigh(firstneigh), _stride(stride), numneigh(_numneigh) {};
KOKKOS_INLINE_FUNCTION
const int& operator()(const int &i) const {
return _firstneigh[i*_stride];
}
private:
//const int* const _firstneigh;
const int _stride;
};
template<class Device>
class NeighListKokkos: public NeighList {
int _stride;
public:
int maxneighs;
void clean_copy();
void grow(int nmax);
typename ArrayTypes<Device>::t_neighbors_2d d_neighbors;
typename ArrayTypes<Device>::t_int_1d d_ilist; // local indices of I atoms
typename ArrayTypes<Device>::t_int_1d d_numneigh; // # of J neighs for each I
typename ArrayTypes<Device>::t_int_1d d_stencil; // # of J neighs for each I
typename ArrayTypes<LMPHostType>::t_int_1d h_stencil; // # of J neighs per I
NeighListKokkos(class LAMMPS *lmp):
NeighList(lmp) {_stride = 1; maxneighs = 16;};
~NeighListKokkos() {stencil = NULL; numneigh = NULL; ilist = NULL;};
KOKKOS_INLINE_FUNCTION
AtomNeighbors get_neighbors(const int &i) const {
return AtomNeighbors(&d_neighbors(i,0),d_numneigh(i),
&d_neighbors(i,1)-&d_neighbors(i,0));
}
KOKKOS_INLINE_FUNCTION
AtomNeighborsConst get_neighbors_const(const int &i) const {
return AtomNeighborsConst(&d_neighbors(i,0),d_numneigh(i),
&d_neighbors(i,1)-&d_neighbors(i,0));
}
KOKKOS_INLINE_FUNCTION
int& num_neighs(const int & i) const {
return d_numneigh(i);
}
void stencil_allocate(int smax, int style);
};
}
#endif

View File

@ -0,0 +1,269 @@
;/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "neighbor_kokkos.h"
#include "atom.h"
#include "pair.h"
#include "neigh_request.h"
#include "memory.h"
using namespace LAMMPS_NS;
enum{NSQ,BIN,MULTI}; // also in neigh_list.cpp
/* ---------------------------------------------------------------------- */
NeighborKokkos::NeighborKokkos(LAMMPS *lmp) : Neighbor(lmp)
{
atoms_per_bin = 16;
nlist_host = 0;
lists_host = NULL;
pair_build_host = NULL;
stencil_create_host = NULL;
nlist_device = 0;
lists_device = NULL;
pair_build_device = NULL;
stencil_create_device = NULL;
}
/* ---------------------------------------------------------------------- */
NeighborKokkos::~NeighborKokkos()
{
memory->destroy_kokkos(k_cutneighsq,cutneighsq);
cutneighsq = NULL;
for (int i = 0; i < nlist_host; i++) delete lists_host[i];
delete [] lists_host;
for (int i = 0; i < nlist_device; i++) delete lists_device[i];
delete [] lists_device;
delete [] pair_build_device;
delete [] pair_build_host;
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init()
{
atomKK = (AtomKokkos *) atom;
Neighbor::init();
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_cutneighsq_kokkos(int n)
{
memory->create_kokkos(k_cutneighsq,cutneighsq,n+1,n+1,"neigh:cutneighsq");
k_cutneighsq.modify<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
int NeighborKokkos::init_lists_kokkos()
{
int i;
for (i = 0; i < nlist_host; i++) delete lists_host[i];
delete [] lists_host;
delete [] pair_build_host;
delete [] stencil_create_host;
nlist_host = 0;
for (i = 0; i < nlist_device; i++) delete lists_device[i];
delete [] lists_device;
delete [] pair_build_device;
delete [] stencil_create_device;
nlist_device = 0;
nlist = 0;
for (i = 0; i < nrequest; i++) {
if (requests[i]->kokkos_device) nlist_device++;
else if (requests[i]->kokkos_host) nlist_host++;
else nlist++;
}
lists_host = new NeighListKokkos<LMPHostType>*[nrequest];
pair_build_host = new PairPtrHost[nrequest];
stencil_create_host = new StencilPtrHost[nrequest];
for (i = 0; i < nrequest; i++) {
lists_host[i] = NULL;
pair_build_host[i] = NULL;
stencil_create_host[i] = NULL;
}
for (i = 0; i < nrequest; i++) {
if (!requests[i]->kokkos_host) continue;
lists_host[i] = new NeighListKokkos<LMPHostType>(lmp);
lists_host[i]->index = i;
lists_host[i]->dnum = requests[i]->dnum;
if (requests[i]->pair) {
Pair *pair = (Pair *) requests[i]->requestor;
pair->init_list(requests[i]->id,lists_host[i]);
}
}
lists_device = new NeighListKokkos<LMPDeviceType>*[nrequest];
pair_build_device = new PairPtrDevice[nrequest];
stencil_create_device = new StencilPtrDevice[nrequest];
for (i = 0; i < nrequest; i++) {
lists_device[i] = NULL;
pair_build_device[i] = NULL;
stencil_create_device[i] = NULL;
}
for (i = 0; i < nrequest; i++) {
if (!requests[i]->kokkos_device) continue;
lists_device[i] = new NeighListKokkos<LMPDeviceType>(lmp);
lists_device[i]->index = i;
lists_device[i]->dnum = requests[i]->dnum;
if (requests[i]->pair) {
Pair *pair = (Pair *) requests[i]->requestor;
pair->init_list(requests[i]->id,lists_device[i]);
}
}
// return # of non-Kokkos lists
return nlist;
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_list_flags1_kokkos(int i)
{
if (lists_host[i]) {
lists_host[i]->buildflag = 1;
if (pair_build_host[i] == NULL) lists_host[i]->buildflag = 0;
if (requests[i]->occasional) lists_host[i]->buildflag = 0;
lists_host[i]->growflag = 1;
if (requests[i]->copy) lists_host[i]->growflag = 0;
lists_host[i]->stencilflag = 1;
if (style == NSQ) lists_host[i]->stencilflag = 0;
if (stencil_create[i] == NULL) lists_host[i]->stencilflag = 0;
lists_host[i]->ghostflag = 0;
if (requests[i]->ghost) lists_host[i]->ghostflag = 1;
if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
}
if (lists_device[i]) {
lists_device[i]->buildflag = 1;
if (pair_build_device[i] == NULL) lists_device[i]->buildflag = 0;
if (requests[i]->occasional) lists_device[i]->buildflag = 0;
lists_device[i]->growflag = 1;
if (requests[i]->copy) lists_device[i]->growflag = 0;
lists_device[i]->stencilflag = 1;
if (style == NSQ) lists_device[i]->stencilflag = 0;
if (stencil_create[i] == NULL) lists_device[i]->stencilflag = 0;
lists_device[i]->ghostflag = 0;
if (requests[i]->ghost) lists_device[i]->ghostflag = 1;
if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
}
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_list_flags2_kokkos(int i)
{
if (lists_host[i]) {
if (lists_host[i]->buildflag) blist[nblist++] = i;
if (lists_host[i]->growflag && requests[i]->occasional == 0)
glist[nglist++] = i;
if (lists_host[i]->stencilflag && requests[i]->occasional == 0)
slist[nslist++] = i;
}
if (lists_device[i]) {
if (lists_device[i]->buildflag) blist[nblist++] = i;
if (lists_device[i]->growflag && requests[i]->occasional == 0)
glist[nglist++] = i;
if (lists_device[i]->stencilflag && requests[i]->occasional == 0)
slist[nslist++] = i;
}
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_list_grow_kokkos(int i)
{
if (lists_host[i]!=NULL && lists_host[i]->growflag)
lists_host[i]->grow(maxatom);
if (lists_device[i]!=NULL && lists_device[i]->growflag)
lists_device[i]->grow(maxatom);
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::choose_build(int index, NeighRequest *rq)
{
if (rq->kokkos_host != 0) {
PairPtrHost pb = NULL;
if (rq->full) pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,0>;
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,1>;
pair_build_host[index] = pb;
return;
}
if (rq->kokkos_device != 0) {
PairPtrDevice pb = NULL;
if (rq->full) {
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPDeviceType>;
else pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,0>;
}
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,1>;
pair_build_device[index] = pb;
return;
}
Neighbor::choose_build(index,rq);
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::build_kokkos(int i)
{
if (lists_host[blist[i]])
(this->*pair_build_host[blist[i]])(lists_host[blist[i]]);
else if (lists_device[blist[i]])
(this->*pair_build_device[blist[i]])(lists_device[blist[i]]);
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::setup_bins_kokkos(int i)
{
if (lists_host[slist[i]]) {
lists_host[slist[i]]->stencil_allocate(smax,style);
(this->*stencil_create[slist[i]])(lists_host[slist[i]],sx,sy,sz);
} else if (lists_device[slist[i]]) {
lists_device[slist[i]]->stencil_allocate(smax,style);
(this->*stencil_create[slist[i]])(lists_device[slist[i]],sx,sy,sz);
}
if (i < nslist-1) return;
if (maxhead > k_bins.d_view.dimension_0()) {
k_bins = DAT::tdual_int_2d("Neighbor::d_bins",maxhead,atoms_per_bin);
k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",maxhead);
}
}
// include to trigger instantiation of templated functions
#include "neigh_full_kokkos.h"

View File

@ -0,0 +1,257 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_NEIGHBOR_KOKKOS_H
#define LMP_NEIGHBOR_KOKKOS_H
#include "neighbor.h"
#include "neigh_list_kokkos.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
template<class Device>
class NeighborKokkosExecute
{
typedef ArrayTypes<Device> AT;
public:
NeighListKokkos<Device> neigh_list;
const typename AT::t_xfloat_2d_randomread cutneighsq;
const typename AT::t_int_1d bincount;
const typename AT::t_int_1d_const c_bincount;
typename AT::t_int_2d bins;
typename AT::t_int_2d_const c_bins;
const typename AT::t_x_array_randomread x;
const typename AT::t_int_1d_const type,mask,molecule;
const int nbinx,nbiny,nbinz;
const int mbinx,mbiny,mbinz;
const int mbinxlo,mbinylo,mbinzlo;
const X_FLOAT bininvx,bininvy,bininvz;
X_FLOAT bboxhi[3],bboxlo[3];
const int nlocal;
typename AT::t_int_scalar resize;
typename AT::t_int_scalar new_maxneighs;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
NeighborKokkosExecute(
const NeighListKokkos<Device> &_neigh_list,
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
const typename AT::t_int_1d &_bincount,
const typename AT::t_int_2d &_bins,
const int _nlocal,
const typename AT::t_x_array_randomread &_x,
const typename AT::t_int_1d_const &_type,
const typename AT::t_int_1d_const &_mask,
const typename AT::t_int_1d_const &_molecule,
const int & _nbinx,const int & _nbiny,const int & _nbinz,
const int & _mbinx,const int & _mbiny,const int & _mbinz,
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo):
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
nlocal(_nlocal),
x(_x),type(_type),mask(_mask),molecule(_molecule),
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz) {
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
#ifndef KOKKOS_USE_UVM
h_resize = Kokkos::create_mirror_view(resize);
#else
h_resize = resize;
#endif
h_resize() = 1;
new_maxneighs = typename AT::
t_int_scalar("NeighborKokkosFunctor::new_maxneighs");
#ifndef KOKKOS_USE_UVM
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
#else
h_new_maxneighs = new_maxneighs;
#endif
h_new_maxneighs() = neigh_list.maxneighs;
};
~NeighborKokkosExecute() {neigh_list.clean_copy();};
template<int HalfNeigh, int GhostNewton>
KOKKOS_FUNCTION
void build_Item(const int &i) const;
template<int ClusterSize>
KOKKOS_FUNCTION
void build_cluster_Item(const int &i) const;
#if DEVICE==2
template<int HalfNeigh>
__device__ inline
void build_ItemCuda(Device dev) const;
#endif
KOKKOS_INLINE_FUNCTION
void binatomsItem(const int &i) const;
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
{
int ix,iy,iz;
if (x >= bboxhi[0])
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
else if (x >= bboxlo[0]) {
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
if (y >= bboxhi[1])
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
else if (y >= bboxlo[1]) {
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
if (z >= bboxhi[2])
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
else if (z >= bboxlo[2]) {
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
};
template<class Device>
struct NeighborKokkosBinAtomsFunctor {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
NeighborKokkosBinAtomsFunctor(const NeighborKokkosExecute<Device> &_c):
c(_c) {};
~NeighborKokkosBinAtomsFunctor() {}
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.binatomsItem(i);
}
};
template<class Device,int HALF_NEIGH,int GHOST_NEWTON>
struct NeighborKokkosBuildFunctor {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
const size_t sharedsize;
NeighborKokkosBuildFunctor(const NeighborKokkosExecute<Device> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
}
#if DEVICE==2
KOKKOS_INLINE_FUNCTION
void operator() (Device dev) const {
c.template build_ItemCuda<HALF_NEIGH>(dev);
}
size_t shmem_size() const { return sharedsize; }
#endif
};
template<class Device,int ClusterSize>
struct NeighborClusterKokkosBuildFunctor {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
const size_t sharedsize;
NeighborClusterKokkosBuildFunctor(const NeighborKokkosExecute<Device> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_cluster_Item<ClusterSize>(i);
}
};
class NeighborKokkos : public Neighbor {
public:
class AtomKokkos *atomKK;
int nlist_host; // pairwise neighbor lists on Host
NeighListKokkos<LMPHostType> **lists_host;
int nlist_device; // pairwise neighbor lists on Device
NeighListKokkos<LMPDeviceType> **lists_device;
NeighborKokkos(class LAMMPS *);
~NeighborKokkos();
void init();
private:
int atoms_per_bin;
DAT::tdual_xfloat_2d k_cutneighsq;
DAT::tdual_int_1d k_bincount;
DAT::tdual_int_2d k_bins;
void init_cutneighsq_kokkos(int);
int init_lists_kokkos();
void init_list_flags1_kokkos(int);
void init_list_flags2_kokkos(int);
void init_list_grow_kokkos(int);
void choose_build(int, NeighRequest *);
void build_kokkos(int);
void setup_bins_kokkos(int);
typedef void (NeighborKokkos::*PairPtrHost)
(class NeighListKokkos<LMPHostType> *);
PairPtrHost *pair_build_host;
typedef void (NeighborKokkos::*PairPtrDevice)
(class NeighListKokkos<LMPDeviceType> *);
PairPtrDevice *pair_build_device;
template<class DeviceType,int HALF_NEIGH>
void full_bin_kokkos(NeighListKokkos<DeviceType> *list);
template<class DeviceType>
void full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list);
typedef void (NeighborKokkos::*StencilPtrHost)
(class NeighListKokkos<LMPHostType> *, int, int, int);
StencilPtrHost *stencil_create_host;
typedef void (NeighborKokkos::*StencilPtrDevice)
(class NeighListKokkos<LMPDeviceType> *, int, int, int);
StencilPtrDevice *stencil_create_device;
};
}
#endif
/* ERROR/WARNING messages:
*/

655
src/KOKKOS/pair_kokkos.h Normal file
View File

@ -0,0 +1,655 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
#else
#ifndef LMP_PAIR_KOKKOS_H
#define LMP_PAIR_KOKKOS_H
#include "Kokkos_Macros.hpp"
#include "pair.h"
#include "neigh_list_kokkos.h"
#include "Kokkos_Vectorization.hpp"
namespace LAMMPS_NS {
template <class PairStyle, int NEIGHFLAG, bool STACKPARAMS, class Specialisation = void>
struct PairComputeFunctor {
typedef typename PairStyle::device_type device_type ;
typedef EV_FLOAT value_type;
PairStyle c;
NeighListKokkos<device_type> list;
PairComputeFunctor(PairStyle* c_ptr,
NeighListKokkos<device_type>* list_ptr):
c(*c_ptr),list(*list_ptr) {};
~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
return j >> SBBITS & 3;
}
template<int EVFLAG, int NEWTON_PAIR>
KOKKOS_FUNCTION
EV_FLOAT compute_item(const int& ii,
const NeighListKokkos<device_type> &list) const {
EV_FLOAT ev;
const int i = list.d_ilist[ii];
const X_FLOAT xtmp = c.x(i,0);
const X_FLOAT ytmp = c.x(i,1);
const X_FLOAT ztmp = c.x(i,2);
const int itype = c.type(i);
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
const int jnum = list.d_numneigh[i];
F_FLOAT fxtmp = 0.0;
F_FLOAT fytmp = 0.0;
F_FLOAT fztmp = 0.0;
for (int jj = 0; jj < jnum; jj++) {
int j = neighbors_i(jj);
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
j &= NEIGHMASK;
const X_FLOAT delx = xtmp - c.x(j,0);
const X_FLOAT dely = ytmp - c.x(j,1);
const X_FLOAT delz = ztmp - c.x(j,2);
const int jtype = c.type(j);
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
if ((NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
Kokkos::atomic_fetch_add(&c.f(j,0),-delx*fpair);
Kokkos::atomic_fetch_add(&c.f(j,1),-dely*fpair);
Kokkos::atomic_fetch_add(&c.f(j,2),-delz*fpair);
}
if ((NEIGHFLAG==HALF) && (NEWTON_PAIR || j < c.nlocal)) {
c.f(j,0) -= delx*fpair;
c.f(j,1) -= dely*fpair;
c.f(j,2) -= delz*fpair;
}
if (EVFLAG) {
if (c.eflag) {
ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
if (c.COUL_FLAG)
ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
}
if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
}
}
}
if (NEIGHFLAG == HALFTHREAD) {
Kokkos::atomic_fetch_add(&c.f(i,0),fxtmp);
Kokkos::atomic_fetch_add(&c.f(i,1),fytmp);
Kokkos::atomic_fetch_add(&c.f(i,2),fztmp);
} else {
c.f(i,0) += fxtmp;
c.f(i,1) += fytmp;
c.f(i,2) += fztmp;
}
return ev;
}
KOKKOS_INLINE_FUNCTION
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
const F_FLOAT &fpair, const F_FLOAT &delx,
const F_FLOAT &dely, const F_FLOAT &delz) const
{
const int EFLAG = c.eflag;
const int NEWTON_PAIR = c.newton_pair;
const int VFLAG = c.vflag_either;
if (EFLAG) {
if (c.eflag_atom) {
const E_FLOAT epairhalf = 0.5 * (ev.evdwl + ev.ecoul);
if (NEWTON_PAIR || i < c.nlocal) c.eatom[i] += epairhalf;
if (NEWTON_PAIR || j < c.nlocal) c.eatom[j] += epairhalf;
}
}
if (VFLAG) {
const E_FLOAT v0 = delx*delx*fpair;
const E_FLOAT v1 = dely*dely*fpair;
const E_FLOAT v2 = delz*delz*fpair;
const E_FLOAT v3 = delx*dely*fpair;
const E_FLOAT v4 = delx*delz*fpair;
const E_FLOAT v5 = dely*delz*fpair;
if (c.vflag_global) {
if (NEIGHFLAG) {
if (NEWTON_PAIR) {
ev.v[0] += v0;
ev.v[1] += v1;
ev.v[2] += v2;
ev.v[3] += v3;
ev.v[4] += v4;
ev.v[5] += v5;
} else {
if (i < c.nlocal) {
ev.v[0] += 0.5*v0;
ev.v[1] += 0.5*v1;
ev.v[2] += 0.5*v2;
ev.v[3] += 0.5*v3;
ev.v[4] += 0.5*v4;
ev.v[5] += 0.5*v5;
}
if (j < c.nlocal) {
ev.v[0] += 0.5*v0;
ev.v[1] += 0.5*v1;
ev.v[2] += 0.5*v2;
ev.v[3] += 0.5*v3;
ev.v[4] += 0.5*v4;
ev.v[5] += 0.5*v5;
}
}
} else {
ev.v[0] += 0.5*v0;
ev.v[1] += 0.5*v1;
ev.v[2] += 0.5*v2;
ev.v[3] += 0.5*v3;
ev.v[4] += 0.5*v4;
ev.v[5] += 0.5*v5;
}
}
if (c.vflag_atom) {
if (NEWTON_PAIR || i < c.nlocal) {
c.d_vatom(i,0) += 0.5*v0;
c.d_vatom(i,1) += 0.5*v1;
c.d_vatom(i,2) += 0.5*v2;
c.d_vatom(i,3) += 0.5*v3;
c.d_vatom(i,4) += 0.5*v4;
c.d_vatom(i,5) += 0.5*v5;
}
if (NEWTON_PAIR || (NEIGHFLAG && j < c.nlocal)) {
c.d_vatom(j,0) += 0.5*v0;
c.d_vatom(j,1) += 0.5*v1;
c.d_vatom(j,2) += 0.5*v2;
c.d_vatom(j,3) += 0.5*v3;
c.d_vatom(j,4) += 0.5*v4;
c.d_vatom(j,5) += 0.5*v5;
}
}
}
}
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
if (c.newton_pair) compute_item<0,1>(i,list);
else compute_item<0,0>(i,list);
}
KOKKOS_INLINE_FUNCTION
void operator()(const int i, value_type &energy_virial) const {
if (c.newton_pair)
energy_virial += compute_item<1,1>(i,list);
else
energy_virial += compute_item<1,0>(i,list);
}
KOKKOS_INLINE_FUNCTION
static void init(volatile value_type &update) {
update.evdwl = 0;
update.ecoul = 0;
update.v[0] = 0;
update.v[1] = 0;
update.v[2] = 0;
update.v[3] = 0;
update.v[4] = 0;
update.v[5] = 0;
}
KOKKOS_INLINE_FUNCTION
static void join(volatile value_type &update,
const volatile value_type &source) {
update.evdwl += source.evdwl;
update.ecoul += source.ecoul;
update.v[0] += source.v[0];
update.v[1] += source.v[1];
update.v[2] += source.v[2];
update.v[3] += source.v[3];
update.v[4] += source.v[4];
update.v[5] += source.v[5];
}
};
template <class PairStyle, bool STACKPARAMS, class Specialisation>
struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
typedef typename PairStyle::device_type device_type ;
typedef Kokkos::Vectorization<device_type,NeighClusterSize> vectorization;
typedef EV_FLOAT value_type;
PairStyle c;
NeighListKokkos<device_type> list;
PairComputeFunctor(PairStyle* c_ptr,
NeighListKokkos<device_type>* list_ptr):
c(*c_ptr),list(*list_ptr) {};
~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
return j >> SBBITS & 3;
}
template<int EVFLAG, int NEWTON_PAIR>
KOKKOS_FUNCTION
EV_FLOAT compute_item(const device_type& dev,
const NeighListKokkos<device_type> &list) const {
EV_FLOAT ev;
const int i = vectorization::global_thread_rank(dev);
const X_FLOAT xtmp = c.c_x(i,0);
const X_FLOAT ytmp = c.c_x(i,1);
const X_FLOAT ztmp = c.c_x(i,2);
const int itype = c.type(i);
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
const int jnum = list.d_numneigh[i];
F_FLOAT fxtmp = 0.0;
F_FLOAT fytmp = 0.0;
F_FLOAT fztmp = 0.0;
for (int jj = 0; jj < jnum; jj++) {
const int jjj = neighbors_i(jj);
for (int k = vectorization::begin(); k<NeighClusterSize; k+=vectorization::increment) {
const F_FLOAT factor_lj = c.special_lj[sbmask(jjj+k)];
const int j = (jjj + k)&NEIGHMASK;
if((j==i)||(j>=c.nall)) continue;
const X_FLOAT delx = xtmp - c.c_x(j,0);
const X_FLOAT dely = ytmp - c.c_x(j,1);
const X_FLOAT delz = ztmp - c.c_x(j,2);
const int jtype = c.type(j);
const F_FLOAT rsq = (delx*delx + dely*dely + delz*delz);
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
if (EVFLAG) {
if (c.eflag) {
ev.evdwl += 0.5*
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
if (c.COUL_FLAG)
ev.ecoul += 0.5*
factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
}
if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
}
}
}
}
const F_FLOAT fx = vectorization::reduce(fxtmp);
const F_FLOAT fy = vectorization::reduce(fytmp);
const F_FLOAT fz = vectorization::reduce(fztmp);
if(vectorization::is_lane_0(dev)) {
c.f(i,0) += fx;
c.f(i,1) += fy;
c.f(i,2) += fz;
}
return ev;
}
KOKKOS_INLINE_FUNCTION
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
const F_FLOAT &fpair, const F_FLOAT &delx,
const F_FLOAT &dely, const F_FLOAT &delz) const
{
const int EFLAG = c.eflag;
const int NEWTON_PAIR = c.newton_pair;
const int VFLAG = c.vflag_either;
if (EFLAG) {
if (c.eflag_atom) {
const E_FLOAT epairhalf = 0.5 * (ev.evdwl + ev.ecoul);
if (NEWTON_PAIR || i < c.nlocal) c.eatom[i] += epairhalf;
if (NEWTON_PAIR || j < c.nlocal) c.eatom[j] += epairhalf;
}
}
if (VFLAG) {
const E_FLOAT v0 = delx*delx*fpair;
const E_FLOAT v1 = dely*dely*fpair;
const E_FLOAT v2 = delz*delz*fpair;
const E_FLOAT v3 = delx*dely*fpair;
const E_FLOAT v4 = delx*delz*fpair;
const E_FLOAT v5 = dely*delz*fpair;
if (c.vflag_global) {
ev.v[0] += 0.5*v0;
ev.v[1] += 0.5*v1;
ev.v[2] += 0.5*v2;
ev.v[3] += 0.5*v3;
ev.v[4] += 0.5*v4;
ev.v[5] += 0.5*v5;
}
if (c.vflag_atom) {
if (i < c.nlocal) {
c.d_vatom(i,0) += 0.5*v0;
c.d_vatom(i,1) += 0.5*v1;
c.d_vatom(i,2) += 0.5*v2;
c.d_vatom(i,3) += 0.5*v3;
c.d_vatom(i,4) += 0.5*v4;
c.d_vatom(i,5) += 0.5*v5;
}
}
}
}
KOKKOS_INLINE_FUNCTION
void operator()(const device_type& dev) const {
if (c.newton_pair) compute_item<0,1>(dev,list);
else compute_item<0,0>(dev,list);
}
KOKKOS_INLINE_FUNCTION
void operator()(const device_type& dev, value_type &energy_virial) const {
if (c.newton_pair)
energy_virial += compute_item<1,1>(dev,list);
else
energy_virial += compute_item<1,0>(dev,list);
}
KOKKOS_INLINE_FUNCTION
static void init(volatile value_type &update) {
update.evdwl = 0;
update.ecoul = 0;
update.v[0] = 0;
update.v[1] = 0;
update.v[2] = 0;
update.v[3] = 0;
update.v[4] = 0;
update.v[5] = 0;
}
KOKKOS_INLINE_FUNCTION
static void join(volatile value_type &update,
const volatile value_type &source) {
update.evdwl += source.evdwl;
update.ecoul += source.ecoul;
update.v[0] += source.v[0];
update.v[1] += source.v[1];
update.v[2] += source.v[2];
update.v[3] += source.v[3];
update.v[4] += source.v[4];
update.v[5] += source.v[5];
}
};
template <class PairStyle, bool STACKPARAMS, class Specialisation>
struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
typedef typename PairStyle::device_type device_type ;
typedef EV_FLOAT value_type;
PairStyle c;
NeighListKokkos<device_type> list;
PairComputeFunctor(PairStyle* c_ptr,
NeighListKokkos<device_type>* list_ptr):
c(*c_ptr),list(*list_ptr) {};
~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
return j >> SBBITS & 3;
}
template<int EVFLAG, int NEWTON_PAIR>
KOKKOS_FUNCTION
EV_FLOAT compute_item(const int& ii,
const NeighListKokkos<device_type> &list) const {
EV_FLOAT ev;
const int i = ii;//list.d_ilist[ii];
const X_FLOAT xtmp = c.x(i,0);
const X_FLOAT ytmp = c.x(i,1);
const X_FLOAT ztmp = c.x(i,2);
const int itype = c.type(i);
//const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
const int jnum = c.nall;
F_FLOAT fxtmp = 0.0;
F_FLOAT fytmp = 0.0;
F_FLOAT fztmp = 0.0;
for (int jj = 0; jj < jnum; jj++) {
int j = jj;//neighbors_i(jj);
if(i==j) continue;
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
j &= NEIGHMASK;
const X_FLOAT delx = xtmp - c.x(j,0);
const X_FLOAT dely = ytmp - c.x(j,1);
const X_FLOAT delz = ztmp - c.x(j,2);
const int jtype = c.type(j);
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
if (EVFLAG) {
if (c.eflag) {
ev.evdwl += 0.5*
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
if (c.COUL_FLAG)
ev.ecoul += 0.5*
factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
}
if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
}
}
}
c.f(i,0) += fxtmp;
c.f(i,1) += fytmp;
c.f(i,2) += fztmp;
return ev;
}
KOKKOS_INLINE_FUNCTION
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
const F_FLOAT &fpair, const F_FLOAT &delx,
const F_FLOAT &dely, const F_FLOAT &delz) const
{
const int EFLAG = c.eflag;
const int VFLAG = c.vflag_either;
if (EFLAG) {
if (c.eflag_atom) {
const E_FLOAT epairhalf = 0.5 * (ev.evdwl + ev.ecoul);
if (i < c.nlocal) c.eatom[i] += epairhalf;
if (j < c.nlocal) c.eatom[j] += epairhalf;
}
}
if (VFLAG) {
const E_FLOAT v0 = delx*delx*fpair;
const E_FLOAT v1 = dely*dely*fpair;
const E_FLOAT v2 = delz*delz*fpair;
const E_FLOAT v3 = delx*dely*fpair;
const E_FLOAT v4 = delx*delz*fpair;
const E_FLOAT v5 = dely*delz*fpair;
if (c.vflag_global) {
ev.v[0] += 0.5*v0;
ev.v[1] += 0.5*v1;
ev.v[2] += 0.5*v2;
ev.v[3] += 0.5*v3;
ev.v[4] += 0.5*v4;
ev.v[5] += 0.5*v5;
}
if (c.vflag_atom) {
if (i < c.nlocal) {
c.d_vatom(i,0) += 0.5*v0;
c.d_vatom(i,1) += 0.5*v1;
c.d_vatom(i,2) += 0.5*v2;
c.d_vatom(i,3) += 0.5*v3;
c.d_vatom(i,4) += 0.5*v4;
c.d_vatom(i,5) += 0.5*v5;
}
}
}
}
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
compute_item<0,0>(i,list);
}
KOKKOS_INLINE_FUNCTION
void operator()(const int i, value_type &energy_virial) const {
energy_virial += compute_item<1,0>(i,list);
}
KOKKOS_INLINE_FUNCTION
static void init(volatile value_type &update) {
update.evdwl = 0;
update.ecoul = 0;
update.v[0] = 0;
update.v[1] = 0;
update.v[2] = 0;
update.v[3] = 0;
update.v[4] = 0;
update.v[5] = 0;
}
KOKKOS_INLINE_FUNCTION
static void join(volatile value_type &update,
const volatile value_type &source) {
update.evdwl += source.evdwl;
update.ecoul += source.ecoul;
update.v[0] += source.v[0];
update.v[1] += source.v[1];
update.v[2] += source.v[2];
update.v[3] += source.v[3];
update.v[4] += source.v[4];
update.v[5] += source.v[5];
}
};
template<class PairStyle, class Specialisation>
EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
EV_FLOAT ev;
if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
if (fpair->neighflag == FULL) {
PairComputeFunctor<PairStyle,FULL,false,Specialisation >
ff(fpair, list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
else Kokkos::parallel_for(list->inum,ff);
} else if (fpair->neighflag == HALFTHREAD) {
PairComputeFunctor<PairStyle,HALFTHREAD,false,Specialisation >
ff(fpair, list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
else Kokkos::parallel_for(list->inum,ff);
} else if (fpair->neighflag == HALF) {
PairComputeFunctor<PairStyle,HALF,false,Specialisation >
ff(fpair, list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
else Kokkos::parallel_for(list->inum,ff);
} else if (fpair->neighflag == N2) {
PairComputeFunctor<PairStyle,N2,false,Specialisation >
ff(fpair, list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(fpair->nlocal,ff,ev);
else Kokkos::parallel_for(fpair->nlocal,ff);
} else if (fpair->neighflag == FULLCLUSTER) {
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,false,Specialisation >
f_type;
f_type ff(fpair, list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),ff,ev);
else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),ff);
}
} else {
if (fpair->neighflag == FULL) {
PairComputeFunctor<PairStyle,FULL,true,Specialisation >
ff(fpair, list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
else Kokkos::parallel_for(list->inum,ff);
} else if (fpair->neighflag == HALFTHREAD) {
PairComputeFunctor<PairStyle,HALFTHREAD,true,Specialisation >
ff(fpair, list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
else Kokkos::parallel_for(list->inum,ff);
} else if (fpair->neighflag == HALF) {
PairComputeFunctor<PairStyle,HALF,true,Specialisation >
ff(fpair, list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
else Kokkos::parallel_for(list->inum,ff);
} else if (fpair->neighflag == N2) {
PairComputeFunctor<PairStyle,N2,true,Specialisation >
ff(fpair, list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(fpair->nlocal,ff,ev);
else Kokkos::parallel_for(fpair->nlocal,ff);
} else if (fpair->neighflag == FULLCLUSTER) {
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,true,Specialisation >
f_type;
f_type ff(fpair, list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),ff,ev);
else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),ff);
}
}
return ev;
}
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,267 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "math.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "pair_lj_cut_kokkos.h"
#include "kokkos.h"
#include "atom_kokkos.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "math_const.h"
#include "memory.h"
#include "error.h"
#include "atom_masks.h"
using namespace LAMMPS_NS;
using namespace MathConst;
#define KOKKOS_CUDA_MAX_THREADS 256
#define KOKKOS_CUDA_MIN_BLOCKS 8
/* ---------------------------------------------------------------------- */
template<class DeviceType>
PairLJCutKokkos<DeviceType>::PairLJCutKokkos(LAMMPS *lmp) : PairLJCut(lmp)
{
respa_enable = 0;
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK;
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
cutsq = NULL;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
PairLJCutKokkos<DeviceType>::~PairLJCutKokkos()
{
if (allocated) {
k_cutsq = DAT::tdual_ffloat_2d();
memory->sfree(cutsq);
cutsq = NULL;
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairLJCutKokkos<DeviceType>::cleanup_copy() {
// WHY needed: this prevents parent copy from deallocating any arrays
allocated = 0;
cutsq = NULL;
eatom = NULL;
vatom = NULL;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
{
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
double evdwl = 0.0;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
atomKK->sync(execution_space,datamask_read);
k_cutsq.template sync<DeviceType>();
k_params.template sync<DeviceType>();
if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
else atomKK->modified(execution_space,F_MASK);
x = atomKK->k_x.view<DeviceType>();
c_x = atomKK->k_x.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
type = atomKK->k_type.view<DeviceType>();
nlocal = atom->nlocal;
nall = atom->nlocal + atom->nghost;
special_lj[0] = force->special_lj[0];
special_lj[1] = force->special_lj[1];
special_lj[2] = force->special_lj[2];
special_lj[3] = force->special_lj[3];
newton_pair = force->newton_pair;
// loop over neighbors of my atoms
EV_FLOAT ev = pair_compute<PairLJCutKokkos<DeviceType>,void >(this,(NeighListKokkos<DeviceType>*)list);
DeviceType::fence();
if (eflag) eng_vdwl += ev.evdwl;
if (vflag_global) {
virial[0] += ev.v[0];
virial[1] += ev.v[1];
virial[2] += ev.v[2];
virial[3] += ev.v[3];
virial[4] += ev.v[4];
virial[5] += ev.v[5];
}
if (vflag_fdotr) virial_fdotr_compute();
}
template<class DeviceType>
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT PairLJCutKokkos<DeviceType>::
compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
const F_FLOAT r2inv = 1.0/rsq;
const F_FLOAT r6inv = r2inv*r2inv*r2inv;
const F_FLOAT forcelj = r6inv *
((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv -
(STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2));
return forcelj*r2inv;
}
template<class DeviceType>
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT PairLJCutKokkos<DeviceType>::
compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
const F_FLOAT r2inv = 1.0/rsq;
const F_FLOAT r6inv = r2inv*r2inv*r2inv;
return r6inv*((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv -
(STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)) -
(STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset);
}
/* ----------------------------------------------------------------------
allocate all arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void PairLJCutKokkos<DeviceType>::allocate()
{
PairLJCut::allocate();
int n = atom->ntypes;
memory->destroy(cutsq);
memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
d_cutsq = k_cutsq.template view<DeviceType>();
k_params = Kokkos::DualView<params_lj**,Kokkos::LayoutRight,DeviceType>("PairLJCut::params",n+1,n+1);
params = k_params.d_view;
}
/* ----------------------------------------------------------------------
global settings
------------------------------------------------------------------------- */
template<class DeviceType>
void PairLJCutKokkos<DeviceType>::settings(int narg, char **arg)
{
if (narg > 2) error->all(FLERR,"Illegal pair_style command");
PairLJCut::settings(1,arg);
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
template<class DeviceType>
void PairLJCutKokkos<DeviceType>::init_style()
{
PairLJCut::init_style();
// error if rRESPA with inner levels
if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
int respa = 0;
if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
if (respa)
error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
}
// irequest = neigh request made by parent class
neighflag = lmp->kokkos->neighflag;
int irequest = neighbor->nrequest - 1;
neighbor->requests[irequest]->
kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
!Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
neighbor->requests[irequest]->
kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk");
}
}
/* ----------------------------------------------------------------------
init for one type pair i,j and corresponding j,i
------------------------------------------------------------------------- */
template<class DeviceType>
double PairLJCutKokkos<DeviceType>::init_one(int i, int j)
{
double cutone = PairLJCut::init_one(i,j);
k_params.h_view(i,j).lj1 = lj1[i][j];
k_params.h_view(i,j).lj2 = lj2[i][j];
k_params.h_view(i,j).lj3 = lj3[i][j];
k_params.h_view(i,j).lj4 = lj4[i][j];
k_params.h_view(i,j).offset = offset[i][j];
k_params.h_view(i,j).cutsq = cutone*cutone;
k_params.h_view(j,i) = k_params.h_view(i,j);
if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
}
k_cutsq.h_view(i,j) = cutone*cutone;
k_cutsq.template modify<LMPHostType>();
k_params.template modify<LMPHostType>();
return cutone;
}
template class PairLJCutKokkos<LMPDeviceType>;
#if DEVICE==2
template class PairLJCutKokkos<LMPHostType>;
#endif

View File

@ -0,0 +1,112 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/cut/kk,PairLJCutKokkos<LMPDeviceType>)
PairStyle(lj/cut/kk/device,PairLJCutKokkos<LMPDeviceType>)
PairStyle(lj/cut/kk/host,PairLJCutKokkos<LMPHostType>)
#else
#ifndef LMP_PAIR_LJ_CUT_KOKKOS_H
#define LMP_PAIR_LJ_CUT_KOKKOS_H
#include "pair_kokkos.h"
#include "pair_lj_cut.h"
#include "neigh_list_kokkos.h"
namespace LAMMPS_NS {
template<class DeviceType>
class PairLJCutKokkos : public PairLJCut {
public:
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJCutKokkos(class LAMMPS *);
~PairLJCutKokkos();
void compute(int, int);
void settings(int, char **);
void init_style();
double init_one(int, int);
struct params_lj{
params_lj(){cutsq=0,lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
params_lj(int i){cutsq=0,lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
F_FLOAT cutsq,lj1,lj2,lj3,lj4,offset;
};
protected:
void cleanup_copy();
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const;
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const;
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
return 0;
}
Kokkos::DualView<params_lj**,Kokkos::LayoutRight,DeviceType> k_params;
typename Kokkos::DualView<params_lj**,Kokkos::LayoutRight,DeviceType>::t_dev_const params;
params_lj m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; // hardwired to space for 15 atom types
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
typename ArrayTypes<DeviceType>::t_x_array_randomread x;
typename ArrayTypes<DeviceType>::t_x_array c_x;
typename ArrayTypes<DeviceType>::t_f_array f;
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
//typename ArrayTypes<DeviceType>::t_ffloat_1d special_lj;
int newton_pair;
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
class AtomKokkos *atomKK;
int neighflag;
int nlocal,nall,eflag,vflag;
void allocate();
friend class PairComputeFunctor<PairLJCutKokkos,FULL,true>;
friend class PairComputeFunctor<PairLJCutKokkos,HALF,true>;
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJCutKokkos,N2,true>;
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairLJCutKokkos,FULL,false>;
friend class PairComputeFunctor<PairLJCutKokkos,HALF,false>;
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJCutKokkos,N2,false>;
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,352 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(table/kk,PairTableKokkos<LMPDeviceType>)
PairStyle(table/kk/device,PairTableKokkos<LMPDeviceType>)
PairStyle(table/kk/host,PairTableKokkos<LMPHostType>)
#else
#ifndef LMP_PAIR_TABLE_KOKKOS_H
#define LMP_PAIR_TABLE_KOKKOS_H
#include "pair.h"
#include "pair_kokkos.h"
#include "neigh_list_kokkos.h"
#include "atom_kokkos.h"
namespace LAMMPS_NS {
template<class Device,int TABSTYLE>
struct S_TableCompute {
enum {TabStyle = TABSTYLE};
};
template <class DeviceType, int NEIGHFLAG, int TABSTYLE>
class PairTableComputeFunctor;
template<class DeviceType>
class PairTableKokkos : public Pair {
public:
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairTableKokkos(class LAMMPS *);
virtual ~PairTableKokkos();
virtual void compute(int, int);
template<int TABSTYLE>
void compute_style(int, int);
/*template<int EVFLAG, int NEIGHFLAG, int NEWTON_PAIR, int TABSTYLE>
KOKKOS_FUNCTION
EV_FLOAT compute_item(const int& i,
const NeighListKokkos<DeviceType> &list) const;
*/
void settings(int, char **);
void coeff(int, char **);
double init_one(int, int);
void write_restart(FILE *);
void read_restart(FILE *);
void write_restart_settings(FILE *);
void read_restart_settings(FILE *);
double single(int, int, int, int, double, double, double, double &);
void *extract(const char *, int &);
void init_style();
protected:
enum{LOOKUP,LINEAR,SPLINE,BITMAP};
int tabstyle,tablength;
/*struct TableDeviceConst {
typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread cutsq;
typename ArrayTypes<DeviceType>::t_int_2d_randomread tabindex;
typename ArrayTypes<DeviceType>::t_int_1d_randomread nshiftbits,nmask;
typename ArrayTypes<DeviceType>::t_ffloat_1d_randomread innersq,invdelta,deltasq6;
typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2;
};*/
//Its faster not to use texture fetch if the number of tables is less than 32!
struct TableDeviceConst {
typename ArrayTypes<DeviceType>::t_ffloat_2d cutsq;
typename ArrayTypes<DeviceType>::t_int_2d tabindex;
typename ArrayTypes<DeviceType>::t_int_1d nshiftbits,nmask;
typename ArrayTypes<DeviceType>::t_ffloat_1d innersq,invdelta,deltasq6;
typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2;
};
struct TableDevice {
typename ArrayTypes<DeviceType>::t_ffloat_2d cutsq;
typename ArrayTypes<DeviceType>::t_int_2d tabindex;
typename ArrayTypes<DeviceType>::t_int_1d nshiftbits,nmask;
typename ArrayTypes<DeviceType>::t_ffloat_1d innersq,invdelta,deltasq6;
typename ArrayTypes<DeviceType>::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2;
};
struct TableHost {
typename ArrayTypes<LMPHostType>::t_ffloat_2d cutsq;
typename ArrayTypes<LMPHostType>::t_int_2d tabindex;
typename ArrayTypes<LMPHostType>::t_int_1d nshiftbits,nmask;
typename ArrayTypes<LMPHostType>::t_ffloat_1d innersq,invdelta,deltasq6;
typename ArrayTypes<LMPHostType>::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2;
};
struct Table {
int ninput,rflag,fpflag,match,ntablebits;
int nshiftbits,nmask;
double rlo,rhi,fplo,fphi,cut;
double *rfile,*efile,*ffile;
double *e2file,*f2file;
double innersq,delta,invdelta,deltasq6;
double *rsq,*drsq,*e,*de,*f,*df,*e2,*f2;
};
int ntables;
Table *tables;
TableDeviceConst d_table_const;
TableDevice* d_table;
TableHost* h_table;
int **tabindex;
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
void allocate();
void read_table(Table *, char *, char *);
void param_extract(Table *, char *);
void bcast_table(Table *);
void spline_table(Table *);
void compute_table(Table *);
void null_table(Table *);
void free_table(Table *);
void spline(double *, double *, int, double, double, double *);
double splint(double *, double *, double *, int, double);
typename ArrayTypes<DeviceType>::t_x_array_randomread x;
typename ArrayTypes<DeviceType>::t_x_array_const c_x;
typename ArrayTypes<DeviceType>::t_f_array f;
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
protected:
int nlocal,nall,eflag,vflag,neighflag,newton_pair;
class AtomKokkos *atomKK;
int update_table;
void create_kokkos_tables();
void cleanup_copy();
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const;
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const;
template<bool STACKPARAMS, class Specialisation>
KOKKOS_INLINE_FUNCTION
F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
return 0;
}
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,BITMAP> >;
/*template<int FULL_NEIGH>
KOKKOS_INLINE_FUNCTION
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
const F_FLOAT &fpair, const F_FLOAT &delx,
const F_FLOAT &dely, const F_FLOAT &delz) const;
*/
};
/*
template <class DeviceType, int NEIGHFLAG, int TABSTYLE>
struct PairTableComputeFunctor {
typedef DeviceType device_type ;
typedef EV_FLOAT value_type;
PairTableKokkos<DeviceType> c;
NeighListKokkos<DeviceType> list;
PairTableComputeFunctor(PairTableKokkos<DeviceType>* c_ptr,
NeighListKokkos<DeviceType>* list_ptr):
c(*c_ptr),list(*list_ptr) {};
~PairTableComputeFunctor() {c.cleanup_copy();list.clean_copy();};
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
if (c.newton_pair) c.template compute_item<0,NEIGHFLAG,1,TABSTYLE>(i,list);
else c.template compute_item<0,NEIGHFLAG,0,TABSTYLE>(i,list);
}
KOKKOS_INLINE_FUNCTION
void operator()(const int i, value_type &energy_virial) const {
if (c.newton_pair)
energy_virial += c.template compute_item<1,NEIGHFLAG,1,TABSTYLE>(i,list);
else
energy_virial += c.template compute_item<1,NEIGHFLAG,0,TABSTYLE>(i,list);
}
KOKKOS_INLINE_FUNCTION
static void init(volatile value_type &update) {
update.evdwl = 0;
update.ecoul = 0;
update.v[0] = 0;
update.v[1] = 0;
update.v[2] = 0;
update.v[3] = 0;
update.v[4] = 0;
update.v[5] = 0;
}
KOKKOS_INLINE_FUNCTION
static void join(volatile value_type &update,
const volatile value_type &source) {
update.evdwl += source.evdwl;
update.ecoul += source.ecoul;
update.v[0] += source.v[0];
update.v[1] += source.v[1];
update.v[2] += source.v[2];
update.v[3] += source.v[3];
update.v[4] += source.v[4];
update.v[5] += source.v[5];
}
};
*/
}
#endif
#endif
/* ERROR/WARNING messages:
E: Pair distance < table inner cutoff
Two atoms are closer together than the pairwise table allows.
E: Pair distance > table outer cutoff
Two atoms are further apart than the pairwise table allows.
E: Illegal ... command
Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: Unknown table style in pair_style command
Style of table is invalid for use with pair_style table command.
E: Illegal number of pair table entries
There must be at least 2 table entries.
E: Invalid pair table length
Length of read-in pair table is invalid
E: Invalid pair table cutoff
Cutoffs in pair_coeff command are not valid with read-in pair table.
E: Bitmapped table in file does not match requested table
Setting for bitmapped table in pair_coeff command must match table
in file exactly.
E: All pair coeffs are not set
All pair coefficients must be set in the data file or by the
pair_coeff command before running a simulation.
E: Cannot open file %s
The specified file cannot be opened. Check that the path and name are
correct. If the file is a compressed file, also check that the gzip
executable can be found and run.
E: Did not find keyword in table file
Keyword used in pair_coeff command was not found in table file.
E: Bitmapped table is incorrect length in table file
Number of table entries is not a correct power of 2.
E: Invalid keyword in pair table parameters
Keyword used in list of table parameters is not recognized.
E: Pair table parameters did not set N
List of pair table parameters must include N setting.
E: Pair table cutoffs must all be equal to use with KSpace
When using pair style table with a long-range KSpace solver, the
cutoffs for all atom type pairs must all be the same, since the
long-range solver starts at that cutoff.
*/

View File

@ -0,0 +1,443 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "string.h"
#include "verlet_kokkos.h"
#include "neighbor.h"
#include "domain.h"
#include "comm.h"
#include "atom.h"
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
#include "angle.h"
#include "dihedral.h"
#include "improper.h"
#include "kspace.h"
#include "output.h"
#include "update.h"
#include "modify.h"
#include "compute.h"
#include "fix.h"
#include "timer.h"
#include "memory.h"
#include "error.h"
#include <ctime>
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
VerletKokkos::VerletKokkos(LAMMPS *lmp, int narg, char **arg) :
Verlet(lmp, narg, arg)
{
atomKK = (AtomKokkos *) atom;
}
/* ----------------------------------------------------------------------
setup before run
------------------------------------------------------------------------- */
void VerletKokkos::setup()
{
if (comm->me == 0 && screen) fprintf(screen,"Setting up run ...\n");
update->setupflag = 1;
// setup domain, communication and neighboring
// acquire ghosts
// build neighbor lists
atomKK->modified(Host,ALL_MASK);
atomKK->setup();
modify->setup_pre_exchange();
// debug
atomKK->sync(Host,ALL_MASK);
atomKK->modified(Host,ALL_MASK);
if (triclinic) domain->x2lamda(atomKK->nlocal);
domain->pbc();
atomKK->sync(Host,ALL_MASK);
domain->reset_box();
comm->setup();
if (neighbor->style) neighbor->setup_bins();
comm->exchange();
if (atomKK->sortfreq > 0) atomKK->sort();
comm->borders();
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
atomKK->sync(Host,ALL_MASK);
domain->image_check();
domain->box_too_small_check();
modify->setup_pre_neighbor();
atomKK->modified(Host,ALL_MASK);
neighbor->build();
neighbor->ncalls = 0;
// compute all forces
ev_set(update->ntimestep);
force_clear();
modify->setup_pre_force(vflag);
if (pair_compute_flag) force->pair->compute(eflag,vflag);
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
if (atomKK->molecular) {
if (force->bond) force->bond->compute(eflag,vflag);
if (force->angle) force->angle->compute(eflag,vflag);
if (force->dihedral) force->dihedral->compute(eflag,vflag);
if (force->improper) force->improper->compute(eflag,vflag);
}
if (force->kspace) {
force->kspace->setup();
if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
else force->kspace->compute_dummy(eflag,vflag);
}
if (force->newton) comm->reverse_comm();
modify->setup(vflag);
output->setup();
update->setupflag = 0;
}
/* ----------------------------------------------------------------------
setup without output
flag = 0 = just force calculation
flag = 1 = reneighbor and force calculation
------------------------------------------------------------------------- */
void VerletKokkos::setup_minimal(int flag)
{
update->setupflag = 1;
// setup domain, communication and neighboring
// acquire ghosts
// build neighbor lists
if (flag) {
atomKK->modified(Host,ALL_MASK);
modify->setup_pre_exchange();
// debug
atomKK->sync(Host,ALL_MASK);
atomKK->modified(Host,ALL_MASK);
if (triclinic) domain->x2lamda(atomKK->nlocal);
domain->pbc();
atomKK->sync(Host,ALL_MASK);
domain->reset_box();
comm->setup();
if (neighbor->style) neighbor->setup_bins();
comm->exchange();
comm->borders();
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
atomKK->sync(Host,ALL_MASK);
domain->image_check();
domain->box_too_small_check();
modify->setup_pre_neighbor();
atomKK->modified(Host,ALL_MASK);
neighbor->build();
neighbor->ncalls = 0;
}
// compute all forces
ev_set(update->ntimestep);
force_clear();
modify->setup_pre_force(vflag);
if (pair_compute_flag) force->pair->compute(eflag,vflag);
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
if (atomKK->molecular) {
if (force->bond) force->bond->compute(eflag,vflag);
if (force->angle) force->angle->compute(eflag,vflag);
if (force->dihedral) force->dihedral->compute(eflag,vflag);
if (force->improper) force->improper->compute(eflag,vflag);
}
if (force->kspace) {
force->kspace->setup();
if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
else force->kspace->compute_dummy(eflag,vflag);
}
if (force->newton) comm->reverse_comm();
modify->setup(vflag);
update->setupflag = 0;
}
/* ----------------------------------------------------------------------
run for N steps
------------------------------------------------------------------------- */
void VerletKokkos::run(int n)
{
bigint ntimestep;
int nflag,sortflag;
int n_post_integrate = modify->n_post_integrate;
int n_pre_exchange = modify->n_pre_exchange;
int n_pre_neighbor = modify->n_pre_neighbor;
int n_pre_force = modify->n_pre_force;
int n_post_force = modify->n_post_force;
int n_end_of_step = modify->n_end_of_step;
if (atomKK->sortfreq > 0) sortflag = 1;
else sortflag = 0;
static double time = 0.0;
static int count = 0;
atomKK->sync(Device,ALL_MASK);
Kokkos::Impl::Timer ktimer;
for (int i = 0; i < n; i++) {
ntimestep = ++update->ntimestep;
ev_set(ntimestep);
// initial time integration
ktimer.reset();
modify->initial_integrate(vflag);
time += ktimer.seconds();
if (n_post_integrate) modify->post_integrate();
// regular communication vs neighbor list rebuild
nflag = neighbor->decide();
if (nflag == 0) {
timer->stamp();
comm->forward_comm();
timer->stamp(TIME_COMM);
} else {
// added debug
//atomKK->sync(Host,ALL_MASK);
//atomKK->modified(Host,ALL_MASK);
if (n_pre_exchange) modify->pre_exchange();
// debug
//atomKK->sync(Host,ALL_MASK);
//atomKK->modified(Host,ALL_MASK);
if (triclinic) domain->x2lamda(atomKK->nlocal);
domain->pbc();
if (domain->box_change) {
domain->reset_box();
comm->setup();
if (neighbor->style) neighbor->setup_bins();
}
timer->stamp();
// added debug
//atomKK->sync(Device,ALL_MASK);
//atomKK->modified(Device,ALL_MASK);
comm->exchange();
if (sortflag && ntimestep >= atomKK->nextsort) atomKK->sort();
comm->borders();
// added debug
//atomKK->sync(Host,ALL_MASK);
//atomKK->modified(Host,ALL_MASK);
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
timer->stamp(TIME_COMM);
if (n_pre_neighbor) modify->pre_neighbor();
neighbor->build();
timer->stamp(TIME_NEIGHBOR);
}
// force computations
// important for pair to come before bonded contributions
// since some bonded potentials tally pairwise energy/virial
// and Pair:ev_tally() needs to be called before any tallying
force_clear();
// added for debug
//atomKK->k_x.sync<LMPHostType>();
//atomKK->k_f.sync<LMPHostType>();
//atomKK->k_f.modify<LMPHostType>();
if (n_pre_force) modify->pre_force(vflag);
timer->stamp();
if (pair_compute_flag) {
force->pair->compute(eflag,vflag);
timer->stamp(TIME_PAIR);
}
if (atomKK->molecular) {
if (force->bond) force->bond->compute(eflag,vflag);
if (force->angle) force->angle->compute(eflag,vflag);
if (force->dihedral) force->dihedral->compute(eflag,vflag);
if (force->improper) force->improper->compute(eflag,vflag);
timer->stamp(TIME_BOND);
}
if (kspace_compute_flag) {
force->kspace->compute(eflag,vflag);
timer->stamp(TIME_KSPACE);
}
// reverse communication of forces
if (force->newton) {
atomKK->sync(Host,F_MASK);
comm->reverse_comm();
atomKK->modified(Host,F_MASK);
timer->stamp(TIME_COMM);
}
// force modifications, final time integration, diagnostics
ktimer.reset();
if (n_post_force) modify->post_force(vflag);
modify->final_integrate();
if (n_end_of_step) modify->end_of_step();
time += ktimer.seconds();
// all output
if (ntimestep == output->next) {
atomKK->sync(Host,ALL_MASK);
timer->stamp();
output->write(ntimestep);
timer->stamp(TIME_OUTPUT);
}
}
}
/* ----------------------------------------------------------------------
clear force on own & ghost atoms
clear other arrays as needed
------------------------------------------------------------------------- */
void VerletKokkos::force_clear()
{
int i;
if (external_force_clear) return;
// clear force on all particles
// if either newton flag is set, also include ghosts
// when using threads always clear all forces.
if (neighbor->includegroup == 0) {
int nall;
if (force->newton) nall = atomKK->nlocal + atomKK->nghost;
else nall = atomKK->nlocal;
size_t nbytes = sizeof(double) * nall;
if (nbytes) {
if (atomKK->k_f.modified_host > atomKK->k_f.modified_device) {
memset_kokkos(atomKK->k_f.view<LMPHostType>());
atomKK->modified(Host,F_MASK);
} else {
memset_kokkos(atomKK->k_f.view<LMPDeviceType>());
atomKK->modified(Device,F_MASK);
}
if (torqueflag) memset(&(atomKK->torque[0][0]),0,3*nbytes);
if (erforceflag) memset(&(atomKK->erforce[0]), 0, nbytes);
if (e_flag) memset(&(atomKK->de[0]), 0, nbytes);
if (rho_flag) memset(&(atomKK->drho[0]), 0, nbytes);
}
// neighbor includegroup flag is set
// clear force only on initial nfirst particles
// if either newton flag is set, also include ghosts
} else {
int nall = atomKK->nfirst;
if (atomKK->k_f.modified_host > atomKK->k_f.modified_device) {
memset_kokkos(atomKK->k_f.view<LMPHostType>());
atomKK->modified(Host,F_MASK);
} else {
memset_kokkos(atomKK->k_f.view<LMPDeviceType>());
atomKK->modified(Device,F_MASK);
}
if (torqueflag) {
double **torque = atomKK->torque;
for (i = 0; i < nall; i++) {
torque[i][0] = 0.0;
torque[i][1] = 0.0;
torque[i][2] = 0.0;
}
}
if (erforceflag) {
double *erforce = atomKK->erforce;
for (i = 0; i < nall; i++) erforce[i] = 0.0;
}
if (e_flag) {
double *de = atomKK->de;
for (i = 0; i < nall; i++) de[i] = 0.0;
}
if (rho_flag) {
double *drho = atomKK->drho;
for (i = 0; i < nall; i++) drho[i] = 0.0;
}
if (force->newton) {
nall = atomKK->nlocal + atomKK->nghost;
if (torqueflag) {
double **torque = atomKK->torque;
for (i = atomKK->nlocal; i < nall; i++) {
torque[i][0] = 0.0;
torque[i][1] = 0.0;
torque[i][2] = 0.0;
}
}
if (erforceflag) {
double *erforce = atomKK->erforce;
for (i = atomKK->nlocal; i < nall; i++) erforce[i] = 0.0;
}
if (e_flag) {
double *de = atomKK->de;
for (i = 0; i < nall; i++) de[i] = 0.0;
}
if (rho_flag) {
double *drho = atomKK->drho;
for (i = 0; i < nall; i++) drho[i] = 0.0;
}
}
}
}

View File

@ -0,0 +1,48 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef INTEGRATE_CLASS
IntegrateStyle(verlet/kk,VerletKokkos)
#else
#ifndef LMP_VERLET_KOKKOS_H
#define LMP_VERLET_KOKKOS_H
#include "verlet.h"
namespace LAMMPS_NS {
class VerletKokkos : public Verlet {
public:
VerletKokkos(class LAMMPS *, int, char **);
~VerletKokkos() {}
void setup();
void setup_minimal(int);
void run(int);
protected:
class AtomKokkos *atomKK;
void force_clear();
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

111
src/MAKE/Makefile.cuda Executable file
View File

@ -0,0 +1,111 @@
# cuda = RedHat Linux box, nvcc for Kokkos, MPICH2, FFTW
SHELL = /bin/sh
# ---------------------------------------------------------------------
# compiler/linker settings
# specify flags and libraries needed for your compiler
CC = nvcc
CCFLAGS = -g -O3 -arch=sm_20
SHFLAGS = -fPIC
DEPFLAGS = -M
LINK = g++
LINKFLAGS = -g -O
LIB =
SIZE = size
ARCHIVE = ar
ARFLAGS = -rc
SHLIBFLAGS = -shared
# ---------------------------------------------------------------------
# LAMMPS-specific settings
# specify settings for LAMMPS features you will use
# if you change any -D setting, do full re-compile after "make clean"
# LAMMPS ifdef settings, OPTIONAL
# see possible settings in doc/Section_start.html#2_2 (step 4)
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
# MPI library, REQUIRED
# see discussion in doc/Section_start.html#2_2 (step 5)
# can point to dummy MPI library in src/STUBS as in Makefile.serial
# INC = path for mpi.h, MPI compiler settings
# PATH = path for MPI library
# LIB = name of MPI library
MPI_INC = -DMPICH_SKIP_MPICXX
MPI_PATH =
MPI_LIB = -lmpich -lmpl -lpthread
# FFT library, OPTIONAL
# see discussion in doc/Section_start.html#2_2 (step 6)
# can be left blank to use provided KISS FFT library
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC = -DFFT_FFTW
FFT_PATH =
FFT_LIB = -lfftw
# JPEG and/or PNG library, OPTIONAL
# see discussion in doc/Section_start.html#2_2 (step 7)
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
# INC = path(s) for jpeglib.h and/or png.h
# PATH = path(s) for JPEG library and/or PNG library
# LIB = name(s) of JPEG library and/or PNG library
JPG_INC =
JPG_PATH =
JPG_LIB = -ljpeg
# ---------------------------------------------------------------------
# build rules and dependencies
# no need to edit this section
include Makefile.package.settings
include Makefile.package
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
# Path to src files
vpath %.cpp ..
vpath %.h ..
# Link target
$(EXE): $(OBJ)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cu
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
%.o:%.cpp
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
%.d:%.cpp
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
# Individual dependencies
DEPENDS = $(OBJ:.o=.d)
sinclude $(DEPENDS)

View File

@ -14,7 +14,7 @@ OBJ = $(SRC:.cpp=.o)
# Package variables
PACKAGE = asphere body class2 colloid dipole fld gpu granular kim \
kspace manybody mc meam misc molecule mpiio opt peri poems \
kokkos kspace manybody mc meam misc molecule mpiio opt peri poems \
reax replica rigid shock srd voronoi xtc
PACKUSER = user-atc user-awpmd user-cg-cmm user-colvars \

View File

@ -80,8 +80,10 @@ void AtomVec::init()
deform_groupbit = domain->deform_groupbit;
h_rate = domain->h_rate;
if (lmp->cuda != NULL && cudable == false)
if (lmp->cuda != NULL && !cudable)
error->all(FLERR,"USER-CUDA package requires a cuda enabled atom_style");
if (lmp->kokkos != NULL && !kokkosable)
error->all(FLERR,"KOKKOS package requires a kokkos enabled atom_style");
}
/* ----------------------------------------------------------------------

View File

@ -334,7 +334,7 @@ void ComputePropertyLocal::compute_local()
int ComputePropertyLocal::count_pairs(int allflag, int forceflag)
{
int i,j,m,n,ii,jj,inum,jnum,itype,jtype;
int i,j,m,ii,jj,inum,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *ilist,*jlist,*numneigh,**firstneigh;
@ -358,7 +358,7 @@ int ComputePropertyLocal::count_pairs(int allflag, int forceflag)
double **cutsq = force->pair->cutsq;
m = n = 0;
m = 0;
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
if (!(mask[i] & groupbit)) continue;