forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12040 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
8ac75c42a4
commit
3545d44d0d
|
@ -0,0 +1,71 @@
|
|||
# Install/unInstall package files in LAMMPS
|
||||
# mode = 0/1/2 for uninstall/install/update
|
||||
|
||||
mode=$1
|
||||
|
||||
# arg1 = file, arg2 = file it depends on
|
||||
|
||||
action () {
|
||||
if (test $mode = 0) then
|
||||
rm -f ../$1
|
||||
elif (! cmp -s $1 ../$1) then
|
||||
if (test -z "$2" || test -e ../$2) then
|
||||
cp $1 ..
|
||||
if (test $mode = 2) then
|
||||
echo " updating src/$1"
|
||||
fi
|
||||
fi
|
||||
elif (test -n "$2") then
|
||||
if (test ! -e ../$2) then
|
||||
rm -f ../$1
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
# force rebuild of files with LMP_KOKKOS switch
|
||||
|
||||
touch ../accelerator_kokkos.h
|
||||
touch ../memory.h
|
||||
|
||||
# all package files with no dependencies
|
||||
|
||||
for file in *.cpp *.h; do
|
||||
action $file
|
||||
done
|
||||
|
||||
# edit 2 Makefile.package files to include/exclude package info
|
||||
|
||||
if (test $1 = 1) then
|
||||
|
||||
if (test -e ../Makefile.package) then
|
||||
sed -i -e 's/[^ \t]*kokkos[^ \t]* //g' ../Makefile.package
|
||||
sed -i -e 's/[^ \t]*KOKKOS[^ \t]* //g' ../Makefile.package
|
||||
sed -i -e 's|^PKG_INC =[ \t]*|&-I..\/..\/lib\/kokkos\/core\/src -I../../lib/kokkos/containers/src -DLMP_KOKKOS |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/kokkos\/core\/src |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_LIB =[ \t]*|&-lkokkoscore |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(kokkos_SYSINC) |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(kokkos_SYSLIB) |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(kokkos_SYSPATH) |' ../Makefile.package
|
||||
fi
|
||||
|
||||
if (test -e ../Makefile.package.settings) then
|
||||
sed -i -e '/^include.*kokkos.*$/d' ../Makefile.package.settings
|
||||
# multiline form needed for BSD sed on Macs
|
||||
sed -i -e '4 i \
|
||||
include ..\/..\/lib\/kokkos\/Makefile.lammps
|
||||
' ../Makefile.package.settings
|
||||
|
||||
fi
|
||||
|
||||
elif (test $1 = 0) then
|
||||
|
||||
if (test -e ../Makefile.package) then
|
||||
sed -i -e 's/[^ \t]*kokkos[^ \t]* //g' ../Makefile.package
|
||||
sed -i -e 's/[^ \t]*KOKKOS[^ \t]* //g' ../Makefile.package
|
||||
fi
|
||||
|
||||
if (test -e ../Makefile.package.settings) then
|
||||
sed -i -e '/^include.*kokkos.*$/d' ../Makefile.package.settings
|
||||
fi
|
||||
|
||||
fi
|
|
@ -0,0 +1,190 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "mpi.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_vec.h"
|
||||
#include "atom_vec_kokkos.h"
|
||||
#include "comm_kokkos.h"
|
||||
#include "update.h"
|
||||
#include "domain.h"
|
||||
#include "atom_masks.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp)
|
||||
{
|
||||
// set CommKokkos pointer to Atom class, since CommKokkos allocated first
|
||||
|
||||
((CommKokkos *) comm)->atomKK = this;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
AtomKokkos::~AtomKokkos()
|
||||
{
|
||||
k_tag = DAT::tdual_int_1d();
|
||||
k_mask = DAT::tdual_int_1d();
|
||||
k_type = DAT::tdual_int_1d();
|
||||
k_image = DAT::tdual_int_1d();
|
||||
k_molecule = DAT::tdual_int_1d();
|
||||
|
||||
k_x = DAT::tdual_x_array();
|
||||
k_v = DAT::tdual_v_array();
|
||||
k_f = DAT::tdual_f_array();
|
||||
|
||||
k_mass = DAT::tdual_float_1d();
|
||||
|
||||
tag = NULL;
|
||||
mask = NULL;
|
||||
type = NULL;
|
||||
image = NULL;
|
||||
molecule = NULL;
|
||||
mass = NULL;
|
||||
|
||||
memory->sfree(x);
|
||||
memory->sfree(v);
|
||||
memory->sfree(f);
|
||||
x = NULL;
|
||||
v = NULL;
|
||||
f = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask)
|
||||
{
|
||||
((AtomVecKokkos *) avec)->sync(space,mask);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomKokkos::modified(const ExecutionSpace space, unsigned int mask)
|
||||
{
|
||||
((AtomVecKokkos *) avec)->modified(space,mask);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomKokkos::allocate_type_arrays()
|
||||
{
|
||||
if (avec->mass_type) {
|
||||
k_mass = DAT::tdual_float_1d("Mass",ntypes+1);
|
||||
mass = k_mass.h_view.ptr_on_device();
|
||||
mass_setflag = new int[ntypes+1];
|
||||
for (int itype = 1; itype <= ntypes; itype++) mass_setflag[itype] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomKokkos::sort()
|
||||
{
|
||||
int i,m,n,ix,iy,iz,ibin,empty;
|
||||
|
||||
sync(Host,ALL_MASK);
|
||||
modified(Host,ALL_MASK);
|
||||
|
||||
// set next timestep for sorting to take place
|
||||
|
||||
nextsort = (update->ntimestep/sortfreq)*sortfreq + sortfreq;
|
||||
|
||||
// re-setup sort bins if needed
|
||||
|
||||
if (domain->box_change) setup_sort_bins();
|
||||
if (nbins == 1) return;
|
||||
|
||||
// reallocate per-atom vectors if needed
|
||||
|
||||
if (nlocal > maxnext) {
|
||||
memory->destroy(next);
|
||||
memory->destroy(permute);
|
||||
maxnext = atom->nmax;
|
||||
memory->create(next,maxnext,"atom:next");
|
||||
memory->create(permute,maxnext,"atom:permute");
|
||||
}
|
||||
|
||||
// insure there is one extra atom location at end of arrays for swaps
|
||||
|
||||
if (nlocal == nmax) avec->grow(0);
|
||||
|
||||
// bin atoms in reverse order so linked list will be in forward order
|
||||
|
||||
for (i = 0; i < nbins; i++) binhead[i] = -1;
|
||||
|
||||
HAT::t_x_array_const h_x = k_x.view<LMPHostType>();
|
||||
for (i = nlocal-1; i >= 0; i--) {
|
||||
ix = static_cast<int> ((h_x(i,0)-bboxlo[0])*bininvx);
|
||||
iy = static_cast<int> ((h_x(i,1)-bboxlo[1])*bininvy);
|
||||
iz = static_cast<int> ((h_x(i,2)-bboxlo[2])*bininvz);
|
||||
ix = MAX(ix,0);
|
||||
iy = MAX(iy,0);
|
||||
iz = MAX(iz,0);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
ibin = iz*nbiny*nbinx + iy*nbinx + ix;
|
||||
next[i] = binhead[ibin];
|
||||
binhead[ibin] = i;
|
||||
}
|
||||
|
||||
// permute = desired permutation of atoms
|
||||
// permute[I] = J means Ith new atom will be Jth old atom
|
||||
|
||||
n = 0;
|
||||
for (m = 0; m < nbins; m++) {
|
||||
i = binhead[m];
|
||||
while (i >= 0) {
|
||||
permute[n++] = i;
|
||||
i = next[i];
|
||||
}
|
||||
}
|
||||
|
||||
// current = current permutation, just reuse next vector
|
||||
// current[I] = J means Ith current atom is Jth old atom
|
||||
|
||||
int *current = next;
|
||||
for (i = 0; i < nlocal; i++) current[i] = i;
|
||||
|
||||
// reorder local atom list, when done, current = permute
|
||||
// perform "in place" using copy() to extra atom location at end of list
|
||||
// inner while loop processes one cycle of the permutation
|
||||
// copy before inner-loop moves an atom to end of atom list
|
||||
// copy after inner-loop moves atom at end of list back into list
|
||||
// empty = location in atom list that is currently empty
|
||||
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
if (current[i] == permute[i]) continue;
|
||||
avec->copy(i,nlocal,0);
|
||||
empty = i;
|
||||
while (permute[empty] != i) {
|
||||
avec->copy(permute[empty],empty,0);
|
||||
empty = current[empty] = permute[empty];
|
||||
}
|
||||
avec->copy(nlocal,empty,0);
|
||||
current[empty] = permute[empty];
|
||||
}
|
||||
|
||||
// sanity check that current = permute
|
||||
|
||||
//int flag = 0;
|
||||
//for (i = 0; i < nlocal; i++)
|
||||
// if (current[i] != permute[i]) flag = 1;
|
||||
//int flagall;
|
||||
//MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
|
||||
//if (flagall) error->all(FLERR,"Atom sort did not operate correctly");
|
||||
}
|
|
@ -0,0 +1,86 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "atom.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
#ifndef LMP_ATOM_KOKKOS_H
|
||||
#define LMP_ATOM_KOKKOS_H
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class AtomKokkos : public Atom {
|
||||
public:
|
||||
DAT::tdual_int_1d k_tag, k_type, k_mask, k_molecule;
|
||||
DAT::tdual_tagint_1d k_image;
|
||||
DAT::tdual_x_array k_x;
|
||||
DAT::tdual_v_array k_v;
|
||||
DAT::tdual_f_array k_f;
|
||||
|
||||
DAT::tdual_float_1d k_mass;
|
||||
|
||||
AtomKokkos(class LAMMPS *);
|
||||
~AtomKokkos();
|
||||
|
||||
virtual void allocate_type_arrays();
|
||||
void sync(const ExecutionSpace space, unsigned int mask);
|
||||
void modified(const ExecutionSpace space, unsigned int mask);
|
||||
virtual void sort();
|
||||
};
|
||||
|
||||
template<class ViewType, class IndexView>
|
||||
class SortFunctor {
|
||||
typedef typename ViewType::device_type device_type;
|
||||
ViewType source;
|
||||
Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type> dest;
|
||||
IndexView index;
|
||||
SortFunctor(ViewType src, typename Kokkos::Impl::enable_if<ViewType::dynamic_rank==1,IndexView>::type ind):source(src),index(ind){
|
||||
dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.dimension_0());
|
||||
}
|
||||
SortFunctor(ViewType src, typename Kokkos::Impl::enable_if<ViewType::dynamic_rank==2,IndexView>::type ind):source(src),index(ind){
|
||||
dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.dimension_0(),src.dimension_1());
|
||||
}
|
||||
SortFunctor(ViewType src, typename Kokkos::Impl::enable_if<ViewType::dynamic_rank==3,IndexView>::type ind):source(src),index(ind){
|
||||
dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.dimension_0(),src.dimension_1(),src.dimension_2());
|
||||
}
|
||||
SortFunctor(ViewType src, typename Kokkos::Impl::enable_if<ViewType::dynamic_rank==4,IndexView>::type ind):source(src),index(ind){
|
||||
dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.dimension_0(),src.dimension_1(),src.dimension_2(),src.dimension_3());
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const typename Kokkos::Impl::enable_if<ViewType::rank==1, int>::type& i) {
|
||||
dest(i) = source(index(i));
|
||||
}
|
||||
void operator()(const typename Kokkos::Impl::enable_if<ViewType::rank==2, int>::type& i) {
|
||||
for(int j=0;j<source.dimension_1();j++)
|
||||
dest(i,j) = source(index(i),j);
|
||||
}
|
||||
void operator()(const typename Kokkos::Impl::enable_if<ViewType::rank==3, int>::type& i) {
|
||||
for(int j=0;j<source.dimension_1();j++)
|
||||
for(int k=0;k<source.dimension_2();k++)
|
||||
dest(i,j,k) = source(index(i),j,k);
|
||||
}
|
||||
void operator()(const typename Kokkos::Impl::enable_if<ViewType::rank==4, int>::type& i) {
|
||||
for(int j=0;j<source.dimension_1();j++)
|
||||
for(int k=0;k<source.dimension_2();k++)
|
||||
for(int l=0;l<source.dimension_3();l++)
|
||||
dest(i,j,k,l) = source(index(i),j,k,l);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,111 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale AtomicKokkos/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef ATOM_CLASS
|
||||
|
||||
AtomStyle(atomic/kk,AtomVecAtomicKokkos)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_ATOM_VEC_ATOMIC_KOKKOS_H
|
||||
#define LMP_ATOM_VEC_ATOMIC_KOKKOS_H
|
||||
|
||||
#include "atom_vec_kokkos.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class AtomVecAtomicKokkos : public AtomVecKokkos {
|
||||
public:
|
||||
AtomVecAtomicKokkos(class LAMMPS *);
|
||||
virtual ~AtomVecAtomicKokkos() {}
|
||||
void grow(int);
|
||||
void copy(int, int, int);
|
||||
int pack_comm(int, int *, double *, int, int *);
|
||||
int pack_comm_vel(int, int *, double *, int, int *);
|
||||
void unpack_comm(int, int, double *);
|
||||
void unpack_comm_vel(int, int, double *);
|
||||
int pack_reverse(int, int, double *);
|
||||
void unpack_reverse(int, int *, double *);
|
||||
int pack_border(int, int *, double *, int, int *);
|
||||
int pack_border_vel(int, int *, double *, int, int *);
|
||||
void unpack_border(int, int, double *);
|
||||
void unpack_border_vel(int, int, double *);
|
||||
int pack_exchange(int, double *);
|
||||
int unpack_exchange(double *);
|
||||
int size_restart();
|
||||
int pack_restart(int, double *);
|
||||
int unpack_restart(double *);
|
||||
void create_atom(int, double *);
|
||||
void data_atom(double *, tagint, char **);
|
||||
void pack_data(double **);
|
||||
void write_data(FILE *, int, double **);
|
||||
bigint memory_usage();
|
||||
|
||||
void grow_reset();
|
||||
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
|
||||
const int & iswap,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
void unpack_comm_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf);
|
||||
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
|
||||
const int & iswap, const int nfirst,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
DAT::tdual_xfloat_2d buf,int iswap,
|
||||
int pbc_flag, int *pbc, ExecutionSpace space);
|
||||
void unpack_border_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space);
|
||||
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi);
|
||||
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space);
|
||||
|
||||
void sync(ExecutionSpace space, unsigned int mask);
|
||||
void modified(ExecutionSpace space, unsigned int mask);
|
||||
|
||||
protected:
|
||||
int *tag,*type,*mask;
|
||||
tagint *image;
|
||||
double **x,**v,**f;
|
||||
|
||||
DAT::t_int_1d d_tag, d_type, d_mask;
|
||||
HAT::t_int_1d h_tag, h_type, h_mask;
|
||||
|
||||
DAT::t_tagint_1d d_image;
|
||||
HAT::t_tagint_1d h_image;
|
||||
|
||||
DAT::t_x_array d_x;
|
||||
DAT::t_v_array d_v;
|
||||
DAT::t_f_array d_f;
|
||||
HAT::t_x_array h_x;
|
||||
HAT::t_v_array h_v;
|
||||
HAT::t_f_array h_f;
|
||||
|
||||
DAT::tdual_int_1d k_count;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -0,0 +1,23 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "atom_vec_kokkos.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp)
|
||||
{
|
||||
kokkosable = 1;
|
||||
}
|
|
@ -0,0 +1,76 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_ATOM_VEC_KOKKOS_H
|
||||
#define LMP_ATOM_VEC_KOKKOS_H
|
||||
|
||||
#include "atom_vec.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class AtomVecKokkos : public AtomVec {
|
||||
public:
|
||||
AtomVecKokkos(class LAMMPS *);
|
||||
virtual ~AtomVecKokkos() {}
|
||||
|
||||
virtual void sync(ExecutionSpace space, unsigned int mask) {};
|
||||
virtual void modified(ExecutionSpace space, unsigned int mask) {};
|
||||
|
||||
virtual int
|
||||
pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
|
||||
const int & iswap, const int nfirst,
|
||||
const int &pbc_flag, const int pbc[])
|
||||
{return 0;}
|
||||
virtual int
|
||||
pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list,
|
||||
const int & iswap, const DAT::tdual_xfloat_2d &buf,
|
||||
const int &pbc_flag, const int pbc[])
|
||||
{return 0;}
|
||||
virtual void
|
||||
unpack_comm_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf) {};
|
||||
virtual int
|
||||
pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
DAT::tdual_xfloat_2d buf,int iswap,
|
||||
int pbc_flag, int *pbc, ExecutionSpace space)
|
||||
{return 0;};
|
||||
virtual void
|
||||
unpack_border_kokkos(const int &n, const int &nfirst,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space) {};
|
||||
|
||||
virtual int
|
||||
pack_exchange_kokkos(const int &nsend, DAT::tdual_xfloat_2d &buf,
|
||||
DAT::tdual_int_1d k_sendlist,
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space, int dim, X_FLOAT lo, X_FLOAT hi)
|
||||
{return 0;};
|
||||
virtual int
|
||||
unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
|
||||
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
|
||||
ExecutionSpace space)
|
||||
{return 0;};
|
||||
|
||||
protected:
|
||||
class AtomKokkos *atomKK;
|
||||
class CommKokkos *commKK;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -0,0 +1,820 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "comm_kokkos.h"
|
||||
#include "kokkos.h"
|
||||
#include "atom.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_vec.h"
|
||||
#include "atom_vec_kokkos.h"
|
||||
#include "domain.h"
|
||||
#include "atom_masks.h"
|
||||
#include "error.h"
|
||||
#include "memory.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define BUFFACTOR 1.5
|
||||
#define BUFMIN 10000
|
||||
#define BUFEXTRA 1000
|
||||
|
||||
enum{SINGLE,MULTI};
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
setup MPI and allocate buffer space
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
|
||||
{
|
||||
sendlist = NULL; // need to free this since parent allocated?
|
||||
k_sendlist = ArrayTypes<LMPDeviceType>::tdual_int_2d();
|
||||
|
||||
// error check for disallow of OpenMP threads?
|
||||
|
||||
// initialize comm buffers & exchange memory
|
||||
|
||||
maxsend = BUFMIN;
|
||||
k_buf_send = ArrayTypes<LMPDeviceType>::
|
||||
tdual_xfloat_2d("comm:k_buf_send",(maxsend+BUFEXTRA+5)/6,6);
|
||||
buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
|
||||
|
||||
maxrecv = BUFMIN;
|
||||
k_buf_recv = ArrayTypes<LMPDeviceType>::
|
||||
tdual_xfloat_2d("comm:k_buf_recv",(maxrecv+5)/6,6);
|
||||
buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device();
|
||||
|
||||
k_exchange_sendlist = ArrayTypes<LMPDeviceType>::
|
||||
tdual_int_1d("comm:k_exchange_sendlist",100);
|
||||
k_exchange_copylist = ArrayTypes<LMPDeviceType>::
|
||||
tdual_int_1d("comm:k_exchange_copylist",100);
|
||||
k_count = ArrayTypes<LMPDeviceType>::tdual_int_1d("comm:k_count",1);
|
||||
k_sendflag = ArrayTypes<LMPDeviceType>::tdual_int_1d("comm:k_sendflag",100);
|
||||
|
||||
// next line is bogus?
|
||||
|
||||
memory->create(maxsendlist,maxswap,"comm:maxsendlist");
|
||||
for (int i = 0; i < maxswap; i++) {
|
||||
maxsendlist[i] = BUFMIN;
|
||||
}
|
||||
memory->create_kokkos(k_sendlist,sendlist,maxswap,BUFMIN,"comm:sendlist");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
CommKokkos::~CommKokkos()
|
||||
{
|
||||
memory->destroy_kokkos(k_sendlist,sendlist);
|
||||
memory->destroy_kokkos(k_buf_send,buf_send);
|
||||
memory->destroy_kokkos(k_buf_recv,buf_recv);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::init()
|
||||
{
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
exchange_comm_classic = lmp->kokkos->exchange_comm_classic;
|
||||
forward_comm_classic = lmp->kokkos->forward_comm_classic;
|
||||
exchange_comm_on_host = lmp->kokkos->exchange_comm_on_host;
|
||||
forward_comm_on_host = lmp->kokkos->forward_comm_on_host;
|
||||
|
||||
CommBrick::init();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
forward communication of atom coords every timestep
|
||||
other per-atom attributes may also be sent via pack/unpack routines
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::forward_comm(int dummy)
|
||||
{
|
||||
|
||||
if (!forward_comm_classic) {
|
||||
if (forward_comm_on_host) forward_comm_device<LMPHostType>(dummy);
|
||||
else forward_comm_device<LMPDeviceType>(dummy);
|
||||
return;
|
||||
}
|
||||
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
|
||||
if (comm_x_only) {
|
||||
atomKK->sync(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
} else if (ghost_velocity) {
|
||||
atomKK->sync(Host,X_MASK | V_MASK);
|
||||
atomKK->modified(Host,X_MASK | V_MASK);
|
||||
} else {
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
}
|
||||
|
||||
CommBrick::forward_comm(dummy);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void CommKokkos::forward_comm_device(int dummy)
|
||||
{
|
||||
int n;
|
||||
MPI_Request request;
|
||||
MPI_Status status;
|
||||
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
|
||||
double **x = atom->x;
|
||||
double *buf;
|
||||
|
||||
// exchange data with another proc
|
||||
// if other proc is self, just copy
|
||||
// if comm_x_only set, exchange or copy directly to x, don't unpack
|
||||
|
||||
k_sendlist.sync<DeviceType>();
|
||||
|
||||
for (int iswap = 0; iswap < nswap; iswap++) {
|
||||
|
||||
if (sendproc[iswap] != me) {
|
||||
if (comm_x_only) {
|
||||
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
|
||||
if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]];
|
||||
else buf = NULL;
|
||||
|
||||
if (size_forward_recv[iswap]) {
|
||||
buf = atomKK->k_x.view<DeviceType>().ptr_on_device() +
|
||||
firstrecv[iswap]*atomKK->k_x.view<DeviceType>().dimension_1();
|
||||
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
}
|
||||
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
|
||||
iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
|
||||
if (n) {
|
||||
MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),
|
||||
n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
}
|
||||
|
||||
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
|
||||
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
|
||||
space,X_MASK);
|
||||
} else if (ghost_velocity) {
|
||||
error->all(FLERR,"Ghost velocity forward comm not yet "
|
||||
"implemented with Kokkos");
|
||||
if (size_forward_recv[iswap])
|
||||
MPI_Irecv(k_buf_recv.view<LMPHostType>().ptr_on_device(),
|
||||
size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
|
||||
buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
|
||||
avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv);
|
||||
} else {
|
||||
if (size_forward_recv[iswap])
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),
|
||||
size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
|
||||
k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
if (n)
|
||||
MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n,
|
||||
MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
|
||||
avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
|
||||
}
|
||||
|
||||
} else {
|
||||
if (!ghost_velocity) {
|
||||
if (sendnum[iswap])
|
||||
n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap,
|
||||
firstrecv[iswap],pbc_flag[iswap],pbc[iswap]);
|
||||
} else if (ghost_velocity) {
|
||||
error->all(FLERR,"Ghost velocity forward comm not yet "
|
||||
"implemented with Kokkos");
|
||||
n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
|
||||
buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
exchange: move atoms to correct processors
|
||||
atoms exchanged with all 6 stencil neighbors
|
||||
send out atoms that have left my box, receive ones entering my box
|
||||
atoms will be lost if not inside some proc's box
|
||||
can happen if atom moves outside of non-periodic bounary
|
||||
or if atom moves more than one proc away
|
||||
this routine called before every reneighboring
|
||||
for triclinic, atoms must be in lamda coords (0-1) before exchange is called
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::exchange()
|
||||
{
|
||||
if (!exchange_comm_classic) {
|
||||
if (exchange_comm_on_host) exchange_device<LMPHostType>();
|
||||
else exchange_device<LMPDeviceType>();
|
||||
return;
|
||||
}
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
CommBrick::exchange();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
struct BuildExchangeListFunctor {
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
X_FLOAT _lo,_hi;
|
||||
typename AT::t_x_array _x;
|
||||
|
||||
int _nlocal,_dim;
|
||||
typename AT::t_int_1d _nsend;
|
||||
typename AT::t_int_1d _sendlist;
|
||||
typename AT::t_int_1d _sendflag;
|
||||
|
||||
|
||||
BuildExchangeListFunctor(
|
||||
const typename AT::tdual_x_array x,
|
||||
const typename AT::tdual_int_1d sendlist,
|
||||
typename AT::tdual_int_1d nsend,
|
||||
typename AT::tdual_int_1d sendflag,int nlocal, int dim,
|
||||
X_FLOAT lo, X_FLOAT hi):
|
||||
_x(x.template view<DeviceType>()),
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_nsend(nsend.template view<DeviceType>()),
|
||||
_sendflag(sendflag.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi){
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (int i) const {
|
||||
if (_x(i,_dim) < _lo || _x(i,_dim) >= _hi) {
|
||||
const int mysend=Kokkos::atomic_fetch_add(&_nsend(0),1);
|
||||
if(mysend<_sendlist.dimension_0()) {
|
||||
_sendlist(mysend) = i;
|
||||
_sendflag(i) = 1;
|
||||
}
|
||||
} else
|
||||
_sendflag(i) = 0;
|
||||
}
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void CommKokkos::exchange_device()
|
||||
{
|
||||
int i,m,nsend,nrecv,nrecv1,nrecv2,nlocal;
|
||||
double lo,hi,value;
|
||||
double **x;
|
||||
double *sublo,*subhi,*buf;
|
||||
MPI_Request request;
|
||||
MPI_Status status;
|
||||
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
|
||||
|
||||
// clear global->local map for owned and ghost atoms
|
||||
// b/c atoms migrate to new procs in exchange() and
|
||||
// new ghosts are created in borders()
|
||||
// map_set() is done at end of borders()
|
||||
// clear ghost count and any ghost bonus data internal to AtomVec
|
||||
|
||||
if (map_style) atom->map_clear();
|
||||
atom->nghost = 0;
|
||||
atom->avec->clear_bonus();
|
||||
|
||||
// subbox bounds for orthogonal or triclinic
|
||||
|
||||
if (triclinic == 0) {
|
||||
sublo = domain->sublo;
|
||||
subhi = domain->subhi;
|
||||
} else {
|
||||
sublo = domain->sublo_lamda;
|
||||
subhi = domain->subhi_lamda;
|
||||
}
|
||||
|
||||
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
|
||||
|
||||
// loop over dimensions
|
||||
for (int dim = 0; dim < 3; dim++) {
|
||||
|
||||
// fill buffer with atoms leaving my box, using < and >=
|
||||
// when atom is deleted, fill it in with last atom
|
||||
|
||||
x = atom->x;
|
||||
lo = sublo[dim];
|
||||
hi = subhi[dim];
|
||||
nlocal = atom->nlocal;
|
||||
i = nsend = 0;
|
||||
|
||||
if (true) {
|
||||
if (k_sendflag.h_view.dimension_0()<nlocal) k_sendflag.resize(nlocal);
|
||||
k_count.h_view(0) = k_exchange_sendlist.h_view.dimension_0();
|
||||
while (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) {
|
||||
k_count.h_view(0) = 0;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<DeviceType>();
|
||||
|
||||
BuildExchangeListFunctor<DeviceType>
|
||||
f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag,
|
||||
nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nlocal,f);
|
||||
DeviceType::fence();
|
||||
k_exchange_sendlist.modify<DeviceType>();
|
||||
k_sendflag.modify<DeviceType>();
|
||||
k_count.modify<DeviceType>();
|
||||
|
||||
k_count.sync<LMPHostType>();
|
||||
if (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) {
|
||||
k_exchange_sendlist.resize(k_count.h_view(0)*1.1);
|
||||
k_exchange_copylist.resize(k_count.h_view(0)*1.1);
|
||||
k_count.h_view(0)=k_exchange_sendlist.h_view.dimension_0();
|
||||
}
|
||||
}
|
||||
k_exchange_sendlist.sync<LMPHostType>();
|
||||
k_sendflag.sync<LMPHostType>();
|
||||
|
||||
int sendpos = nlocal-1;
|
||||
nlocal -= k_count.h_view(0);
|
||||
for(int i = 0; i < k_count.h_view(0); i++) {
|
||||
if (k_exchange_sendlist.h_view(i)<nlocal) {
|
||||
while (k_sendflag.h_view(sendpos)) sendpos--;
|
||||
k_exchange_copylist.h_view(i) = sendpos;
|
||||
sendpos--;
|
||||
} else
|
||||
k_exchange_copylist.h_view(i) = -1;
|
||||
}
|
||||
|
||||
k_exchange_copylist.modify<LMPHostType>();
|
||||
k_exchange_copylist.sync<DeviceType>();
|
||||
nsend =
|
||||
avec->pack_exchange_kokkos(k_count.h_view(0),k_buf_send,
|
||||
k_exchange_sendlist,k_exchange_copylist,
|
||||
ExecutionSpaceFromDevice<DeviceType>::
|
||||
space,dim,lo,hi);
|
||||
DeviceType::fence();
|
||||
|
||||
} else {
|
||||
while (i < nlocal) {
|
||||
if (x[i][dim] < lo || x[i][dim] >= hi) {
|
||||
if (nsend > maxsend) grow_send_kokkos(nsend,1);
|
||||
nsend += avec->pack_exchange(i,&buf_send[nsend]);
|
||||
avec->copy(nlocal-1,i,1);
|
||||
nlocal--;
|
||||
} else i++;
|
||||
}
|
||||
}
|
||||
atom->nlocal = nlocal;
|
||||
|
||||
// send/recv atoms in both directions
|
||||
// if 1 proc in dimension, no send/recv, set recv buf to send buf
|
||||
// if 2 procs in dimension, single send/recv
|
||||
// if more than 2 procs in dimension, send/recv to both neighbors
|
||||
|
||||
if (procgrid[dim] == 1) {
|
||||
nrecv = nsend;
|
||||
buf = buf_send;
|
||||
if (nrecv) {
|
||||
atom->nlocal=avec->
|
||||
unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space);
|
||||
DeviceType::fence();
|
||||
}
|
||||
} else {
|
||||
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
|
||||
&nrecv1,1,MPI_INT,procneigh[dim][1],0,world,&status);
|
||||
nrecv = nrecv1;
|
||||
if (procgrid[dim] > 2) {
|
||||
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0,
|
||||
&nrecv2,1,MPI_INT,procneigh[dim][0],0,world,&status);
|
||||
nrecv += nrecv2;
|
||||
}
|
||||
if (nrecv > maxrecv) grow_recv_kokkos(nrecv);
|
||||
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),nrecv1,
|
||||
MPI_DOUBLE,procneigh[dim][1],0,
|
||||
world,&request);
|
||||
MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),nsend,
|
||||
MPI_DOUBLE,procneigh[dim][0],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
|
||||
if (procgrid[dim] > 2) {
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device()+nrecv1,
|
||||
nrecv2,MPI_DOUBLE,procneigh[dim][0],0,
|
||||
world,&request);
|
||||
MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),nsend,
|
||||
MPI_DOUBLE,procneigh[dim][1],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
}
|
||||
|
||||
buf = buf_recv;
|
||||
if (nrecv) {
|
||||
atom->nlocal = avec->
|
||||
unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space);
|
||||
DeviceType::fence();
|
||||
}
|
||||
}
|
||||
|
||||
// check incoming atoms to see if they are in my box
|
||||
// if so, add to my list
|
||||
|
||||
}
|
||||
|
||||
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
|
||||
|
||||
if (atom->firstgroupname) {
|
||||
/* this is not yet implemented with Kokkos */
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atom->first_reorder();
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
borders: list nearby atoms to send to neighboring procs at every timestep
|
||||
one list is created for every swap that will be made
|
||||
as list is made, actually do swaps
|
||||
this does equivalent of a communicate, so don't need to explicitly
|
||||
call communicate routine on reneighboring timestep
|
||||
this routine is called before every reneighboring
|
||||
for triclinic, atoms must be in lamda coords (0-1) before borders is called
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::borders()
|
||||
{
|
||||
if (!exchange_comm_classic) {
|
||||
if (exchange_comm_on_host) borders_device<LMPHostType>();
|
||||
else borders_device<LMPDeviceType>();
|
||||
return;
|
||||
}
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
k_sendlist.modify<LMPHostType>();
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
CommBrick::borders();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
struct BuildBorderListFunctor {
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
X_FLOAT lo,hi;
|
||||
typename AT::t_x_array x;
|
||||
int iswap,maxsendlist;
|
||||
int nfirst,nlast,dim;
|
||||
typename AT::t_int_2d sendlist;
|
||||
typename AT::t_int_1d nsend;
|
||||
|
||||
BuildBorderListFunctor(typename AT::tdual_x_array _x,
|
||||
typename AT::tdual_int_2d _sendlist,
|
||||
typename AT::tdual_int_1d _nsend,int _nfirst,
|
||||
int _nlast, int _dim,
|
||||
X_FLOAT _lo, X_FLOAT _hi, int _iswap,
|
||||
int _maxsendlist):
|
||||
x(_x.template view<DeviceType>()),
|
||||
sendlist(_sendlist.template view<DeviceType>()),
|
||||
nsend(_nsend.template view<DeviceType>()),
|
||||
nfirst(_nfirst),nlast(_nlast),dim(_dim),
|
||||
lo(_lo),hi(_hi),iswap(_iswap),maxsendlist(_maxsendlist){}
|
||||
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (DeviceType dev) const {
|
||||
const int chunk = ((nlast - nfirst + dev.league_size() - 1 ) /
|
||||
dev.league_size());
|
||||
const int teamstart = chunk*dev.league_rank() + nfirst;
|
||||
const int teamend = (teamstart + chunk) < nlast?(teamstart + chunk):nlast;
|
||||
int mysend = 0;
|
||||
for (int i=teamstart + dev.team_rank(); i<teamend; i+=dev.team_size()) {
|
||||
if (x(i,dim) >= lo && x(i,dim) <= hi) mysend++;
|
||||
}
|
||||
const int my_store_pos = dev.team_scan(mysend,&nsend(0));
|
||||
|
||||
if (my_store_pos+mysend < maxsendlist) {
|
||||
mysend = my_store_pos;
|
||||
for(int i=teamstart + dev.team_rank(); i<teamend; i+=dev.team_size()){
|
||||
if (x(i,dim) >= lo && x(i,dim) <= hi) {
|
||||
sendlist(iswap,mysend++) = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t shmem_size() const { return 1000u;}
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void CommKokkos::borders_device() {
|
||||
int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax;
|
||||
int nsend,nrecv,sendflag,nfirst,nlast,ngroup;
|
||||
double lo,hi;
|
||||
int *type;
|
||||
double **x;
|
||||
double *buf,*mlo,*mhi;
|
||||
MPI_Request request;
|
||||
MPI_Status status;
|
||||
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
|
||||
|
||||
ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
k_sendlist.modify<DeviceType>();
|
||||
atomKK->sync(exec_space,ALL_MASK);
|
||||
|
||||
// do swaps over all 3 dimensions
|
||||
|
||||
iswap = 0;
|
||||
smax = rmax = 0;
|
||||
|
||||
for (dim = 0; dim < 3; dim++) {
|
||||
nlast = 0;
|
||||
twoneed = 2*maxneed[dim];
|
||||
for (ineed = 0; ineed < twoneed; ineed++) {
|
||||
|
||||
// find atoms within slab boundaries lo/hi using <= and >=
|
||||
// check atoms between nfirst and nlast
|
||||
// for first swaps in a dim, check owned and ghost
|
||||
// for later swaps in a dim, only check newly arrived ghosts
|
||||
// store sent atom indices in list for use in future timesteps
|
||||
|
||||
x = atom->x;
|
||||
if (style == SINGLE) {
|
||||
lo = slablo[iswap];
|
||||
hi = slabhi[iswap];
|
||||
} else {
|
||||
type = atom->type;
|
||||
mlo = multilo[iswap];
|
||||
mhi = multihi[iswap];
|
||||
}
|
||||
if (ineed % 2 == 0) {
|
||||
nfirst = nlast;
|
||||
nlast = atom->nlocal + atom->nghost;
|
||||
}
|
||||
|
||||
nsend = 0;
|
||||
|
||||
// sendflag = 0 if I do not send on this swap
|
||||
// sendneed test indicates receiver no longer requires data
|
||||
// e.g. due to non-PBC or non-uniform sub-domains
|
||||
|
||||
if (ineed/2 >= sendneed[dim][ineed % 2]) sendflag = 0;
|
||||
else sendflag = 1;
|
||||
|
||||
// find send atoms according to SINGLE vs MULTI
|
||||
// all atoms eligible versus atoms in bordergroup
|
||||
// only need to limit loop to bordergroup for first sends (ineed < 2)
|
||||
// on these sends, break loop in two: owned (in group) and ghost
|
||||
|
||||
if (sendflag) {
|
||||
if (!bordergroup || ineed >= 2) {
|
||||
if (style == SINGLE) {
|
||||
typename ArrayTypes<DeviceType>::tdual_int_1d total_send("TS",1);
|
||||
total_send.h_view(0) = 0;
|
||||
if(exec_space == Device) {
|
||||
total_send.template modify<DeviceType>();
|
||||
total_send.template sync<LMPDeviceType>();
|
||||
}
|
||||
BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
|
||||
total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
|
||||
Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
|
||||
Kokkos::parallel_for(config,f);
|
||||
DeviceType::fence();
|
||||
total_send.template modify<DeviceType>();
|
||||
total_send.template sync<LMPHostType>();
|
||||
|
||||
if(total_send.h_view(0) >= maxsendlist[iswap]) {
|
||||
grow_list(iswap,total_send.h_view(0));
|
||||
total_send.h_view(0) = 0;
|
||||
if(exec_space == Device) {
|
||||
total_send.template modify<LMPHostType>();
|
||||
total_send.template sync<LMPDeviceType>();
|
||||
}
|
||||
BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
|
||||
total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
|
||||
Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
|
||||
Kokkos::parallel_for(config,f);
|
||||
DeviceType::fence();
|
||||
total_send.template modify<DeviceType>();
|
||||
total_send.template sync<LMPHostType>();
|
||||
}
|
||||
nsend = total_send.h_view(0);
|
||||
} else {
|
||||
error->all(FLERR,"Required border comm not yet "
|
||||
"implemented with Kokkos\n");
|
||||
for (i = nfirst; i < nlast; i++) {
|
||||
itype = type[i];
|
||||
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
|
||||
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
|
||||
sendlist[iswap][nsend++] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
error->all(FLERR,"Required border comm not yet "
|
||||
"implemented with Kokkos\n");
|
||||
if (style == SINGLE) {
|
||||
ngroup = atom->nfirst;
|
||||
for (i = 0; i < ngroup; i++)
|
||||
if (x[i][dim] >= lo && x[i][dim] <= hi) {
|
||||
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
|
||||
sendlist[iswap][nsend++] = i;
|
||||
}
|
||||
for (i = atom->nlocal; i < nlast; i++)
|
||||
if (x[i][dim] >= lo && x[i][dim] <= hi) {
|
||||
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
|
||||
sendlist[iswap][nsend++] = i;
|
||||
}
|
||||
} else {
|
||||
ngroup = atom->nfirst;
|
||||
for (i = 0; i < ngroup; i++) {
|
||||
itype = type[i];
|
||||
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
|
||||
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
|
||||
sendlist[iswap][nsend++] = i;
|
||||
}
|
||||
}
|
||||
for (i = atom->nlocal; i < nlast; i++) {
|
||||
itype = type[i];
|
||||
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
|
||||
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
|
||||
sendlist[iswap][nsend++] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// pack up list of border atoms
|
||||
|
||||
if (nsend*size_border > maxsend)
|
||||
grow_send_kokkos(nsend*size_border,0);
|
||||
if (ghost_velocity) {
|
||||
error->all(FLERR,"Required border comm not yet "
|
||||
"implemented with Kokkos\n");
|
||||
n = avec->pack_border_vel(nsend,sendlist[iswap],buf_send,
|
||||
pbc_flag[iswap],pbc[iswap]);
|
||||
}
|
||||
else
|
||||
n = avec->
|
||||
pack_border_kokkos(nsend,k_sendlist,k_buf_send,iswap,
|
||||
pbc_flag[iswap],pbc[iswap],exec_space);
|
||||
|
||||
// swap atoms with other proc
|
||||
// no MPI calls except SendRecv if nsend/nrecv = 0
|
||||
// put incoming ghosts at end of my atom arrays
|
||||
// if swapping with self, simply copy, no messages
|
||||
|
||||
if (sendproc[iswap] != me) {
|
||||
MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0,
|
||||
&nrecv,1,MPI_INT,recvproc[iswap],0,world,&status);
|
||||
if (nrecv*size_border > maxrecv) grow_recv_kokkos(nrecv*size_border);
|
||||
if (nrecv) MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),
|
||||
nrecv*size_border,MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
if (n) MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n,
|
||||
MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
if (nrecv) MPI_Wait(&request,&status);
|
||||
buf = buf_recv;
|
||||
} else {
|
||||
nrecv = nsend;
|
||||
buf = buf_send;
|
||||
}
|
||||
|
||||
// unpack buffer
|
||||
|
||||
if (ghost_velocity) {
|
||||
error->all(FLERR,"Required border comm not yet "
|
||||
"implemented with Kokkos\n");
|
||||
avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf);
|
||||
}
|
||||
else
|
||||
if (sendproc[iswap] != me)
|
||||
avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost,
|
||||
k_buf_recv,exec_space);
|
||||
else
|
||||
avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost,
|
||||
k_buf_send,exec_space);
|
||||
|
||||
// set all pointers & counters
|
||||
|
||||
smax = MAX(smax,nsend);
|
||||
rmax = MAX(rmax,nrecv);
|
||||
sendnum[iswap] = nsend;
|
||||
recvnum[iswap] = nrecv;
|
||||
size_forward_recv[iswap] = nrecv*size_forward;
|
||||
size_reverse_send[iswap] = nrecv*size_reverse;
|
||||
size_reverse_recv[iswap] = nsend*size_reverse;
|
||||
firstrecv[iswap] = atom->nlocal + atom->nghost;
|
||||
atom->nghost += nrecv;
|
||||
iswap++;
|
||||
}
|
||||
}
|
||||
|
||||
// insure send/recv buffers are long enough for all forward & reverse comm
|
||||
|
||||
int max = MAX(maxforward*smax,maxreverse*rmax);
|
||||
if (max > maxsend) grow_send_kokkos(max,0);
|
||||
max = MAX(maxforward*rmax,maxreverse*smax);
|
||||
if (max > maxrecv) grow_recv_kokkos(max);
|
||||
|
||||
// reset global->local map
|
||||
|
||||
if (map_style) atom->map_set();
|
||||
if (exec_space == Host) k_sendlist.sync<LMPDeviceType>();
|
||||
atomKK->modified(exec_space,ALL_MASK);
|
||||
DeviceType::fence();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA
|
||||
if flag = 1, realloc
|
||||
if flag = 0, don't need to realloc with copy, just free/malloc
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
|
||||
{
|
||||
maxsend = static_cast<int> (BUFFACTOR * n);
|
||||
int maxsend_border = (maxsend+BUFEXTRA+5)/atom->avec->size_border + 2;
|
||||
if (flag) {
|
||||
if(space == Device)
|
||||
k_buf_send.modify<LMPDeviceType>();
|
||||
else
|
||||
k_buf_send.modify<LMPHostType>();
|
||||
|
||||
k_buf_send.resize(maxsend_border,atom->avec->size_border);
|
||||
buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
|
||||
}
|
||||
else {
|
||||
k_buf_send = ArrayTypes<LMPDeviceType>::
|
||||
tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atom->avec->size_border);
|
||||
buf_send = k_buf_send.view<LMPHostType>().ptr_on_device();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
free/malloc the size of the recv buffer as needed with BUFFACTOR
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace space)
|
||||
{
|
||||
maxrecv = static_cast<int> (BUFFACTOR * n);
|
||||
int maxrecv_border = (maxrecv+BUFEXTRA+5)/atom->avec->size_border + 2;
|
||||
k_buf_recv = ArrayTypes<LMPDeviceType>::
|
||||
tdual_xfloat_2d("comm:k_buf_recv",maxrecv_border,atom->avec->size_border);
|
||||
buf_recv = k_buf_recv.view<LMPHostType>().ptr_on_device();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
realloc the size of the iswap sendlist as needed with BUFFACTOR
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::grow_list(int iswap, int n)
|
||||
{
|
||||
int size = static_cast<int> (BUFFACTOR * n);
|
||||
|
||||
memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist");
|
||||
|
||||
for(int i=0;i<maxswap;i++) {
|
||||
maxsendlist[i]=size; sendlist[i]=&k_sendlist.view<LMPHostType>()(i,0);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
realloc the buffers needed for swaps
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::grow_swap(int n)
|
||||
{
|
||||
free_swap();
|
||||
allocate_swap(n);
|
||||
if (style == MULTI) {
|
||||
free_multi();
|
||||
allocate_multi(n);
|
||||
}
|
||||
|
||||
maxswap = n;
|
||||
int size = MAX(k_sendlist.d_view.dimension_1(),BUFMIN);
|
||||
|
||||
memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist");
|
||||
|
||||
memory->grow(maxsendlist,n,"comm:maxsendlist");
|
||||
for (int i=0;i<maxswap;i++) maxsendlist[i]=size;
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_COMM_KOKKOS_H
|
||||
#define LMP_COMM_KOKKOS_H
|
||||
|
||||
#include "comm_brick.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class CommKokkos : public CommBrick {
|
||||
public:
|
||||
class AtomKokkos *atomKK;
|
||||
|
||||
bool exchange_comm_classic;
|
||||
bool forward_comm_classic;
|
||||
bool exchange_comm_on_host;
|
||||
bool forward_comm_on_host;
|
||||
|
||||
CommKokkos(class LAMMPS *);
|
||||
~CommKokkos();
|
||||
void init();
|
||||
|
||||
void forward_comm(int dummy = 0); // forward comm of atom coords
|
||||
void exchange(); // move atoms to new procs
|
||||
void borders(); // setup list of atoms to comm
|
||||
|
||||
template<class DeviceType> void forward_comm_device(int dummy);
|
||||
template<class DeviceType> void exchange_device();
|
||||
template<class DeviceType> void borders_device();
|
||||
|
||||
protected:
|
||||
DAT::tdual_int_2d k_sendlist;
|
||||
DAT::tdual_xfloat_2d k_buf_send,k_buf_recv;
|
||||
DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag;
|
||||
DAT::tdual_int_1d k_count;
|
||||
//double *buf_send; // send buffer for all comm
|
||||
//double *buf_recv; // recv buffer for all comm
|
||||
|
||||
void grow_send_kokkos(int, int, ExecutionSpace space = Host);
|
||||
void grow_recv_kokkos(int, ExecutionSpace space = Host);
|
||||
void grow_list(int, int);
|
||||
void grow_swap(int);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -0,0 +1,207 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "domain_kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
DomainKokkos::DomainKokkos(LAMMPS *lmp) : Domain(lmp) {}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void DomainKokkos::init()
|
||||
{
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
Domain::init();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, int PERIODIC, int DEFORM_VREMAP>
|
||||
struct DomainPBCFunctor {
|
||||
typedef DeviceType device_type;
|
||||
double lo[3],hi[3],period[3];
|
||||
typename ArrayTypes<DeviceType>::t_x_array x;
|
||||
typename ArrayTypes<DeviceType>::t_v_array v;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d mask;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d image;
|
||||
int deform_groupbit;
|
||||
double h_rate[6];
|
||||
int xperiodic,yperiodic,zperiodic;
|
||||
|
||||
DomainPBCFunctor(double* _lo, double* _hi, double* _period,
|
||||
DAT::tdual_x_array _x, DAT::tdual_v_array _v,
|
||||
DAT::tdual_int_1d _mask, DAT::tdual_int_1d _image,
|
||||
int _deform_groupbit, double* _h_rate,
|
||||
int _xperiodic, int _yperiodic, int _zperiodic):
|
||||
x(_x.view<DeviceType>()), v(_v.view<DeviceType>()),
|
||||
mask(_mask.view<DeviceType>()), image(_image.view<DeviceType>()),
|
||||
deform_groupbit(_deform_groupbit),
|
||||
xperiodic(_xperiodic), yperiodic(_yperiodic), zperiodic(_zperiodic){
|
||||
lo[0]=_lo[0]; lo[1]=_lo[1]; lo[2]=_lo[2];
|
||||
hi[0]=_hi[0]; hi[1]=_hi[1]; hi[2]=_hi[2];
|
||||
period[0]=_period[0]; period[1]=_period[1]; period[2]=_period[2];
|
||||
h_rate[0]=_h_rate[0]; h_rate[1]=_h_rate[1]; h_rate[2]=_h_rate[2];
|
||||
h_rate[3]=_h_rate[3]; h_rate[4]=_h_rate[4]; h_rate[5]=_h_rate[5];
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &i) const {
|
||||
if (PERIODIC && xperiodic) {
|
||||
if (x(i,0) < lo[0]) {
|
||||
x(i,0) += period[0];
|
||||
if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) v(i,0) += h_rate[0];
|
||||
int idim = image[i] & IMGMASK;
|
||||
const int otherdims = image[i] ^ idim;
|
||||
idim--;
|
||||
idim &= IMGMASK;
|
||||
image[i] = otherdims | idim;
|
||||
}
|
||||
if (x(i,0) >= hi[0]) {
|
||||
x(i,0) -= period[0];
|
||||
x(i,0) = MAX(x(i,0),lo[0]);
|
||||
if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) v(i,0) -= h_rate[0];
|
||||
int idim = image[i] & IMGMASK;
|
||||
const int otherdims = image[i] ^ idim;
|
||||
idim++;
|
||||
idim &= IMGMASK;
|
||||
image[i] = otherdims | idim;
|
||||
}
|
||||
}
|
||||
|
||||
if (PERIODIC && yperiodic) {
|
||||
if (x(i,1) < lo[1]) {
|
||||
x(i,1) += period[1];
|
||||
if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) {
|
||||
v(i,0) += h_rate[5];
|
||||
v(i,1) += h_rate[1];
|
||||
}
|
||||
int idim = (image[i] >> IMGBITS) & IMGMASK;
|
||||
const int otherdims = image[i] ^ (idim << IMGBITS);
|
||||
idim--;
|
||||
idim &= IMGMASK;
|
||||
image[i] = otherdims | (idim << IMGBITS);
|
||||
}
|
||||
if (x(i,1) >= hi[1]) {
|
||||
x(i,1) -= period[1];
|
||||
x(i,1) = MAX(x(i,1),lo[1]);
|
||||
if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) {
|
||||
v(i,0) -= h_rate[5];
|
||||
v(i,1) -= h_rate[1];
|
||||
}
|
||||
int idim = (image[i] >> IMGBITS) & IMGMASK;
|
||||
const int otherdims = image[i] ^ (idim << IMGBITS);
|
||||
idim++;
|
||||
idim &= IMGMASK;
|
||||
image[i] = otherdims | (idim << IMGBITS);
|
||||
}
|
||||
}
|
||||
|
||||
if (PERIODIC && zperiodic) {
|
||||
if (x(i,2) < lo[2]) {
|
||||
x(i,2) += period[2];
|
||||
if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) {
|
||||
v(i,0) += h_rate[4];
|
||||
v(i,1) += h_rate[3];
|
||||
v(i,2) += h_rate[2];
|
||||
}
|
||||
int idim = image[i] >> IMG2BITS;
|
||||
const int otherdims = image[i] ^ (idim << IMG2BITS);
|
||||
idim--;
|
||||
idim &= IMGMASK;
|
||||
image[i] = otherdims | (idim << IMG2BITS);
|
||||
}
|
||||
if (x(i,2) >= hi[2]) {
|
||||
x(i,2) -= period[2];
|
||||
x(i,2) = MAX(x(i,2),lo[2]);
|
||||
if (DEFORM_VREMAP && (mask[i] & deform_groupbit)) {
|
||||
v(i,0) -= h_rate[4];
|
||||
v(i,1) -= h_rate[3];
|
||||
v(i,2) -= h_rate[2];
|
||||
}
|
||||
int idim = image[i] >> IMG2BITS;
|
||||
const int otherdims = image[i] ^ (idim << IMG2BITS);
|
||||
idim++;
|
||||
idim &= IMGMASK;
|
||||
image[i] = otherdims | (idim << IMG2BITS);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
enforce PBC and modify box image flags for each atom
|
||||
called every reneighboring and by other commands that change atoms
|
||||
resulting coord must satisfy lo <= coord < hi
|
||||
MAX is important since coord - prd < lo can happen when coord = hi
|
||||
if fix deform, remap velocity of fix group atoms by box edge velocities
|
||||
for triclinic, atoms must be in lamda coords (0-1) before pbc is called
|
||||
image = 10 bits for each dimension
|
||||
increment/decrement in wrap-around fashion
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void DomainKokkos::pbc()
|
||||
{
|
||||
double *lo,*hi,*period;
|
||||
int nlocal = atomKK->nlocal;
|
||||
|
||||
if (triclinic == 0) {
|
||||
lo = boxlo;
|
||||
hi = boxhi;
|
||||
period = prd;
|
||||
} else {
|
||||
lo = boxlo_lamda;
|
||||
hi = boxhi_lamda;
|
||||
period = prd_lamda;
|
||||
}
|
||||
|
||||
atomKK->sync(Device,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK);
|
||||
atomKK->modified(Device,X_MASK|V_MASK);
|
||||
|
||||
if (xperiodic || yperiodic || zperiodic) {
|
||||
if (deform_vremap) {
|
||||
DomainPBCFunctor<LMPDeviceType,1,1>
|
||||
f(lo,hi,period,
|
||||
atomKK->k_x,atomKK->k_v,atomKK->k_mask,atomKK->k_image,
|
||||
deform_groupbit,h_rate,xperiodic,yperiodic,zperiodic);
|
||||
Kokkos::parallel_for(nlocal,f);
|
||||
} else {
|
||||
DomainPBCFunctor<LMPDeviceType,1,0>
|
||||
f(lo,hi,period,
|
||||
atomKK->k_x,atomKK->k_v,atomKK->k_mask,atomKK->k_image,
|
||||
deform_groupbit,h_rate,xperiodic,yperiodic,zperiodic);
|
||||
Kokkos::parallel_for(nlocal,f);
|
||||
}
|
||||
} else {
|
||||
if (deform_vremap) {
|
||||
DomainPBCFunctor<LMPDeviceType,0,1>
|
||||
f(lo,hi,period,
|
||||
atomKK->k_x,atomKK->k_v,atomKK->k_mask,atomKK->k_image,
|
||||
deform_groupbit,h_rate,xperiodic,yperiodic,zperiodic);
|
||||
Kokkos::parallel_for(nlocal,f);
|
||||
} else {
|
||||
DomainPBCFunctor<LMPDeviceType,0,0>
|
||||
f(lo,hi,period,
|
||||
atomKK->k_x,atomKK->k_v,atomKK->k_mask,atomKK->k_image,
|
||||
deform_groupbit,h_rate,xperiodic,yperiodic,zperiodic);
|
||||
Kokkos::parallel_for(nlocal,f);
|
||||
}
|
||||
}
|
||||
|
||||
LMPDeviceType::fence();
|
||||
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_DOMAIN_KOKKOS_H
|
||||
#define LMP_DOMAIN_KOKKOS_H
|
||||
|
||||
#include "domain.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class DomainKokkos : public Domain {
|
||||
public:
|
||||
class AtomKokkos *atomKK;
|
||||
|
||||
DomainKokkos(class LAMMPS *);
|
||||
~DomainKokkos() {}
|
||||
void init();
|
||||
void pbc();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -0,0 +1,177 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "stdio.h"
|
||||
#include "string.h"
|
||||
#include "fix_nve_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "force.h"
|
||||
#include "update.h"
|
||||
#include "respa.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace FixConst;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
FixNVEKokkos<DeviceType>::FixNVEKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
FixNVE(lmp, narg, arg)
|
||||
{
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK;
|
||||
datamask_modify = X_MASK | V_MASK | F_MASK;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixNVEKokkos<DeviceType>::init()
|
||||
{
|
||||
FixNVE::init();
|
||||
|
||||
atomKK->k_mass.modify<LMPHostType>();
|
||||
atomKK->k_mass.sync<LMPDeviceType>();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allow for both per-type and per-atom mass
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixNVEKokkos<DeviceType>::initial_integrate(int vflag)
|
||||
{
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
v = atomKK->k_v.view<DeviceType>();
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
rmass = atomKK->rmass;
|
||||
mass = atomKK->k_mass.view<DeviceType>();
|
||||
type = atomKK->k_type.view<DeviceType>();
|
||||
mask = atomKK->k_mask.view<DeviceType>();
|
||||
int nlocal = atomKK->nlocal;
|
||||
if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst;
|
||||
|
||||
if (rmass) {
|
||||
FixNVEKokkosInitialIntegrateFunctor<DeviceType,1> functor(this);
|
||||
Kokkos::parallel_for(nlocal,functor);
|
||||
} else {
|
||||
FixNVEKokkosInitialIntegrateFunctor<DeviceType,0> functor(this);
|
||||
Kokkos::parallel_for(nlocal,functor);
|
||||
}
|
||||
DeviceType::fence();
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixNVEKokkos<DeviceType>::initial_integrate_item(int i) const
|
||||
{
|
||||
if (mask[i] & groupbit) {
|
||||
const double dtfm = dtf / mass[type[i]];
|
||||
v(i,0) += dtfm * f(i,0);
|
||||
v(i,1) += dtfm * f(i,1);
|
||||
v(i,2) += dtfm * f(i,2);
|
||||
x(i,0) += dtv * v(i,0);
|
||||
x(i,1) += dtv * v(i,1);
|
||||
x(i,2) += dtv * v(i,2);
|
||||
}
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixNVEKokkos<DeviceType>::initial_integrate_rmass_item(int i) const
|
||||
{
|
||||
if (mask[i] & groupbit) {
|
||||
const double dtfm = dtf / rmass[type[i]];
|
||||
v(i,0) += dtfm * f(i,0);
|
||||
v(i,1) += dtfm * f(i,1);
|
||||
v(i,2) += dtfm * f(i,2);
|
||||
x(i,0) += dtv * v(i,0);
|
||||
x(i,1) += dtv * v(i,1);
|
||||
x(i,2) += dtv * v(i,2);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixNVEKokkos<DeviceType>::final_integrate()
|
||||
{
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
atomKK->modified(execution_space,datamask_modify);
|
||||
|
||||
v = atomKK->k_v.view<DeviceType>();
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
rmass = atomKK->rmass;
|
||||
mass = atomKK->k_mass.view<DeviceType>();
|
||||
type = atomKK->k_type.view<DeviceType>();
|
||||
mask = atomKK->k_mask.view<DeviceType>();
|
||||
int nlocal = atomKK->nlocal;
|
||||
if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst;
|
||||
|
||||
if (rmass) {
|
||||
FixNVEKokkosFinalIntegrateFunctor<DeviceType,1> functor(this);
|
||||
Kokkos::parallel_for(nlocal,functor);
|
||||
} else {
|
||||
FixNVEKokkosFinalIntegrateFunctor<DeviceType,0> functor(this);
|
||||
Kokkos::parallel_for(nlocal,functor);
|
||||
}
|
||||
DeviceType::fence();
|
||||
|
||||
// debug
|
||||
//atomKK->sync(Host,datamask_read);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixNVEKokkos<DeviceType>::final_integrate_item(int i) const
|
||||
{
|
||||
if (mask[i] & groupbit) {
|
||||
const double dtfm = dtf / mass[type[i]];
|
||||
v(i,0) += dtfm * f(i,0);
|
||||
v(i,1) += dtfm * f(i,1);
|
||||
v(i,2) += dtfm * f(i,2);
|
||||
}
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixNVEKokkos<DeviceType>::final_integrate_rmass_item(int i) const
|
||||
{
|
||||
if (mask[i] & groupbit) {
|
||||
const double dtfm = dtf / rmass[type[i]];
|
||||
v(i,0) += dtfm * f(i,0);
|
||||
v(i,1) += dtfm * f(i,1);
|
||||
v(i,2) += dtfm * f(i,2);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void FixNVEKokkos<DeviceType>::cleanup_copy()
|
||||
{
|
||||
id = style = NULL;
|
||||
vatom = NULL;
|
||||
}
|
||||
|
||||
template class FixNVEKokkos<LMPDeviceType>;
|
||||
#if DEVICE==2
|
||||
template class FixNVEKokkos<LMPHostType>;
|
||||
#endif
|
|
@ -0,0 +1,110 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(nve/kk,FixNVEKokkos<LMPDeviceType>)
|
||||
FixStyle(nve/kk/device,FixNVEKokkos<LMPDeviceType>)
|
||||
FixStyle(nve/kk/host,FixNVEKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_NVE_KOKKOS_H
|
||||
#define LMP_FIX_NVE_KOKKOS_H
|
||||
|
||||
#include "fix_nve.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
class FixNVEKokkos;
|
||||
|
||||
template <class DeviceType, int RMass>
|
||||
class FixNVEKokkosInitialIntegrateFunctor;
|
||||
template <class DeviceType, int RMass>
|
||||
class FixNVEKokkosFinalIntegrateFunctor;
|
||||
|
||||
template<class DeviceType>
|
||||
class FixNVEKokkos : public FixNVE {
|
||||
public:
|
||||
FixNVEKokkos(class LAMMPS *, int, char **);
|
||||
~FixNVEKokkos() {}
|
||||
void cleanup_copy();
|
||||
void init();
|
||||
void initial_integrate(int);
|
||||
void final_integrate();
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void initial_integrate_item(int) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void initial_integrate_rmass_item(int) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void final_integrate_item(int) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void final_integrate_rmass_item(int) const;
|
||||
|
||||
private:
|
||||
class AtomKokkos *atomKK;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_x_array x;
|
||||
typename ArrayTypes<DeviceType>::t_v_array v;
|
||||
typename ArrayTypes<DeviceType>::t_f_array_const f;
|
||||
double *rmass;
|
||||
typename ArrayTypes<DeviceType>::t_float_1d_randomread mass;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d type;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d mask;
|
||||
};
|
||||
|
||||
template <class DeviceType, int RMass>
|
||||
struct FixNVEKokkosInitialIntegrateFunctor {
|
||||
typedef DeviceType device_type ;
|
||||
FixNVEKokkos<DeviceType> c;
|
||||
|
||||
FixNVEKokkosInitialIntegrateFunctor(FixNVEKokkos<DeviceType>* c_ptr):
|
||||
c(*c_ptr) {c.cleanup_copy();};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
if (RMass) c.initial_integrate_rmass_item(i);
|
||||
else c.initial_integrate_item(i);
|
||||
}
|
||||
};
|
||||
|
||||
template <class DeviceType, int RMass>
|
||||
struct FixNVEKokkosFinalIntegrateFunctor {
|
||||
typedef DeviceType device_type ;
|
||||
FixNVEKokkos<DeviceType> c;
|
||||
|
||||
FixNVEKokkosFinalIntegrateFunctor(FixNVEKokkos<DeviceType>* c_ptr):
|
||||
c(*c_ptr) {c.cleanup_copy();};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
if (RMass) c.final_integrate_rmass_item(i);
|
||||
else c.final_integrate_item(i);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Illegal ... command
|
||||
|
||||
Self-explanatory. Check the input script syntax and compare to the
|
||||
documentation for the command. You can use -echo screen as a
|
||||
command-line option when running LAMMPS to see the offending line.
|
||||
|
||||
*/
|
|
@ -0,0 +1,220 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "stdio.h"
|
||||
#include "string.h"
|
||||
#include "stdlib.h"
|
||||
#include "ctype.h"
|
||||
#include "kokkos.h"
|
||||
#include "lammps.h"
|
||||
#include "neighbor_kokkos.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
enum{FULL,HALFTHREAD,HALF};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
{
|
||||
kokkos_exists = 1;
|
||||
lmp->kokkos = this;
|
||||
|
||||
// process any command-line args that invoke Kokkos settings
|
||||
|
||||
int device = 0;
|
||||
int num_threads = 1;
|
||||
int numa = 1;
|
||||
|
||||
int iarg = 0;
|
||||
while (iarg < narg) {
|
||||
if (strcmp(arg[iarg],"d") == 0 || strcmp(arg[iarg],"device") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
|
||||
device = atoi(arg[iarg+1]);
|
||||
iarg += 2;
|
||||
|
||||
} else if (strcmp(arg[iarg],"g") == 0 ||
|
||||
strcmp(arg[iarg],"gpus") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
|
||||
int ngpu = atoi(arg[iarg+1]);
|
||||
iarg += 2;
|
||||
|
||||
int skip_gpu = 9999;
|
||||
if (iarg+2 < narg && isdigit(arg[iarg+2][0])) {
|
||||
skip_gpu = atoi(arg[iarg+2]);
|
||||
iarg++;
|
||||
}
|
||||
|
||||
char *str;
|
||||
if (str = getenv("SLURM_LOCALID")) {
|
||||
int local_rank = atoi(str);
|
||||
device = local_rank % ngpu;
|
||||
if (device >= skip_gpu) device++;
|
||||
}
|
||||
if (str = getenv("MV2_COMM_WORLD_LOCAL_RANK")) {
|
||||
int local_rank = atoi(str);
|
||||
device = local_rank % ngpu;
|
||||
if (device >= skip_gpu) device++;
|
||||
}
|
||||
if (str = getenv("OMPI_COMM_WORLD_LOCAL_RANK")) {
|
||||
int local_rank = atoi(str);
|
||||
device = local_rank % ngpu;
|
||||
if (device >= skip_gpu) device++;
|
||||
}
|
||||
|
||||
} else if (strcmp(arg[iarg],"t") == 0 ||
|
||||
strcmp(arg[iarg],"threads") == 0) {
|
||||
num_threads = atoi(arg[iarg+1]);
|
||||
iarg += 2;
|
||||
|
||||
} else if (strcmp(arg[iarg],"n") == 0 ||
|
||||
strcmp(arg[iarg],"numa") == 0) {
|
||||
numa = atoi(arg[iarg+1]);
|
||||
iarg += 2;
|
||||
|
||||
} else error->all(FLERR,"Invalid Kokkos command-line args");
|
||||
}
|
||||
|
||||
// initialize Kokkos
|
||||
|
||||
#if DEVICE==2
|
||||
Kokkos::Cuda::host_mirror_device_type::initialize(num_threads,numa);
|
||||
Kokkos::Cuda::SelectDevice select_device(device);
|
||||
Kokkos::Cuda::initialize(select_device);
|
||||
#else
|
||||
LMPHostType::initialize(num_threads,numa);
|
||||
#endif
|
||||
|
||||
// default settings for package kokkos command
|
||||
|
||||
neighflag = FULL;
|
||||
exchange_comm_classic = 0;
|
||||
forward_comm_classic = 0;
|
||||
exchange_comm_on_host = 1;
|
||||
forward_comm_on_host = 1;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
KokkosLMP::~KokkosLMP()
|
||||
{
|
||||
// finalize Kokkos
|
||||
|
||||
#if DEVICE==2
|
||||
Kokkos::Cuda::finalize();
|
||||
Kokkos::Cuda::host_mirror_device_type::finalize();
|
||||
#else
|
||||
LMPHostType::finalize();
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
invoked by package kokkos command
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void KokkosLMP::accelerator(int narg, char **arg)
|
||||
{
|
||||
int iarg = 0;
|
||||
while (iarg < narg) {
|
||||
if (strcmp(arg[iarg],"neigh") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package command");
|
||||
if (strcmp(arg[iarg+1],"full") == 0) neighflag = FULL;
|
||||
else if (strcmp(arg[iarg+1],"half/thread") == 0) neighflag = HALFTHREAD;
|
||||
else if (strcmp(arg[iarg+1],"half") == 0) neighflag = HALF;
|
||||
else if (strcmp(arg[iarg+1],"n2") == 0) neighflag = N2;
|
||||
else if (strcmp(arg[iarg+1],"full/cluster") == 0) neighflag = FULLCLUSTER;
|
||||
else error->all(FLERR,"Illegal package command");
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"comm/exchange") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package command");
|
||||
if (strcmp(arg[iarg+1],"no") == 0) exchange_comm_classic = 1;
|
||||
else if (strcmp(arg[iarg+1],"host") == 0) {
|
||||
exchange_comm_classic = 0;
|
||||
exchange_comm_on_host = 1;
|
||||
} else if (strcmp(arg[iarg+1],"device") == 0) {
|
||||
exchange_comm_classic = 0;
|
||||
exchange_comm_on_host = 0;
|
||||
} else error->all(FLERR,"Illegal package command");
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"comm/forward") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package command");
|
||||
if (strcmp(arg[iarg+1],"no") == 0) forward_comm_classic = 1;
|
||||
else if (strcmp(arg[iarg+1],"host") == 0) {
|
||||
forward_comm_classic = 0;
|
||||
forward_comm_on_host = 1;
|
||||
} else if (strcmp(arg[iarg+1],"device") == 0) {
|
||||
forward_comm_classic = 0;
|
||||
forward_comm_on_host = 0;
|
||||
} else error->all(FLERR,"Illegal package command");
|
||||
iarg += 2;
|
||||
} else error->all(FLERR,"Illegal package command");
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
called by Finish
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int KokkosLMP::neigh_list_kokkos(int m)
|
||||
{
|
||||
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
|
||||
if (nk->lists_host[m] && nk->lists_host[m]->d_numneigh.dimension_0())
|
||||
return 1;
|
||||
if (nk->lists_device[m] && nk->lists_device[m]->d_numneigh.dimension_0())
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
called by Finish
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int KokkosLMP::neigh_count(int m)
|
||||
{
|
||||
int inum;
|
||||
int nneigh = 0;
|
||||
|
||||
ArrayTypes<LMPHostType>::t_int_1d h_ilist;
|
||||
ArrayTypes<LMPHostType>::t_int_1d h_numneigh;
|
||||
|
||||
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
|
||||
if (nk->lists_host[m]) {
|
||||
inum = nk->lists_host[m]->inum;
|
||||
#ifndef KOKKOS_USE_UVM
|
||||
h_ilist = Kokkos::create_mirror_view(nk->lists_host[m]->d_ilist);
|
||||
h_numneigh = Kokkos::create_mirror_view(nk->lists_host[m]->d_numneigh);
|
||||
#else
|
||||
h_ilist = nk->lists_host[m]->d_ilist;
|
||||
h_numneigh = nk->lists_host[m]->d_numneigh;
|
||||
#endif
|
||||
Kokkos::deep_copy(h_ilist,nk->lists_host[m]->d_ilist);
|
||||
Kokkos::deep_copy(h_numneigh,nk->lists_host[m]->d_numneigh);
|
||||
} else if (nk->lists_device[m]) {
|
||||
inum = nk->lists_device[m]->inum;
|
||||
#ifndef KOKKOS_USE_UVM
|
||||
h_ilist = Kokkos::create_mirror_view(nk->lists_device[m]->d_ilist);
|
||||
h_numneigh = Kokkos::create_mirror_view(nk->lists_device[m]->d_numneigh);
|
||||
#else
|
||||
h_ilist = nk->lists_device[m]->d_ilist;
|
||||
h_numneigh = nk->lists_device[m]->d_numneigh;
|
||||
#endif
|
||||
Kokkos::deep_copy(h_ilist,nk->lists_device[m]->d_ilist);
|
||||
Kokkos::deep_copy(h_numneigh,nk->lists_device[m]->d_numneigh);
|
||||
}
|
||||
|
||||
for (int i = 0; i < inum; i++) nneigh += h_numneigh[h_ilist[i]];
|
||||
|
||||
return nneigh;
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef KOKKOS_LMP_H
|
||||
#define KOKKOS_LMP_H
|
||||
|
||||
#include "pointers.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class KokkosLMP : protected Pointers {
|
||||
public:
|
||||
int kokkos_exists;
|
||||
int neighflag;
|
||||
int exchange_comm_classic;
|
||||
int forward_comm_classic;
|
||||
int exchange_comm_on_host;
|
||||
int forward_comm_on_host;
|
||||
|
||||
KokkosLMP(class LAMMPS *, int, char **);
|
||||
~KokkosLMP();
|
||||
void accelerator(int, char **);
|
||||
int neigh_list_kokkos(int);
|
||||
int neigh_count(int);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,617 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_LMPTYPE_KOKKOS_H
|
||||
#define LMP_LMPTYPE_KOKKOS_H
|
||||
|
||||
#include <Kokkos_View.hpp>
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <Kokkos_Atomic.hpp>
|
||||
#include <Kokkos_DualView.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
#include <Kokkos_Vectorization.hpp>
|
||||
|
||||
#define MAX_TYPES_STACKPARAMS 12
|
||||
#define NeighClusterSize 8
|
||||
// set LMPHostype and LMPDeviceType
|
||||
|
||||
#ifndef DEVICE
|
||||
#define DEVICE 1
|
||||
#endif
|
||||
|
||||
#if DEVICE==1
|
||||
#ifdef KOKKOS_HAVE_OPENMP
|
||||
#include "Kokkos_OpenMP.hpp"
|
||||
typedef Kokkos::OpenMP LMPDeviceType;
|
||||
typedef Kokkos::OpenMP LMPHostType;
|
||||
#else
|
||||
#include "Kokkos_Threads.hpp"
|
||||
typedef Kokkos::Threads LMPDeviceType;
|
||||
typedef Kokkos::Threads LMPHostType;
|
||||
#endif
|
||||
#ifndef __CUDACC__
|
||||
struct double2 {
|
||||
double x, y;
|
||||
};
|
||||
struct float2 {
|
||||
float x, y;
|
||||
};
|
||||
struct double4 {
|
||||
double x, y, z, w;
|
||||
};
|
||||
struct float4 {
|
||||
float x, y, z, w;
|
||||
};
|
||||
#endif
|
||||
#else
|
||||
#include "cuda.h"
|
||||
#include "cuda_runtime.h"
|
||||
#include "Kokkos_Cuda.hpp"
|
||||
#include "Kokkos_Threads.hpp"
|
||||
typedef Kokkos::Cuda LMPDeviceType;
|
||||
typedef Kokkos::Cuda::host_mirror_device_type LMPHostType;
|
||||
#endif
|
||||
|
||||
// set ExecutionSpace stuct with variable "space"
|
||||
|
||||
template<class Device>
|
||||
struct ExecutionSpaceFromDevice;
|
||||
|
||||
#ifdef KOKKOS_HAVE_OPENMP
|
||||
template<>
|
||||
struct ExecutionSpaceFromDevice<Kokkos::OpenMP> {
|
||||
static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Host;
|
||||
};
|
||||
#else
|
||||
template<>
|
||||
struct ExecutionSpaceFromDevice<Kokkos::Threads> {
|
||||
static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Host;
|
||||
};
|
||||
#endif
|
||||
#if DEVICE==2
|
||||
template<>
|
||||
struct ExecutionSpaceFromDevice<Kokkos::Cuda> {
|
||||
static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Device;
|
||||
};
|
||||
#endif
|
||||
|
||||
// define precision
|
||||
// handle global precision, force, energy, positions, kspace separately
|
||||
|
||||
#ifndef PRECISION
|
||||
#define PRECISION 2
|
||||
#endif
|
||||
#if PRECISION==1
|
||||
typedef float LMP_FLOAT;
|
||||
typedef float2 LMP_FLOAT2;
|
||||
typedef float4 LMP_FLOAT4;
|
||||
#else
|
||||
typedef double LMP_FLOAT;
|
||||
typedef double2 LMP_FLOAT2;
|
||||
typedef double4 LMP_FLOAT4;
|
||||
#endif
|
||||
|
||||
#ifndef PREC_FORCE
|
||||
#define PREC_FORCE PRECISION
|
||||
#endif
|
||||
|
||||
#if PREC_FORCE==1
|
||||
typedef float F_FLOAT;
|
||||
typedef float2 F_FLOAT2;
|
||||
typedef float4 F_FLOAT4;
|
||||
#else
|
||||
typedef double F_FLOAT;
|
||||
typedef double2 F_FLOAT2;
|
||||
typedef double4 F_FLOAT4;
|
||||
#endif
|
||||
|
||||
#ifndef PREC_ENERGY
|
||||
#define PREC_ENERGY PRECISION
|
||||
#endif
|
||||
|
||||
#if PREC_ENERGY==1
|
||||
typedef float E_FLOAT;
|
||||
typedef float2 E_FLOAT2;
|
||||
typedef float4 E_FLOAT4;
|
||||
#else
|
||||
typedef double E_FLOAT;
|
||||
typedef double2 E_FLOAT2;
|
||||
typedef double4 E_FLOAT4;
|
||||
#endif
|
||||
|
||||
struct s_EV_FLOAT {
|
||||
E_FLOAT evdwl;
|
||||
E_FLOAT ecoul;
|
||||
E_FLOAT v[6];
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
s_EV_FLOAT() {
|
||||
evdwl = 0;
|
||||
ecoul = 0;
|
||||
v[0] = 0; v[1] = 0; v[2] = 0;
|
||||
v[3] = 0; v[4] = 0; v[5] = 0;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
s_EV_FLOAT& operator+=(const s_EV_FLOAT &rhs) {
|
||||
evdwl += rhs.evdwl;
|
||||
ecoul += rhs.ecoul;
|
||||
v[0] += rhs.v[0];
|
||||
v[1] += rhs.v[1];
|
||||
v[2] += rhs.v[2];
|
||||
v[3] += rhs.v[3];
|
||||
v[4] += rhs.v[4];
|
||||
v[5] += rhs.v[5];
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
typedef struct s_EV_FLOAT EV_FLOAT;
|
||||
|
||||
#ifndef PREC_POS
|
||||
#define PREC_POS PRECISION
|
||||
#endif
|
||||
|
||||
#if PREC_POS==1
|
||||
typedef float X_FLOAT;
|
||||
typedef float2 X_FLOAT2;
|
||||
typedef float4 X_FLOAT4;
|
||||
#else
|
||||
typedef double X_FLOAT;
|
||||
typedef double2 X_FLOAT2;
|
||||
typedef double4 X_FLOAT4;
|
||||
#endif
|
||||
|
||||
#ifndef PREC_VELOCITIES
|
||||
#define PREC_VELOCITIES PRECISION
|
||||
#endif
|
||||
|
||||
#if PREC_VELOCITIES==1
|
||||
typedef float V_FLOAT;
|
||||
typedef float2 V_FLOAT2;
|
||||
typedef float4 V_FLOAT4;
|
||||
#else
|
||||
typedef double V_FLOAT;
|
||||
typedef double2 V_FLOAT2;
|
||||
typedef double4 V_FLOAT4;
|
||||
#endif
|
||||
|
||||
#if PREC_KSPACE==1
|
||||
typedef float K_FLOAT;
|
||||
typedef float2 K_FLOAT2;
|
||||
typedef float4 K_FLOAT4;
|
||||
#else
|
||||
typedef double K_FLOAT;
|
||||
typedef double2 K_FLOAT2;
|
||||
typedef double4 K_FLOAT4;
|
||||
#endif
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
// LAMMPS types
|
||||
|
||||
template <class DeviceType>
|
||||
struct ArrayTypes;
|
||||
|
||||
template <>
|
||||
struct ArrayTypes<LMPDeviceType> {
|
||||
|
||||
// scalar types
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<int, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_scalar;
|
||||
typedef tdual_int_scalar::t_dev t_int_scalar;
|
||||
typedef tdual_int_scalar::t_dev_const t_int_scalar_const;
|
||||
typedef tdual_int_scalar::t_dev_um t_int_scalar_um;
|
||||
typedef tdual_int_scalar::t_dev_const_um t_int_scalar_const_um;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<LMP_FLOAT, LMPDeviceType::array_layout, LMPDeviceType>
|
||||
tdual_float_scalar;
|
||||
typedef tdual_float_scalar::t_dev t_float_scalar;
|
||||
typedef tdual_float_scalar::t_dev_const t_float_scalar_const;
|
||||
typedef tdual_float_scalar::t_dev_um t_float_scalar_um;
|
||||
typedef tdual_float_scalar::t_dev_const_um t_float_scalar_const_um;
|
||||
|
||||
// generic array types
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<int*, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_1d;
|
||||
typedef tdual_int_1d::t_dev t_int_1d;
|
||||
typedef tdual_int_1d::t_dev_const t_int_1d_const;
|
||||
typedef tdual_int_1d::t_dev_um t_int_1d_um;
|
||||
typedef tdual_int_1d::t_dev_const_um t_int_1d_const_um;
|
||||
typedef tdual_int_1d::t_dev_const_randomread t_int_1d_randomread;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<int**, Kokkos::LayoutRight, LMPDeviceType> tdual_int_2d;
|
||||
typedef tdual_int_2d::t_dev t_int_2d;
|
||||
typedef tdual_int_2d::t_dev_const t_int_2d_const;
|
||||
typedef tdual_int_2d::t_dev_um t_int_2d_um;
|
||||
typedef tdual_int_2d::t_dev_const_um t_int_2d_const_um;
|
||||
typedef tdual_int_2d::t_dev_const_randomread t_int_2d_randomread;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, LMPDeviceType>
|
||||
tdual_tagint_1d;
|
||||
typedef tdual_tagint_1d::t_dev t_tagint_1d;
|
||||
typedef tdual_tagint_1d::t_dev_const t_tagint_1d_const;
|
||||
typedef tdual_tagint_1d::t_dev_um t_tagint_1d_um;
|
||||
typedef tdual_tagint_1d::t_dev_const_um t_tagint_1d_const_um;
|
||||
typedef tdual_tagint_1d::t_dev_const_randomread t_tagint_1d_randomread;
|
||||
|
||||
// 1d float array n
|
||||
|
||||
typedef Kokkos::DualView<LMP_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_1d;
|
||||
typedef tdual_float_1d::t_dev t_float_1d;
|
||||
typedef tdual_float_1d::t_dev_const t_float_1d_const;
|
||||
typedef tdual_float_1d::t_dev_um t_float_1d_um;
|
||||
typedef tdual_float_1d::t_dev_const_um t_float_1d_const_um;
|
||||
typedef tdual_float_1d::t_dev_const_randomread t_float_1d_randomread;
|
||||
|
||||
//2d float array n
|
||||
typedef Kokkos::DualView<LMP_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_float_2d;
|
||||
typedef tdual_float_2d::t_dev t_float_2d;
|
||||
typedef tdual_float_2d::t_dev_const t_float_2d_const;
|
||||
typedef tdual_float_2d::t_dev_um t_float_2d_um;
|
||||
typedef tdual_float_2d::t_dev_const_um t_float_2d_const_um;
|
||||
typedef tdual_float_2d::t_dev_const_randomread t_float_2d_randomread;
|
||||
|
||||
//Position Types
|
||||
//1d X_FLOAT array n
|
||||
typedef Kokkos::DualView<X_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_xfloat_1d;
|
||||
typedef tdual_xfloat_1d::t_dev t_xfloat_1d;
|
||||
typedef tdual_xfloat_1d::t_dev_const t_xfloat_1d_const;
|
||||
typedef tdual_xfloat_1d::t_dev_um t_xfloat_1d_um;
|
||||
typedef tdual_xfloat_1d::t_dev_const_um t_xfloat_1d_const_um;
|
||||
typedef tdual_xfloat_1d::t_dev_const_randomread t_xfloat_1d_randomread;
|
||||
|
||||
//2d X_FLOAT array n*m
|
||||
typedef Kokkos::DualView<X_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_xfloat_2d;
|
||||
typedef tdual_xfloat_2d::t_dev t_xfloat_2d;
|
||||
typedef tdual_xfloat_2d::t_dev_const t_xfloat_2d_const;
|
||||
typedef tdual_xfloat_2d::t_dev_um t_xfloat_2d_um;
|
||||
typedef tdual_xfloat_2d::t_dev_const_um t_xfloat_2d_const_um;
|
||||
typedef tdual_xfloat_2d::t_dev_const_randomread t_xfloat_2d_randomread;
|
||||
|
||||
//2d X_FLOAT array n*4
|
||||
#ifdef LMP_KOKKOS_NO_LEGACY
|
||||
typedef Kokkos::DualView<X_FLOAT*[3], Kokkos::LayoutLeft, LMPDeviceType> tdual_x_array;
|
||||
#else
|
||||
typedef Kokkos::DualView<X_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_x_array;
|
||||
#endif
|
||||
typedef tdual_x_array::t_dev t_x_array;
|
||||
typedef tdual_x_array::t_dev_const t_x_array_const;
|
||||
typedef tdual_x_array::t_dev_um t_x_array_um;
|
||||
typedef tdual_x_array::t_dev_const_um t_x_array_const_um;
|
||||
typedef tdual_x_array::t_dev_const_randomread t_x_array_randomread;
|
||||
|
||||
//Velocity Types
|
||||
//1d V_FLOAT array n
|
||||
typedef Kokkos::DualView<V_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_vfloat_1d;
|
||||
typedef tdual_vfloat_1d::t_dev t_vfloat_1d;
|
||||
typedef tdual_vfloat_1d::t_dev_const t_vfloat_1d_const;
|
||||
typedef tdual_vfloat_1d::t_dev_um t_vfloat_1d_um;
|
||||
typedef tdual_vfloat_1d::t_dev_const_um t_vfloat_1d_const_um;
|
||||
typedef tdual_vfloat_1d::t_dev_const_randomread t_vfloat_1d_randomread;
|
||||
|
||||
//2d V_FLOAT array n*m
|
||||
typedef Kokkos::DualView<V_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_vfloat_2d;
|
||||
typedef tdual_vfloat_2d::t_dev t_vfloat_2d;
|
||||
typedef tdual_vfloat_2d::t_dev_const t_vfloat_2d_const;
|
||||
typedef tdual_vfloat_2d::t_dev_um t_vfloat_2d_um;
|
||||
typedef tdual_vfloat_2d::t_dev_const_um t_vfloat_2d_const_um;
|
||||
typedef tdual_vfloat_2d::t_dev_const_randomread t_vfloat_2d_randomread;
|
||||
|
||||
//2d V_FLOAT array n*3
|
||||
typedef Kokkos::DualView<V_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_v_array;
|
||||
//typedef Kokkos::DualView<V_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_v_array;
|
||||
typedef tdual_v_array::t_dev t_v_array;
|
||||
typedef tdual_v_array::t_dev_const t_v_array_const;
|
||||
typedef tdual_v_array::t_dev_um t_v_array_um;
|
||||
typedef tdual_v_array::t_dev_const_um t_v_array_const_um;
|
||||
typedef tdual_v_array::t_dev_const_randomread t_v_array_randomread;
|
||||
|
||||
//Force Types
|
||||
//1d F_FLOAT array n
|
||||
|
||||
typedef Kokkos::DualView<F_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_ffloat_1d;
|
||||
typedef tdual_ffloat_1d::t_dev t_ffloat_1d;
|
||||
typedef tdual_ffloat_1d::t_dev_const t_ffloat_1d_const;
|
||||
typedef tdual_ffloat_1d::t_dev_um t_ffloat_1d_um;
|
||||
typedef tdual_ffloat_1d::t_dev_const_um t_ffloat_1d_const_um;
|
||||
typedef tdual_ffloat_1d::t_dev_const_randomread t_ffloat_1d_randomread;
|
||||
|
||||
//2d F_FLOAT array n*m
|
||||
|
||||
typedef Kokkos::DualView<F_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_ffloat_2d;
|
||||
typedef tdual_ffloat_2d::t_dev t_ffloat_2d;
|
||||
typedef tdual_ffloat_2d::t_dev_const t_ffloat_2d_const;
|
||||
typedef tdual_ffloat_2d::t_dev_um t_ffloat_2d_um;
|
||||
typedef tdual_ffloat_2d::t_dev_const_um t_ffloat_2d_const_um;
|
||||
typedef tdual_ffloat_2d::t_dev_const_randomread t_ffloat_2d_randomread;
|
||||
|
||||
//2d F_FLOAT array n*3
|
||||
|
||||
typedef Kokkos::DualView<F_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_f_array;
|
||||
//typedef Kokkos::DualView<F_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_f_array;
|
||||
typedef tdual_f_array::t_dev t_f_array;
|
||||
typedef tdual_f_array::t_dev_const t_f_array_const;
|
||||
typedef tdual_f_array::t_dev_um t_f_array_um;
|
||||
typedef tdual_f_array::t_dev_const_um t_f_array_const_um;
|
||||
typedef tdual_f_array::t_dev_const_randomread t_f_array_randomread;
|
||||
|
||||
//2d F_FLOAT array n*6 (for virial)
|
||||
|
||||
typedef Kokkos::DualView<F_FLOAT*[6], Kokkos::LayoutRight, LMPDeviceType> tdual_virial_array;
|
||||
typedef tdual_virial_array::t_dev t_virial_array;
|
||||
typedef tdual_virial_array::t_dev_const t_virial_array_const;
|
||||
typedef tdual_virial_array::t_dev_um t_virial_array_um;
|
||||
typedef tdual_virial_array::t_dev_const_um t_virial_array_const_um;
|
||||
typedef tdual_virial_array::t_dev_const_randomread t_virial_array_randomread;
|
||||
|
||||
//Energy Types
|
||||
//1d E_FLOAT array n
|
||||
|
||||
typedef Kokkos::DualView<E_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_efloat_1d;
|
||||
typedef tdual_efloat_1d::t_dev t_efloat_1d;
|
||||
typedef tdual_efloat_1d::t_dev_const t_efloat_1d_const;
|
||||
typedef tdual_efloat_1d::t_dev_um t_efloat_1d_um;
|
||||
typedef tdual_efloat_1d::t_dev_const_um t_efloat_1d_const_um;
|
||||
typedef tdual_efloat_1d::t_dev_const_randomread t_efloat_1d_randomread;
|
||||
|
||||
//2d E_FLOAT array n*m
|
||||
|
||||
typedef Kokkos::DualView<E_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_efloat_2d;
|
||||
typedef tdual_efloat_2d::t_dev t_efloat_2d;
|
||||
typedef tdual_efloat_2d::t_dev_const t_efloat_2d_const;
|
||||
typedef tdual_efloat_2d::t_dev_um t_efloat_2d_um;
|
||||
typedef tdual_efloat_2d::t_dev_const_um t_efloat_2d_const_um;
|
||||
typedef tdual_efloat_2d::t_dev_const_randomread t_efloat_2d_randomread;
|
||||
|
||||
//2d E_FLOAT array n*3
|
||||
|
||||
typedef Kokkos::DualView<E_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_e_array;
|
||||
typedef tdual_e_array::t_dev t_e_array;
|
||||
typedef tdual_e_array::t_dev_const t_e_array_const;
|
||||
typedef tdual_e_array::t_dev_um t_e_array_um;
|
||||
typedef tdual_e_array::t_dev_const_um t_e_array_const_um;
|
||||
typedef tdual_e_array::t_dev_const_randomread t_e_array_randomread;
|
||||
|
||||
//Neighbor Types
|
||||
|
||||
typedef Kokkos::DualView<int**, LMPDeviceType::array_layout, LMPDeviceType> tdual_neighbors_2d;
|
||||
typedef tdual_neighbors_2d::t_dev t_neighbors_2d;
|
||||
typedef tdual_neighbors_2d::t_dev_const t_neighbors_2d_const;
|
||||
typedef tdual_neighbors_2d::t_dev_um t_neighbors_2d_um;
|
||||
typedef tdual_neighbors_2d::t_dev_const_um t_neighbors_2d_const_um;
|
||||
typedef tdual_neighbors_2d::t_dev_const_randomread t_neighbors_2d_randomread;
|
||||
|
||||
};
|
||||
|
||||
#if DEVICE==2
|
||||
template <>
|
||||
struct ArrayTypes<LMPHostType> {
|
||||
|
||||
//Scalar Types
|
||||
|
||||
typedef Kokkos::DualView<int, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_scalar;
|
||||
typedef tdual_int_scalar::t_host t_int_scalar;
|
||||
typedef tdual_int_scalar::t_host_const t_int_scalar_const;
|
||||
typedef tdual_int_scalar::t_host_um t_int_scalar_um;
|
||||
typedef tdual_int_scalar::t_host_const_um t_int_scalar_const_um;
|
||||
|
||||
typedef Kokkos::DualView<LMP_FLOAT, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_scalar;
|
||||
typedef tdual_float_scalar::t_host t_float_scalar;
|
||||
typedef tdual_float_scalar::t_host_const t_float_scalar_const;
|
||||
typedef tdual_float_scalar::t_host_um t_float_scalar_um;
|
||||
typedef tdual_float_scalar::t_host_const_um t_float_scalar_const_um;
|
||||
|
||||
//Generic ArrayTypes
|
||||
typedef Kokkos::DualView<int*, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_1d;
|
||||
typedef tdual_int_1d::t_host t_int_1d;
|
||||
typedef tdual_int_1d::t_host_const t_int_1d_const;
|
||||
typedef tdual_int_1d::t_host_um t_int_1d_um;
|
||||
typedef tdual_int_1d::t_host_const_um t_int_1d_const_um;
|
||||
typedef tdual_int_1d::t_host_const_randomread t_int_1d_randomread;
|
||||
|
||||
typedef Kokkos::DualView<int**, Kokkos::LayoutRight, LMPDeviceType> tdual_int_2d;
|
||||
typedef tdual_int_2d::t_host t_int_2d;
|
||||
typedef tdual_int_2d::t_host_const t_int_2d_const;
|
||||
typedef tdual_int_2d::t_host_um t_int_2d_um;
|
||||
typedef tdual_int_2d::t_host_const_um t_int_2d_const_um;
|
||||
typedef tdual_int_2d::t_host_const_randomread t_int_2d_randomread;
|
||||
|
||||
typedef Kokkos::DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, LMPDeviceType> tdual_tagint_1d;
|
||||
typedef tdual_tagint_1d::t_host t_tagint_1d;
|
||||
typedef tdual_tagint_1d::t_host_const t_tagint_1d_const;
|
||||
typedef tdual_tagint_1d::t_host_um t_tagint_1d_um;
|
||||
typedef tdual_tagint_1d::t_host_const_um t_tagint_1d_const_um;
|
||||
typedef tdual_tagint_1d::t_host_const_randomread t_tagint_1d_randomread;
|
||||
|
||||
//1d float array n
|
||||
typedef Kokkos::DualView<LMP_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_1d;
|
||||
typedef tdual_float_1d::t_host t_float_1d;
|
||||
typedef tdual_float_1d::t_host_const t_float_1d_const;
|
||||
typedef tdual_float_1d::t_host_um t_float_1d_um;
|
||||
typedef tdual_float_1d::t_host_const_um t_float_1d_const_um;
|
||||
typedef tdual_float_1d::t_host_const_randomread t_float_1d_randomread;
|
||||
|
||||
//2d float array n
|
||||
typedef Kokkos::DualView<LMP_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_float_2d;
|
||||
typedef tdual_float_2d::t_host t_float_2d;
|
||||
typedef tdual_float_2d::t_host_const t_float_2d_const;
|
||||
typedef tdual_float_2d::t_host_um t_float_2d_um;
|
||||
typedef tdual_float_2d::t_host_const_um t_float_2d_const_um;
|
||||
typedef tdual_float_2d::t_host_const_randomread t_float_2d_randomread;
|
||||
|
||||
//Position Types
|
||||
//1d X_FLOAT array n
|
||||
typedef Kokkos::DualView<X_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_xfloat_1d;
|
||||
typedef tdual_xfloat_1d::t_host t_xfloat_1d;
|
||||
typedef tdual_xfloat_1d::t_host_const t_xfloat_1d_const;
|
||||
typedef tdual_xfloat_1d::t_host_um t_xfloat_1d_um;
|
||||
typedef tdual_xfloat_1d::t_host_const_um t_xfloat_1d_const_um;
|
||||
typedef tdual_xfloat_1d::t_host_const_randomread t_xfloat_1d_randomread;
|
||||
|
||||
//2d X_FLOAT array n*m
|
||||
typedef Kokkos::DualView<X_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_xfloat_2d;
|
||||
typedef tdual_xfloat_2d::t_host t_xfloat_2d;
|
||||
typedef tdual_xfloat_2d::t_host_const t_xfloat_2d_const;
|
||||
typedef tdual_xfloat_2d::t_host_um t_xfloat_2d_um;
|
||||
typedef tdual_xfloat_2d::t_host_const_um t_xfloat_2d_const_um;
|
||||
typedef tdual_xfloat_2d::t_host_const_randomread t_xfloat_2d_randomread;
|
||||
|
||||
//2d X_FLOAT array n*3
|
||||
typedef Kokkos::DualView<X_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_x_array;
|
||||
typedef tdual_x_array::t_host t_x_array;
|
||||
typedef tdual_x_array::t_host_const t_x_array_const;
|
||||
typedef tdual_x_array::t_host_um t_x_array_um;
|
||||
typedef tdual_x_array::t_host_const_um t_x_array_const_um;
|
||||
typedef tdual_x_array::t_host_const_randomread t_x_array_randomread;
|
||||
|
||||
//Velocity Types
|
||||
//1d V_FLOAT array n
|
||||
typedef Kokkos::DualView<V_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_vfloat_1d;
|
||||
typedef tdual_vfloat_1d::t_host t_vfloat_1d;
|
||||
typedef tdual_vfloat_1d::t_host_const t_vfloat_1d_const;
|
||||
typedef tdual_vfloat_1d::t_host_um t_vfloat_1d_um;
|
||||
typedef tdual_vfloat_1d::t_host_const_um t_vfloat_1d_const_um;
|
||||
typedef tdual_vfloat_1d::t_host_const_randomread t_vfloat_1d_randomread;
|
||||
|
||||
//2d V_FLOAT array n*m
|
||||
typedef Kokkos::DualView<V_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_vfloat_2d;
|
||||
typedef tdual_vfloat_2d::t_host t_vfloat_2d;
|
||||
typedef tdual_vfloat_2d::t_host_const t_vfloat_2d_const;
|
||||
typedef tdual_vfloat_2d::t_host_um t_vfloat_2d_um;
|
||||
typedef tdual_vfloat_2d::t_host_const_um t_vfloat_2d_const_um;
|
||||
typedef tdual_vfloat_2d::t_host_const_randomread t_vfloat_2d_randomread;
|
||||
|
||||
//2d V_FLOAT array n*3
|
||||
typedef Kokkos::DualView<V_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_v_array;
|
||||
//typedef Kokkos::DualView<V_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_v_array;
|
||||
typedef tdual_v_array::t_host t_v_array;
|
||||
typedef tdual_v_array::t_host_const t_v_array_const;
|
||||
typedef tdual_v_array::t_host_um t_v_array_um;
|
||||
typedef tdual_v_array::t_host_const_um t_v_array_const_um;
|
||||
typedef tdual_v_array::t_host_const_randomread t_v_array_randomread;
|
||||
|
||||
//Force Types
|
||||
//1d F_FLOAT array n
|
||||
typedef Kokkos::DualView<F_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_ffloat_1d;
|
||||
typedef tdual_ffloat_1d::t_host t_ffloat_1d;
|
||||
typedef tdual_ffloat_1d::t_host_const t_ffloat_1d_const;
|
||||
typedef tdual_ffloat_1d::t_host_um t_ffloat_1d_um;
|
||||
typedef tdual_ffloat_1d::t_host_const_um t_ffloat_1d_const_um;
|
||||
typedef tdual_ffloat_1d::t_host_const_randomread t_ffloat_1d_randomread;
|
||||
|
||||
//2d F_FLOAT array n*m
|
||||
typedef Kokkos::DualView<F_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_ffloat_2d;
|
||||
typedef tdual_ffloat_2d::t_host t_ffloat_2d;
|
||||
typedef tdual_ffloat_2d::t_host_const t_ffloat_2d_const;
|
||||
typedef tdual_ffloat_2d::t_host_um t_ffloat_2d_um;
|
||||
typedef tdual_ffloat_2d::t_host_const_um t_ffloat_2d_const_um;
|
||||
typedef tdual_ffloat_2d::t_host_const_randomread t_ffloat_2d_randomread;
|
||||
|
||||
//2d F_FLOAT array n*3
|
||||
typedef Kokkos::DualView<F_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_f_array;
|
||||
//typedef Kokkos::DualView<F_FLOAT*[3], LMPDeviceType::array_layout, LMPDeviceType> tdual_f_array;
|
||||
typedef tdual_f_array::t_host t_f_array;
|
||||
typedef tdual_f_array::t_host_const t_f_array_const;
|
||||
typedef tdual_f_array::t_host_um t_f_array_um;
|
||||
typedef tdual_f_array::t_host_const_um t_f_array_const_um;
|
||||
typedef tdual_f_array::t_host_const_randomread t_f_array_randomread;
|
||||
|
||||
//2d F_FLOAT array n*6 (for virial)
|
||||
typedef Kokkos::DualView<F_FLOAT*[6], Kokkos::LayoutRight, LMPDeviceType> tdual_virial_array;
|
||||
typedef tdual_virial_array::t_host t_virial_array;
|
||||
typedef tdual_virial_array::t_host_const t_virial_array_const;
|
||||
typedef tdual_virial_array::t_host_um t_virial_array_um;
|
||||
typedef tdual_virial_array::t_host_const_um t_virial_array_const_um;
|
||||
typedef tdual_virial_array::t_host_const_randomread t_virial_array_randomread;
|
||||
|
||||
|
||||
|
||||
//Energy Types
|
||||
//1d E_FLOAT array n
|
||||
typedef Kokkos::DualView<E_FLOAT*, LMPDeviceType::array_layout, LMPDeviceType> tdual_efloat_1d;
|
||||
typedef tdual_efloat_1d::t_host t_efloat_1d;
|
||||
typedef tdual_efloat_1d::t_host_const t_efloat_1d_const;
|
||||
typedef tdual_efloat_1d::t_host_um t_efloat_1d_um;
|
||||
typedef tdual_efloat_1d::t_host_const_um t_efloat_1d_const_um;
|
||||
typedef tdual_efloat_1d::t_host_const_randomread t_efloat_1d_randomread;
|
||||
|
||||
//2d E_FLOAT array n*m
|
||||
typedef Kokkos::DualView<E_FLOAT**, Kokkos::LayoutRight, LMPDeviceType> tdual_efloat_2d;
|
||||
typedef tdual_efloat_2d::t_host t_efloat_2d;
|
||||
typedef tdual_efloat_2d::t_host_const t_efloat_2d_const;
|
||||
typedef tdual_efloat_2d::t_host_um t_efloat_2d_um;
|
||||
typedef tdual_efloat_2d::t_host_const_um t_efloat_2d_const_um;
|
||||
typedef tdual_efloat_2d::t_host_const_randomread t_efloat_2d_randomread;
|
||||
|
||||
//2d E_FLOAT array n*3
|
||||
typedef Kokkos::DualView<E_FLOAT*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_e_array;
|
||||
typedef tdual_e_array::t_host t_e_array;
|
||||
typedef tdual_e_array::t_host_const t_e_array_const;
|
||||
typedef tdual_e_array::t_host_um t_e_array_um;
|
||||
typedef tdual_e_array::t_host_const_um t_e_array_const_um;
|
||||
typedef tdual_e_array::t_host_const_randomread t_e_array_randomread;
|
||||
|
||||
//Neighbor Types
|
||||
typedef Kokkos::DualView<int**, LMPDeviceType::array_layout, LMPDeviceType> tdual_neighbors_2d;
|
||||
typedef tdual_neighbors_2d::t_host t_neighbors_2d;
|
||||
typedef tdual_neighbors_2d::t_host_const t_neighbors_2d_const;
|
||||
typedef tdual_neighbors_2d::t_host_um t_neighbors_2d_um;
|
||||
typedef tdual_neighbors_2d::t_host_const_um t_neighbors_2d_const_um;
|
||||
typedef tdual_neighbors_2d::t_host_const_randomread t_neighbors_2d_randomread;
|
||||
|
||||
};
|
||||
#endif
|
||||
//default LAMMPS Types
|
||||
typedef struct ArrayTypes<LMPDeviceType> DAT;
|
||||
typedef struct ArrayTypes<LMPHostType> HAT;
|
||||
|
||||
template<class DeviceType, class BufferView, class DualView>
|
||||
void buffer_view(BufferView &buf, DualView &view,
|
||||
const size_t n0,
|
||||
const size_t n1 = 0,
|
||||
const size_t n2 = 0,
|
||||
const size_t n3 = 0,
|
||||
const size_t n4 = 0,
|
||||
const size_t n5 = 0,
|
||||
const size_t n6 = 0,
|
||||
const size_t n7 = 0) {
|
||||
|
||||
buf = BufferView(
|
||||
view.template view<DeviceType>().ptr_on_device(),
|
||||
n0,n1,n2,n3,n4,n5,n6,n7);
|
||||
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
struct MemsetZeroFunctor {
|
||||
typedef DeviceType device_type ;
|
||||
void* ptr;
|
||||
KOKKOS_INLINE_FUNCTION void operator()(const int i) const {
|
||||
((int*)ptr)[i] = 0;
|
||||
}
|
||||
};
|
||||
|
||||
template<class ViewType>
|
||||
void memset_kokkos (ViewType &view) {
|
||||
static MemsetZeroFunctor<typename ViewType::device_type> f;
|
||||
f.ptr = view.ptr_on_device();
|
||||
Kokkos::parallel_for(view.capacity()*sizeof(typename ViewType::value_type)/4, f);
|
||||
ViewType::device_type::fence();
|
||||
}
|
||||
|
||||
|
||||
#endif
|
|
@ -0,0 +1,208 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Kokkos versions of create/grow/destroy multi-dimensional arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
create a 1d array
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <typename TYPE>
|
||||
TYPE create_kokkos(TYPE &data, typename TYPE::value_type *&array,
|
||||
int n1, const char *name)
|
||||
{
|
||||
data = TYPE(name,n1);
|
||||
array = data.h_view.ptr_on_device();
|
||||
return data;
|
||||
}
|
||||
|
||||
template <typename TYPE, typename HTYPE>
|
||||
TYPE create_kokkos(TYPE &data, HTYPE &h_data,
|
||||
typename TYPE::value_type *&array, int n1,
|
||||
const char *name)
|
||||
{
|
||||
data = TYPE(std::string(name),n1);
|
||||
#ifndef KOKKOS_USE_UVM
|
||||
h_data = Kokkos::create_mirror_view(data);
|
||||
#else
|
||||
h_data = data;
|
||||
#endif
|
||||
array = h_data.ptr_on_device();
|
||||
return data;
|
||||
}
|
||||
|
||||
|
||||
template <typename TYPE, typename HTYPE>
|
||||
TYPE create_kokkos(TYPE &data, HTYPE &h_data,
|
||||
int n1, const char *name)
|
||||
{
|
||||
data = TYPE(std::string(name),n1);
|
||||
#ifndef KOKKOS_USE_UVM
|
||||
h_data = Kokkos::create_mirror_view(data);
|
||||
#else
|
||||
h_data = data;
|
||||
#endif
|
||||
return data;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
grow or shrink 1st dim of a 1d array
|
||||
last dim must stay the same
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <typename TYPE>
|
||||
TYPE grow_kokkos(TYPE &data, typename TYPE::value_type *&array,
|
||||
int n1, const char *name)
|
||||
{
|
||||
if (array == NULL) return create_kokkos(data,array,n1,name);
|
||||
|
||||
data.resize(n1);
|
||||
array = data.h_view.ptr_on_device();
|
||||
return data;
|
||||
}
|
||||
|
||||
template <typename TYPE>
|
||||
void destroy_kokkos(TYPE data, typename TYPE::value_type* &array)
|
||||
{
|
||||
if (array == NULL) return;
|
||||
data = TYPE();
|
||||
array = NULL;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
create a 2d array
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <typename TYPE>
|
||||
TYPE create_kokkos(TYPE &data, int n1, int n2, const char *name)
|
||||
{
|
||||
data = TYPE(name,n1,n2);
|
||||
return data;
|
||||
}
|
||||
|
||||
template <typename TYPE, typename HTYPE>
|
||||
TYPE create_kokkos(TYPE &data, HTYPE &h_data, int n1, int n2,
|
||||
const char *name)
|
||||
{
|
||||
data = TYPE(std::string(name),n1,n2);
|
||||
#ifndef KOKKOS_USE_UVM
|
||||
h_data = Kokkos::create_mirror_view(data);
|
||||
#else
|
||||
h_data = data;
|
||||
#endif
|
||||
return data;
|
||||
}
|
||||
|
||||
template <typename TYPE>
|
||||
TYPE create_kokkos(TYPE &data, typename TYPE::value_type **&array,
|
||||
int n1, int n2, const char *name)
|
||||
{
|
||||
data = TYPE(std::string(name),n1,n2);
|
||||
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
|
||||
array = (typename TYPE::value_type **) smalloc(nbytes,name);
|
||||
|
||||
bigint n = 0;
|
||||
for (int i = 0; i < n1; i++) {
|
||||
array[i] = &data.h_view(i,0);
|
||||
n += n2;
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
template <typename TYPE, typename HTYPE>
|
||||
TYPE create_kokkos(TYPE &data, HTYPE &h_data,
|
||||
typename TYPE::value_type **&array, int n1, int n2,
|
||||
const char *name)
|
||||
{
|
||||
data = TYPE(std::string(name),n1,n2);
|
||||
#ifndef KOKKOS_USE_UVM
|
||||
h_data = Kokkos::create_mirror_view(data);
|
||||
#else
|
||||
h_data = data;
|
||||
#endif
|
||||
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
|
||||
array = (typename TYPE::value_type **) smalloc(nbytes,name);
|
||||
|
||||
bigint n = 0;
|
||||
for (int i = 0; i < n1; i++) {
|
||||
array[i] = &h_data(i,0);
|
||||
n += n2;
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
grow or shrink 1st dim of a 2d array
|
||||
last dim must stay the same
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <typename TYPE>
|
||||
TYPE grow_kokkos(TYPE &data, typename TYPE::value_type **&array,
|
||||
int n1, int n2, const char *name)
|
||||
{
|
||||
if (array == NULL) return create_kokkos(data,array,n1,n2,name);
|
||||
data.resize(n1,n2);
|
||||
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
|
||||
array = (typename TYPE::value_type**) srealloc(array,nbytes,name);
|
||||
|
||||
for (int i = 0; i < n1; i++)
|
||||
array[i] = &data.h_view(i,0);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
template <typename TYPE>
|
||||
TYPE create_kokkos(TYPE &data, typename TYPE::value_type **&array,
|
||||
int n1, const char *name)
|
||||
{
|
||||
data = TYPE(std::string(name),n1);
|
||||
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
|
||||
array = (typename TYPE::value_type **) smalloc(nbytes,name);
|
||||
|
||||
for (int i = 0; i < n1; i++)
|
||||
array[i] = &data.h_view(i,0);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
template <typename TYPE>
|
||||
TYPE grow_kokkos(TYPE &data, typename TYPE::value_type **&array,
|
||||
int n1, const char *name)
|
||||
{
|
||||
if (array == NULL) return create_kokkos(data,array,n1,name);
|
||||
|
||||
data.resize(n1);
|
||||
|
||||
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
|
||||
array = (typename TYPE::value_type **) smalloc(nbytes,name);
|
||||
|
||||
for (int i = 0; i < n1; i++)
|
||||
array[i] = &data.h_view(i,0);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
destroy a 2d array
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <typename TYPE>
|
||||
void destroy_kokkos(TYPE data, typename TYPE::value_type** &array)
|
||||
{
|
||||
if (array == NULL) return;
|
||||
data = TYPE();
|
||||
sfree(array);
|
||||
array = NULL;
|
||||
}
|
|
@ -0,0 +1,585 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "modify_kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "update.h"
|
||||
#include "fix.h"
|
||||
#include "compute.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define BIG 1.0e20
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
ModifyKokkos::ModifyKokkos(LAMMPS *lmp) : Modify(lmp)
|
||||
{
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
setup for run, calls setup() of all fixes and computes
|
||||
called from Verlet, RESPA, Min
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::setup(int vflag)
|
||||
{
|
||||
// compute setup needs to come before fix setup
|
||||
// b/c NH fixes need use DOF of temperature computes
|
||||
|
||||
for (int i = 0; i < ncompute; i++) compute[i]->setup();
|
||||
|
||||
if (update->whichflag == 1)
|
||||
for (int i = 0; i < nfix; i++) {
|
||||
atomKK->sync(fix[i]->execution_space,fix[i]->datamask_read);
|
||||
atomKK->modified(fix[i]->execution_space,fix[i]->datamask_modify);
|
||||
fix[i]->setup(vflag);
|
||||
}
|
||||
else if (update->whichflag == 2)
|
||||
for (int i = 0; i < nfix; i++) {
|
||||
atomKK->sync(fix[i]->execution_space,fix[i]->datamask_read);
|
||||
atomKK->modified(fix[i]->execution_space,fix[i]->datamask_modify);
|
||||
fix[i]->min_setup(vflag);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
setup pre_exchange call, only for fixes that define pre_exchange
|
||||
called from Verlet, RESPA, Min, and WriteRestart with whichflag = 0
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::setup_pre_exchange()
|
||||
{
|
||||
if (update->whichflag <= 1)
|
||||
for (int i = 0; i < n_pre_exchange; i++) {
|
||||
atomKK->sync(fix[list_pre_exchange[i]]->execution_space,
|
||||
fix[list_pre_exchange[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_pre_exchange[i]]->execution_space,
|
||||
fix[list_pre_exchange[i]]->datamask_modify);
|
||||
fix[list_pre_exchange[i]]->setup_pre_exchange();
|
||||
}
|
||||
else if (update->whichflag == 2)
|
||||
for (int i = 0; i < n_min_pre_exchange; i++) {
|
||||
atomKK->sync(fix[list_min_pre_exchange[i]]->execution_space,
|
||||
fix[list_min_pre_exchange[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_min_pre_exchange[i]]->execution_space,
|
||||
fix[list_min_pre_exchange[i]]->datamask_modify);
|
||||
fix[list_min_pre_exchange[i]]->min_setup_pre_exchange();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
setup pre_neighbor call, only for fixes that define pre_neighbor
|
||||
called from Verlet, RESPA
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::setup_pre_neighbor()
|
||||
{
|
||||
if (update->whichflag == 1)
|
||||
for (int i = 0; i < n_pre_neighbor; i++) {
|
||||
atomKK->sync(fix[list_pre_neighbor[i]]->execution_space,
|
||||
fix[list_pre_neighbor[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_pre_neighbor[i]]->execution_space,
|
||||
fix[list_pre_neighbor[i]]->datamask_modify);
|
||||
fix[list_pre_neighbor[i]]->setup_pre_neighbor();
|
||||
}
|
||||
else if (update->whichflag == 2)
|
||||
for (int i = 0; i < n_min_pre_neighbor; i++) {
|
||||
atomKK->sync(fix[list_min_pre_neighbor[i]]->execution_space,
|
||||
fix[list_min_pre_neighbor[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_min_pre_neighbor[i]]->execution_space,
|
||||
fix[list_min_pre_neighbor[i]]->datamask_modify);
|
||||
fix[list_min_pre_neighbor[i]]->min_setup_pre_neighbor();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
setup pre_force call, only for fixes that define pre_force
|
||||
called from Verlet, RESPA, Min
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::setup_pre_force(int vflag)
|
||||
{
|
||||
if (update->whichflag == 1)
|
||||
for (int i = 0; i < n_pre_force; i++) {
|
||||
atomKK->sync(fix[list_pre_force[i]]->execution_space,
|
||||
fix[list_pre_force[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_pre_force[i]]->execution_space,
|
||||
fix[list_pre_force[i]]->datamask_modify);
|
||||
fix[list_pre_force[i]]->setup_pre_force(vflag);
|
||||
}
|
||||
else if (update->whichflag == 2)
|
||||
for (int i = 0; i < n_min_pre_force; i++) {
|
||||
atomKK->sync(fix[list_min_pre_force[i]]->execution_space,
|
||||
fix[list_min_pre_force[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_min_pre_force[i]]->execution_space,
|
||||
fix[list_min_pre_force[i]]->datamask_modify);
|
||||
fix[list_min_pre_force[i]]->min_setup_pre_force(vflag);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
1st half of integrate call, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::initial_integrate(int vflag)
|
||||
{
|
||||
for (int i = 0; i < n_initial_integrate; i++) {
|
||||
atomKK->sync(fix[list_initial_integrate[i]]->execution_space,
|
||||
fix[list_initial_integrate[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_initial_integrate[i]]->execution_space,
|
||||
fix[list_initial_integrate[i]]->datamask_modify);
|
||||
fix[list_initial_integrate[i]]->initial_integrate(vflag);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
post_integrate call, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::post_integrate()
|
||||
{
|
||||
for (int i = 0; i < n_post_integrate; i++) {
|
||||
atomKK->sync(fix[list_post_integrate[i]]->execution_space,
|
||||
fix[list_post_integrate[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_post_integrate[i]]->execution_space,
|
||||
fix[list_post_integrate[i]]->datamask_modify);
|
||||
fix[list_post_integrate[i]]->post_integrate();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pre_exchange call, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::pre_exchange()
|
||||
{
|
||||
for (int i = 0; i < n_pre_exchange; i++) {
|
||||
atomKK->sync(fix[list_pre_exchange[i]]->execution_space,
|
||||
fix[list_pre_exchange[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_pre_exchange[i]]->execution_space,
|
||||
fix[list_pre_exchange[i]]->datamask_modify);
|
||||
fix[list_pre_exchange[i]]->pre_exchange();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pre_neighbor call, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::pre_neighbor()
|
||||
{
|
||||
for (int i = 0; i < n_pre_neighbor; i++) {
|
||||
atomKK->sync(fix[list_pre_neighbor[i]]->execution_space,
|
||||
fix[list_pre_neighbor[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_pre_neighbor[i]]->execution_space,
|
||||
fix[list_pre_neighbor[i]]->datamask_modify);
|
||||
fix[list_pre_neighbor[i]]->pre_neighbor();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pre_force call, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::pre_force(int vflag)
|
||||
{
|
||||
for (int i = 0; i < n_pre_force; i++) {
|
||||
atomKK->sync(fix[list_pre_force[i]]->execution_space,
|
||||
fix[list_pre_force[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_pre_force[i]]->execution_space,
|
||||
fix[list_pre_force[i]]->datamask_modify);
|
||||
fix[list_pre_force[i]]->pre_force(vflag);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
post_force call, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::post_force(int vflag)
|
||||
{
|
||||
for (int i = 0; i < n_post_force; i++) {
|
||||
atomKK->sync(fix[list_post_force[i]]->execution_space,
|
||||
fix[list_post_force[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_post_force[i]]->execution_space,
|
||||
fix[list_post_force[i]]->datamask_modify);
|
||||
fix[list_post_force[i]]->post_force(vflag);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
2nd half of integrate call, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::final_integrate()
|
||||
{
|
||||
for (int i = 0; i < n_final_integrate; i++) {
|
||||
atomKK->sync(fix[list_final_integrate[i]]->execution_space,
|
||||
fix[list_final_integrate[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_final_integrate[i]]->execution_space,
|
||||
fix[list_final_integrate[i]]->datamask_modify);
|
||||
fix[list_final_integrate[i]]->final_integrate();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
end-of-timestep call, only for relevant fixes
|
||||
only call fix->end_of_step() on timesteps that are multiples of nevery
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::end_of_step()
|
||||
{
|
||||
for (int i = 0; i < n_end_of_step; i++)
|
||||
if (update->ntimestep % end_of_step_every[i] == 0) {
|
||||
atomKK->sync(fix[list_end_of_step[i]]->execution_space,
|
||||
fix[list_end_of_step[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_end_of_step[i]]->execution_space,
|
||||
fix[list_end_of_step[i]]->datamask_modify);
|
||||
fix[list_end_of_step[i]]->end_of_step();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
thermo energy call, only for relevant fixes
|
||||
called by Thermo class
|
||||
compute_scalar() is fix call to return energy
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
double ModifyKokkos::thermo_energy()
|
||||
{
|
||||
double energy = 0.0;
|
||||
for (int i = 0; i < n_thermo_energy; i++) {
|
||||
atomKK->sync(fix[list_thermo_energy[i]]->execution_space,
|
||||
fix[list_thermo_energy[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_thermo_energy[i]]->execution_space,
|
||||
fix[list_thermo_energy[i]]->datamask_modify);
|
||||
energy += fix[list_thermo_energy[i]]->compute_scalar();
|
||||
}
|
||||
return energy;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
post_run call
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::post_run()
|
||||
{
|
||||
for (int i = 0; i < nfix; i++) {
|
||||
atomKK->sync(fix[i]->execution_space,
|
||||
fix[i]->datamask_read);
|
||||
atomKK->modified(fix[i]->execution_space,
|
||||
fix[i]->datamask_modify);
|
||||
fix[i]->post_run();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
setup rRESPA pre_force call, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::setup_pre_force_respa(int vflag, int ilevel)
|
||||
{
|
||||
for (int i = 0; i < n_pre_force; i++) {
|
||||
atomKK->sync(fix[list_pre_force[i]]->execution_space,
|
||||
fix[list_pre_force[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_pre_force[i]]->execution_space,
|
||||
fix[list_pre_force[i]]->datamask_modify);
|
||||
fix[list_pre_force[i]]->setup_pre_force_respa(vflag,ilevel);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
1st half of rRESPA integrate call, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::initial_integrate_respa(int vflag, int ilevel, int iloop)
|
||||
{
|
||||
for (int i = 0; i < n_initial_integrate_respa; i++) {
|
||||
atomKK->sync(fix[list_initial_integrate_respa[i]]->execution_space,
|
||||
fix[list_initial_integrate_respa[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_initial_integrate_respa[i]]->execution_space,
|
||||
fix[list_initial_integrate_respa[i]]->datamask_modify);
|
||||
fix[list_initial_integrate_respa[i]]->
|
||||
initial_integrate_respa(vflag,ilevel,iloop);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
rRESPA post_integrate call, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::post_integrate_respa(int ilevel, int iloop)
|
||||
{
|
||||
for (int i = 0; i < n_post_integrate_respa; i++) {
|
||||
atomKK->sync(fix[list_post_integrate_respa[i]]->execution_space,
|
||||
fix[list_post_integrate_respa[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_post_integrate_respa[i]]->execution_space,
|
||||
fix[list_post_integrate_respa[i]]->datamask_modify);
|
||||
fix[list_post_integrate_respa[i]]->post_integrate_respa(ilevel,iloop);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
rRESPA pre_force call, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::pre_force_respa(int vflag, int ilevel, int iloop)
|
||||
{
|
||||
for (int i = 0; i < n_pre_force_respa; i++) {
|
||||
atomKK->sync(fix[list_pre_force_respa[i]]->execution_space,
|
||||
fix[list_pre_force_respa[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_pre_force_respa[i]]->execution_space,
|
||||
fix[list_pre_force_respa[i]]->datamask_modify);
|
||||
fix[list_pre_force_respa[i]]->pre_force_respa(vflag,ilevel,iloop);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
rRESPA post_force call, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::post_force_respa(int vflag, int ilevel, int iloop)
|
||||
{
|
||||
for (int i = 0; i < n_post_force_respa; i++) {
|
||||
atomKK->sync(fix[list_post_force_respa[i]]->execution_space,
|
||||
fix[list_post_force_respa[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_post_force_respa[i]]->execution_space,
|
||||
fix[list_post_force_respa[i]]->datamask_modify);
|
||||
fix[list_post_force_respa[i]]->post_force_respa(vflag,ilevel,iloop);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
2nd half of rRESPA integrate call, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::final_integrate_respa(int ilevel, int iloop)
|
||||
{
|
||||
for (int i = 0; i < n_final_integrate_respa; i++) {
|
||||
atomKK->sync(fix[list_final_integrate_respa[i]]->execution_space,
|
||||
fix[list_final_integrate_respa[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_final_integrate_respa[i]]->execution_space,
|
||||
fix[list_final_integrate_respa[i]]->datamask_modify);
|
||||
fix[list_final_integrate_respa[i]]->final_integrate_respa(ilevel,iloop);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
minimizer pre-exchange call, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::min_pre_exchange()
|
||||
{
|
||||
for (int i = 0; i < n_min_pre_exchange; i++) {
|
||||
atomKK->sync(fix[list_min_pre_exchange[i]]->execution_space,
|
||||
fix[list_min_pre_exchange[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_min_pre_exchange[i]]->execution_space,
|
||||
fix[list_min_pre_exchange[i]]->datamask_modify);
|
||||
fix[list_min_pre_exchange[i]]->min_pre_exchange();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
minimizer pre-neighbor call, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::min_pre_neighbor()
|
||||
{
|
||||
for (int i = 0; i < n_min_pre_neighbor; i++) {
|
||||
atomKK->sync(fix[list_min_pre_neighbor[i]]->execution_space,
|
||||
fix[list_min_pre_neighbor[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_min_pre_neighbor[i]]->execution_space,
|
||||
fix[list_min_pre_neighbor[i]]->datamask_modify);
|
||||
fix[list_min_pre_neighbor[i]]->min_pre_neighbor();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
minimizer pre-force call, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::min_pre_force(int vflag)
|
||||
{
|
||||
for (int i = 0; i < n_min_pre_force; i++) {
|
||||
atomKK->sync(fix[list_min_pre_force[i]]->execution_space,
|
||||
fix[list_min_pre_force[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_min_pre_force[i]]->execution_space,
|
||||
fix[list_min_pre_force[i]]->datamask_modify);
|
||||
fix[list_min_pre_force[i]]->min_pre_force(vflag);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
minimizer force adjustment call, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::min_post_force(int vflag)
|
||||
{
|
||||
for (int i = 0; i < n_min_post_force; i++) {
|
||||
atomKK->sync(fix[list_min_post_force[i]]->execution_space,
|
||||
fix[list_min_post_force[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_min_post_force[i]]->execution_space,
|
||||
fix[list_min_post_force[i]]->datamask_modify);
|
||||
fix[list_min_post_force[i]]->min_post_force(vflag);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
minimizer energy/force evaluation, only for relevant fixes
|
||||
return energy and forces on extra degrees of freedom
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
double ModifyKokkos::min_energy(double *fextra)
|
||||
{
|
||||
int ifix,index;
|
||||
|
||||
index = 0;
|
||||
double eng = 0.0;
|
||||
for (int i = 0; i < n_min_energy; i++) {
|
||||
ifix = list_min_energy[i];
|
||||
atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
|
||||
atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
|
||||
eng += fix[ifix]->min_energy(&fextra[index]);
|
||||
index += fix[ifix]->min_dof();
|
||||
}
|
||||
return eng;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
store current state of extra dof, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::min_store()
|
||||
{
|
||||
for (int i = 0; i < n_min_energy; i++) {
|
||||
atomKK->sync(fix[list_min_energy[i]]->execution_space,
|
||||
fix[list_min_energy[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_min_energy[i]]->execution_space,
|
||||
fix[list_min_energy[i]]->datamask_modify);
|
||||
fix[list_min_energy[i]]->min_store();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
mange state of extra dof on a stack, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::min_clearstore()
|
||||
{
|
||||
for (int i = 0; i < n_min_energy; i++) {
|
||||
atomKK->sync(fix[list_min_energy[i]]->execution_space,
|
||||
fix[list_min_energy[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_min_energy[i]]->execution_space,
|
||||
fix[list_min_energy[i]]->datamask_modify);
|
||||
fix[list_min_energy[i]]->min_clearstore();
|
||||
}
|
||||
}
|
||||
|
||||
void ModifyKokkos::min_pushstore()
|
||||
{
|
||||
for (int i = 0; i < n_min_energy; i++) {
|
||||
atomKK->sync(fix[list_min_energy[i]]->execution_space,
|
||||
fix[list_min_energy[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_min_energy[i]]->execution_space,
|
||||
fix[list_min_energy[i]]->datamask_modify);
|
||||
fix[list_min_energy[i]]->min_pushstore();
|
||||
}
|
||||
}
|
||||
|
||||
void ModifyKokkos::min_popstore()
|
||||
{
|
||||
for (int i = 0; i < n_min_energy; i++) {
|
||||
atomKK->sync(fix[list_min_energy[i]]->execution_space,
|
||||
fix[list_min_energy[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_min_energy[i]]->execution_space,
|
||||
fix[list_min_energy[i]]->datamask_modify);
|
||||
fix[list_min_energy[i]]->min_popstore();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
displace extra dof along vector hextra, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ModifyKokkos::min_step(double alpha, double *hextra)
|
||||
{
|
||||
int ifix,index;
|
||||
|
||||
index = 0;
|
||||
for (int i = 0; i < n_min_energy; i++) {
|
||||
ifix = list_min_energy[i];
|
||||
atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
|
||||
atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
|
||||
fix[ifix]->min_step(alpha,&hextra[index]);
|
||||
index += fix[ifix]->min_dof();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute max allowed step size along vector hextra, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
double ModifyKokkos::max_alpha(double *hextra)
|
||||
{
|
||||
int ifix,index;
|
||||
|
||||
double alpha = BIG;
|
||||
index = 0;
|
||||
for (int i = 0; i < n_min_energy; i++) {
|
||||
ifix = list_min_energy[i];
|
||||
atomKK->sync(fix[ifix]->execution_space,fix[ifix]->datamask_read);
|
||||
atomKK->modified(fix[ifix]->execution_space,fix[ifix]->datamask_modify);
|
||||
double alpha_one = fix[ifix]->max_alpha(&hextra[index]);
|
||||
alpha = MIN(alpha,alpha_one);
|
||||
index += fix[ifix]->min_dof();
|
||||
}
|
||||
return alpha;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
extract extra dof for minimization, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int ModifyKokkos::min_dof()
|
||||
{
|
||||
int ndof = 0;
|
||||
for (int i = 0; i < n_min_energy; i++) {
|
||||
atomKK->sync(fix[list_min_energy[i]]->execution_space,
|
||||
fix[list_min_energy[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_min_energy[i]]->execution_space,
|
||||
fix[list_min_energy[i]]->datamask_modify);
|
||||
ndof += fix[list_min_energy[i]]->min_dof();
|
||||
}
|
||||
return ndof;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
reset reference state of fix, only for relevant fixes
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int ModifyKokkos::min_reset_ref()
|
||||
{
|
||||
int itmp,itmpall;
|
||||
itmpall = 0;
|
||||
for (int i = 0; i < n_min_energy; i++) {
|
||||
atomKK->sync(fix[list_min_energy[i]]->execution_space,
|
||||
fix[list_min_energy[i]]->datamask_read);
|
||||
atomKK->modified(fix[list_min_energy[i]]->execution_space,
|
||||
fix[list_min_energy[i]]->datamask_modify);
|
||||
itmp = fix[list_min_energy[i]]->min_reset_ref();
|
||||
if (itmp) itmpall = 1;
|
||||
}
|
||||
return itmpall;
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_MODIFY_KOKKOS_H
|
||||
#define LMP_MODIFY_KOKKOS_H
|
||||
|
||||
#include "modify.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class ModifyKokkos : public Modify {
|
||||
public:
|
||||
ModifyKokkos(class LAMMPS *);
|
||||
~ModifyKokkos() {}
|
||||
void setup(int);
|
||||
void setup_pre_exchange();
|
||||
void setup_pre_neighbor();
|
||||
void setup_pre_force(int);
|
||||
void initial_integrate(int);
|
||||
void post_integrate();
|
||||
void pre_decide();
|
||||
void pre_exchange();
|
||||
void pre_neighbor();
|
||||
void pre_force(int);
|
||||
void post_force(int);
|
||||
void final_integrate();
|
||||
void end_of_step();
|
||||
double thermo_energy();
|
||||
void post_run();
|
||||
|
||||
void setup_pre_force_respa(int, int);
|
||||
void initial_integrate_respa(int, int, int);
|
||||
void post_integrate_respa(int, int);
|
||||
void pre_force_respa(int, int, int);
|
||||
void post_force_respa(int, int, int);
|
||||
void final_integrate_respa(int, int);
|
||||
|
||||
void min_pre_exchange();
|
||||
void min_pre_neighbor();
|
||||
void min_pre_force(int);
|
||||
void min_post_force(int);
|
||||
|
||||
double min_energy(double *);
|
||||
void min_store();
|
||||
void min_step(double, double *);
|
||||
void min_clearstore();
|
||||
void min_pushstore();
|
||||
void min_popstore();
|
||||
double max_alpha(double *);
|
||||
int min_dof();
|
||||
int min_reset_ref();
|
||||
|
||||
protected:
|
||||
class AtomKokkos *atomKK;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -0,0 +1,507 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, int HALF_NEIGH>
|
||||
void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
|
||||
{
|
||||
const int nall = includegroup?atom->nfirst:atom->nlocal;
|
||||
list->grow(nall);
|
||||
|
||||
NeighborKokkosExecute<DeviceType>
|
||||
data(*list,
|
||||
k_cutneighsq.view<DeviceType>(),
|
||||
k_bincount.view<DeviceType>(),
|
||||
k_bins.view<DeviceType>(),nall,
|
||||
atomKK->k_x.view<DeviceType>(),
|
||||
atomKK->k_type.view<DeviceType>(),
|
||||
atomKK->k_mask.view<DeviceType>(),
|
||||
atomKK->k_molecule.view<DeviceType>(),
|
||||
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
|
||||
bininvx,bininvy,bininvz,
|
||||
bboxhi,bboxlo);
|
||||
|
||||
k_cutneighsq.sync<DeviceType>();
|
||||
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK);
|
||||
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
|
||||
|
||||
while(data.h_resize() > 0) {
|
||||
data.h_resize() = 0;
|
||||
deep_copy(data.resize, data.h_resize);
|
||||
|
||||
MemsetZeroFunctor<DeviceType> f_zero;
|
||||
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
|
||||
Kokkos::parallel_for(mbins, f_zero);
|
||||
DeviceType::fence();
|
||||
|
||||
NeighborKokkosBinAtomsFunctor<DeviceType> f(data);
|
||||
|
||||
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
|
||||
DeviceType::fence();
|
||||
|
||||
deep_copy(data.h_resize, data.resize);
|
||||
if(data.h_resize()) {
|
||||
|
||||
atoms_per_bin += 16;
|
||||
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
|
||||
data.bins = k_bins.view<DeviceType>();
|
||||
data.c_bins = data.bins;
|
||||
}
|
||||
}
|
||||
|
||||
if(list->d_neighbors.dimension_0()<nall) {
|
||||
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
|
||||
list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
|
||||
data.neigh_list.d_neighbors = list->d_neighbors;
|
||||
data.neigh_list.d_numneigh = list->d_numneigh;
|
||||
}
|
||||
data.h_resize()=1;
|
||||
while(data.h_resize()) {
|
||||
data.h_new_maxneighs() = list->maxneighs;
|
||||
data.h_resize() = 0;
|
||||
|
||||
Kokkos::deep_copy(data.resize, data.h_resize);
|
||||
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
|
||||
#if DEVICE==2
|
||||
#define BINS_PER_BLOCK 2
|
||||
const int factor = atoms_per_bin<64?2:1;
|
||||
Kokkos::ParallelWorkRequest config((mbins+factor-1)/factor,atoms_per_bin*factor);
|
||||
#else
|
||||
const int factor = 1;
|
||||
#endif
|
||||
|
||||
if(newton_pair) {
|
||||
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
#if DEVICE==2
|
||||
Kokkos::parallel_for(config, f);
|
||||
#else
|
||||
Kokkos::parallel_for(nall, f);
|
||||
#endif
|
||||
} else {
|
||||
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
#if DEVICE==2
|
||||
Kokkos::parallel_for(config, f);
|
||||
#else
|
||||
Kokkos::parallel_for(nall, f);
|
||||
#endif
|
||||
}
|
||||
DeviceType::fence();
|
||||
deep_copy(data.h_resize, data.resize);
|
||||
|
||||
if(data.h_resize()) {
|
||||
deep_copy(data.h_new_maxneighs, data.new_maxneighs);
|
||||
list->maxneighs = data.h_new_maxneighs() * 1.2;
|
||||
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs);
|
||||
data.neigh_list.d_neighbors = list->d_neighbors;
|
||||
data.neigh_list.maxneighs = list->maxneighs;
|
||||
}
|
||||
}
|
||||
|
||||
list->inum = nall;
|
||||
list->gnum = 0;
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void NeighborKokkosExecute<Device>::binatomsItem(const int &i) const
|
||||
{
|
||||
const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2));
|
||||
|
||||
const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1);
|
||||
if(ac < bins.dimension_1()) {
|
||||
bins(ibin, ac) = i;
|
||||
} else {
|
||||
resize() = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device> template<int HalfNeigh,int GhostNewton>
|
||||
void NeighborKokkosExecute<Device>::
|
||||
build_Item(const int &i) const
|
||||
{
|
||||
/* if necessary, goto next page and add pages */
|
||||
int n = 0;
|
||||
|
||||
// get subview of neighbors of i
|
||||
|
||||
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
|
||||
const X_FLOAT xtmp = x(i, 0);
|
||||
const X_FLOAT ytmp = x(i, 1);
|
||||
const X_FLOAT ztmp = x(i, 2);
|
||||
const int itype = type(i);
|
||||
|
||||
const int ibin = coord2bin(xtmp, ytmp, ztmp);
|
||||
|
||||
const int nstencil = neigh_list.nstencil;
|
||||
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
|
||||
= neigh_list.d_stencil;
|
||||
|
||||
// loop over all bins in neighborhood (includes ibin)
|
||||
if(HalfNeigh)
|
||||
for(int m = 0; m < c_bincount(ibin); m++) {
|
||||
const int j = c_bins(ibin,m);
|
||||
// printf("%i %i %i\n",i,ibin,m,c_bincount(ibin),j);
|
||||
const int jtype = type(j);
|
||||
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
|
||||
if((j == i) || (HalfNeigh && !GhostNewton && (j < i)) ||
|
||||
(HalfNeigh && GhostNewton && ((j < i) || ((j >= nlocal) &&
|
||||
((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
|
||||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
|
||||
) continue;
|
||||
//if(Exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j, 0);
|
||||
const X_FLOAT dely = ytmp - x(j, 1);
|
||||
const X_FLOAT delz = ztmp - x(j, 2);
|
||||
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n) = j;
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
const int jbin = ibin + stencil[k];
|
||||
// get subview of jbin
|
||||
if(!GhostNewton&&HalfNeigh&&(ibin==jbin)) continue;
|
||||
//const ArrayTypes<Device>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
|
||||
for(int m = 0; m < c_bincount(jbin); m++) {
|
||||
const int j = c_bins(jbin,m);
|
||||
//if(i==0)
|
||||
//printf("%i %i %i %i %i %i %i\n",i,jbin,m,c_bincount(jbin),j,k,stencil[k]);
|
||||
const int jtype = type(j);
|
||||
|
||||
if(HalfNeigh && !GhostNewton && (j < i)) continue;
|
||||
if(!HalfNeigh && j==i) continue;
|
||||
//if(Exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j, 0);
|
||||
const X_FLOAT dely = ytmp - x(j, 1);
|
||||
const X_FLOAT delz = ztmp - x(j, 2);
|
||||
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
//if(i==0)
|
||||
//printf("%i %i %lf %lf NEIGHS\n",i,j,rsq,cutneighsq(itype,jtype));
|
||||
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n) = j;
|
||||
n++;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
neigh_list.d_numneigh(i) = n;
|
||||
|
||||
if(n >= neigh_list.maxneighs) {
|
||||
resize() = 1;
|
||||
|
||||
if(n >= new_maxneighs()) new_maxneighs() = n;
|
||||
}
|
||||
neigh_list.d_ilist(i) = i;
|
||||
}
|
||||
|
||||
#if DEVICE==2
|
||||
extern __shared__ X_FLOAT sharedmem[];
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType> template<int HalfNeigh>
|
||||
__device__ inline
|
||||
void NeighborKokkosExecute<DeviceType>::build_ItemCuda(DeviceType dev) const
|
||||
{
|
||||
/* loop over atoms in i's bin,
|
||||
*/
|
||||
const int atoms_per_bin = c_bins.dimension_1();
|
||||
const int BINS_PER_TEAM = blockDim.x/atoms_per_bin;
|
||||
const int MY_BIN = threadIdx.x/atoms_per_bin;
|
||||
const int MY_II = threadIdx.x%atoms_per_bin;
|
||||
|
||||
const int ibin = (blockIdx.x)*BINS_PER_TEAM+MY_BIN;
|
||||
|
||||
if(ibin >=c_bincount.dimension_0()) return;
|
||||
X_FLOAT* other_x = sharedmem;
|
||||
other_x = other_x + 5*atoms_per_bin*MY_BIN;
|
||||
|
||||
int* other_id = (int*) &other_x[4 * atoms_per_bin];
|
||||
|
||||
int bincount_current = c_bincount[ibin];
|
||||
|
||||
const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
|
||||
/* if necessary, goto next page and add pages */
|
||||
|
||||
int n = 0;
|
||||
|
||||
X_FLOAT xtmp;
|
||||
X_FLOAT ytmp;
|
||||
X_FLOAT ztmp;
|
||||
int itype;
|
||||
const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i<nlocal)?i:0);
|
||||
|
||||
if(i >= 0) {
|
||||
xtmp = x(i, 0);
|
||||
ytmp = x(i, 1);
|
||||
ztmp = x(i, 2);
|
||||
itype = type(i);
|
||||
other_x[MY_II] = xtmp;
|
||||
other_x[MY_II + atoms_per_bin] = ytmp;
|
||||
other_x[MY_II + 2 * atoms_per_bin] = ztmp;
|
||||
other_x[MY_II + 3 * atoms_per_bin] = itype;
|
||||
}
|
||||
other_id[MY_II] = i;
|
||||
int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0);
|
||||
|
||||
if(test) return;
|
||||
|
||||
if(i >= 0 && i < nlocal) {
|
||||
#pragma unroll 4
|
||||
for(int m = 0; m < bincount_current; m++) {
|
||||
int j = other_id[m];
|
||||
|
||||
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
|
||||
//if(j==i) continue;
|
||||
if((j == i) || (HalfNeigh && (j < i))) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - other_x[m];
|
||||
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
|
||||
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
|
||||
const int jtype = other_x[m + 3 * atoms_per_bin];
|
||||
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
if((rsq <= cutneighsq(itype,jtype)) && (n < neigh_list.maxneighs)) neighbors_i(n++) = j;
|
||||
}
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
const int nstencil = neigh_list.nstencil;
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
|
||||
= neigh_list.d_stencil;
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
const int jbin = ibin + stencil[k];
|
||||
|
||||
if(ibin == jbin) continue;
|
||||
|
||||
bincount_current = c_bincount[jbin];
|
||||
int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1;
|
||||
|
||||
if(j >= 0) {
|
||||
other_x[MY_II] = x(j, 0);
|
||||
other_x[MY_II + atoms_per_bin] = x(j, 1);
|
||||
other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
|
||||
other_x[MY_II + 3 * atoms_per_bin] = type(j);
|
||||
}
|
||||
|
||||
other_id[MY_II] = j;
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if(i >= 0 && i < nlocal) {
|
||||
#pragma unroll 8
|
||||
for(int m = 0; m < bincount_current; m++) {
|
||||
const int j = other_id[m];
|
||||
|
||||
if(HalfNeigh && (j < i)) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - other_x[m];
|
||||
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
|
||||
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
|
||||
const int jtype = other_x[m + 3 * atoms_per_bin];
|
||||
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
if((rsq <= cutneighsq(itype,jtype)) && (n < neigh_list.maxneighs)) neighbors_i(n++) = j;
|
||||
}
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
if(i >= 0 && i < nlocal) {
|
||||
neigh_list.d_numneigh(i) = n;
|
||||
neigh_list.d_ilist(i) = i;
|
||||
}
|
||||
|
||||
if(n >= neigh_list.maxneighs) {
|
||||
resize() = 1;
|
||||
|
||||
if(n >= new_maxneighs()) new_maxneighs() = n;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
template<class DeviceType>
|
||||
void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list)
|
||||
{
|
||||
const int nall = includegroup?atom->nfirst:atom->nlocal;
|
||||
list->grow(nall);
|
||||
|
||||
NeighborKokkosExecute<DeviceType>
|
||||
data(*list,
|
||||
k_cutneighsq.view<DeviceType>(),
|
||||
k_bincount.view<DeviceType>(),
|
||||
k_bins.view<DeviceType>(),nall,
|
||||
atomKK->k_x.view<DeviceType>(),
|
||||
atomKK->k_type.view<DeviceType>(),
|
||||
atomKK->k_mask.view<DeviceType>(),
|
||||
atomKK->k_molecule.view<DeviceType>(),
|
||||
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
|
||||
bininvx,bininvy,bininvz,
|
||||
bboxhi,bboxlo);
|
||||
|
||||
k_cutneighsq.sync<DeviceType>();
|
||||
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK);
|
||||
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
|
||||
DeviceType::fence();
|
||||
|
||||
while(data.h_resize() > 0) {
|
||||
data.h_resize() = 0;
|
||||
deep_copy(data.resize, data.h_resize);
|
||||
|
||||
MemsetZeroFunctor<DeviceType> f_zero;
|
||||
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
|
||||
Kokkos::parallel_for(mbins, f_zero);
|
||||
DeviceType::fence();
|
||||
|
||||
NeighborKokkosBinAtomsFunctor<DeviceType> f(data);
|
||||
|
||||
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
|
||||
DeviceType::fence();
|
||||
|
||||
deep_copy(data.h_resize, data.resize);
|
||||
if(data.h_resize()) {
|
||||
|
||||
atoms_per_bin += 16;
|
||||
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
|
||||
data.bins = k_bins.view<DeviceType>();
|
||||
data.c_bins = data.bins;
|
||||
}
|
||||
}
|
||||
|
||||
if(list->d_neighbors.dimension_0()<nall) {
|
||||
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
|
||||
list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
|
||||
data.neigh_list.d_neighbors = list->d_neighbors;
|
||||
data.neigh_list.d_numneigh = list->d_numneigh;
|
||||
}
|
||||
data.h_resize()=1;
|
||||
while(data.h_resize()) {
|
||||
data.h_new_maxneighs() = list->maxneighs;
|
||||
data.h_resize() = 0;
|
||||
|
||||
Kokkos::deep_copy(data.resize, data.h_resize);
|
||||
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
|
||||
#if DEVICE==2
|
||||
#define BINS_PER_BLOCK 2
|
||||
const int factor = atoms_per_bin<64?2:1;
|
||||
Kokkos::ParallelWorkRequest config((mbins+factor-1)/factor,atoms_per_bin*factor);
|
||||
#else
|
||||
const int factor = 1;
|
||||
#endif
|
||||
|
||||
if(newton_pair) {
|
||||
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
//#if DEVICE==2
|
||||
// Kokkos::parallel_for(config, f);
|
||||
//#else
|
||||
Kokkos::parallel_for(nall, f);
|
||||
//#endif
|
||||
} else {
|
||||
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
//#if DEVICE==2
|
||||
// Kokkos::parallel_for(config, f);
|
||||
//#else
|
||||
Kokkos::parallel_for(nall, f);
|
||||
//#endif
|
||||
}
|
||||
DeviceType::fence();
|
||||
deep_copy(data.h_resize, data.resize);
|
||||
|
||||
if(data.h_resize()) {
|
||||
deep_copy(data.h_new_maxneighs, data.new_maxneighs);
|
||||
list->maxneighs = data.h_new_maxneighs() * 1.2;
|
||||
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs);
|
||||
data.neigh_list.d_neighbors = list->d_neighbors;
|
||||
data.neigh_list.maxneighs = list->maxneighs;
|
||||
}
|
||||
}
|
||||
|
||||
list->inum = nall;
|
||||
list->gnum = 0;
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device> template<int ClusterSize>
|
||||
void NeighborKokkosExecute<Device>::
|
||||
build_cluster_Item(const int &i) const
|
||||
{
|
||||
/* if necessary, goto next page and add pages */
|
||||
int n = 0;
|
||||
|
||||
// get subview of neighbors of i
|
||||
|
||||
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
|
||||
const X_FLOAT xtmp = x(i, 0);
|
||||
const X_FLOAT ytmp = x(i, 1);
|
||||
const X_FLOAT ztmp = x(i, 2);
|
||||
const int itype = type(i);
|
||||
|
||||
const int ibin = coord2bin(xtmp, ytmp, ztmp);
|
||||
|
||||
const int nstencil = neigh_list.nstencil;
|
||||
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
|
||||
= neigh_list.d_stencil;
|
||||
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
const int jbin = ibin + stencil[k];
|
||||
for(int m = 0; m < c_bincount(jbin); m++) {
|
||||
const int j = c_bins(jbin,m);
|
||||
bool skip = i == j;
|
||||
for(int k = 0; k< (n<neigh_list.maxneighs?n:neigh_list.maxneighs); k++)
|
||||
if((j-(j%ClusterSize)) == neighbors_i(k)) {skip=true;};//{m += ClusterSize - j&(ClusterSize-1)-1; skip=true;}
|
||||
|
||||
if(!skip) {
|
||||
const int jtype = type(j);
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j, 0);
|
||||
const X_FLOAT dely = ytmp - x(j, 1);
|
||||
const X_FLOAT delz = ztmp - x(j, 2);
|
||||
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n) = (j-(j%ClusterSize));
|
||||
n++;
|
||||
//m += ClusterSize - j&(ClusterSize-1)-1;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
neigh_list.d_numneigh(i) = n;
|
||||
|
||||
if(n >= neigh_list.maxneighs) {
|
||||
resize() = 1;
|
||||
|
||||
if(n >= new_maxneighs()) new_maxneighs() = n;
|
||||
}
|
||||
neigh_list.d_ilist(i) = i;
|
||||
}
|
|
@ -0,0 +1,118 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "neigh_list_kokkos.h"
|
||||
#include "atom.h"
|
||||
#include "memory.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
enum{NSQ,BIN,MULTI};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device>
|
||||
void NeighListKokkos<Device>::clean_copy()
|
||||
{
|
||||
ilist = NULL;
|
||||
numneigh = NULL;
|
||||
firstneigh = NULL;
|
||||
firstdouble = NULL;
|
||||
dnum = 0;
|
||||
iskip = NULL;
|
||||
ijskip = NULL;
|
||||
|
||||
ipage = NULL;
|
||||
dpage = NULL;
|
||||
maxstencil = 0;
|
||||
ghostflag = 0;
|
||||
maxstencil_multi = 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device>
|
||||
void NeighListKokkos<Device>::grow(int nmax)
|
||||
{
|
||||
// skip if this list is already long enough to store nmax atoms
|
||||
|
||||
if (nmax <= maxatoms) return;
|
||||
maxatoms = nmax;
|
||||
|
||||
d_ilist =
|
||||
typename ArrayTypes<Device>::t_int_1d("neighlist:ilist",maxatoms);
|
||||
d_numneigh =
|
||||
typename ArrayTypes<Device>::t_int_1d("neighlist:numneigh",maxatoms);
|
||||
d_neighbors =
|
||||
typename ArrayTypes<Device>::t_neighbors_2d("neighlist:neighbors",
|
||||
maxatoms,maxneighs);
|
||||
|
||||
memory->sfree(firstneigh);
|
||||
memory->sfree(firstdouble);
|
||||
|
||||
firstneigh = (int **) memory->smalloc(maxatoms*sizeof(int *),
|
||||
"neighlist:firstneigh");
|
||||
if (dnum)
|
||||
firstdouble = (double **) memory->smalloc(maxatoms*sizeof(double *),
|
||||
"neighlist:firstdouble");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device>
|
||||
void NeighListKokkos<Device>::stencil_allocate(int smax, int style)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (style == BIN) {
|
||||
if (smax > maxstencil) {
|
||||
maxstencil = smax;
|
||||
d_stencil =
|
||||
memory->create_kokkos(d_stencil,h_stencil,stencil,maxstencil,
|
||||
"neighlist:stencil");
|
||||
if (ghostflag) {
|
||||
memory->destroy(stencilxyz);
|
||||
memory->create(stencilxyz,maxstencil,3,"neighlist:stencilxyz");
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
int n = atom->ntypes;
|
||||
if (maxstencil_multi == 0) {
|
||||
nstencil_multi = new int[n+1];
|
||||
stencil_multi = new int*[n+1];
|
||||
distsq_multi = new double*[n+1];
|
||||
for (i = 1; i <= n; i++) {
|
||||
nstencil_multi[i] = 0;
|
||||
stencil_multi[i] = NULL;
|
||||
distsq_multi[i] = NULL;
|
||||
}
|
||||
}
|
||||
if (smax > maxstencil_multi) {
|
||||
maxstencil_multi = smax;
|
||||
for (i = 1; i <= n; i++) {
|
||||
memory->destroy(stencil_multi[i]);
|
||||
memory->destroy(distsq_multi[i]);
|
||||
memory->create(stencil_multi[i],maxstencil_multi,
|
||||
"neighlist:stencil_multi");
|
||||
memory->create(distsq_multi[i],maxstencil_multi,
|
||||
"neighlist:distsq_multi");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template class NeighListKokkos<LMPDeviceType>;
|
||||
#if DEVICE==2
|
||||
template class NeighListKokkos<LMPHostType>;
|
||||
#endif
|
|
@ -0,0 +1,104 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_NEIGH_LIST_KOKKOS_H
|
||||
#define LMP_NEIGH_LIST_KOKKOS_H
|
||||
|
||||
#include "pointers.h"
|
||||
#include "neigh_list.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
enum{FULL,HALFTHREAD,HALF,N2,FULLCLUSTER};
|
||||
|
||||
class AtomNeighbors
|
||||
{
|
||||
public:
|
||||
const int num_neighs;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
AtomNeighbors(int* const & firstneigh, const int & _num_neighs,
|
||||
const int & stride):
|
||||
_firstneigh(firstneigh), _stride(stride), num_neighs(_num_neighs) {};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int& operator()(const int &i) const {
|
||||
return _firstneigh[i*_stride];
|
||||
}
|
||||
|
||||
private:
|
||||
int* const _firstneigh;
|
||||
const int _stride;
|
||||
};
|
||||
|
||||
class AtomNeighborsConst
|
||||
{
|
||||
public:
|
||||
const int* const _firstneigh;
|
||||
const int numneigh;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
AtomNeighborsConst(int* const & firstneigh, const int & _numneigh,
|
||||
const int & stride):
|
||||
_firstneigh(firstneigh), _stride(stride), numneigh(_numneigh) {};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const int& operator()(const int &i) const {
|
||||
return _firstneigh[i*_stride];
|
||||
}
|
||||
|
||||
private:
|
||||
//const int* const _firstneigh;
|
||||
const int _stride;
|
||||
};
|
||||
|
||||
template<class Device>
|
||||
class NeighListKokkos: public NeighList {
|
||||
int _stride;
|
||||
|
||||
public:
|
||||
int maxneighs;
|
||||
|
||||
void clean_copy();
|
||||
void grow(int nmax);
|
||||
typename ArrayTypes<Device>::t_neighbors_2d d_neighbors;
|
||||
typename ArrayTypes<Device>::t_int_1d d_ilist; // local indices of I atoms
|
||||
typename ArrayTypes<Device>::t_int_1d d_numneigh; // # of J neighs for each I
|
||||
typename ArrayTypes<Device>::t_int_1d d_stencil; // # of J neighs for each I
|
||||
typename ArrayTypes<LMPHostType>::t_int_1d h_stencil; // # of J neighs per I
|
||||
|
||||
NeighListKokkos(class LAMMPS *lmp):
|
||||
NeighList(lmp) {_stride = 1; maxneighs = 16;};
|
||||
~NeighListKokkos() {stencil = NULL; numneigh = NULL; ilist = NULL;};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
AtomNeighbors get_neighbors(const int &i) const {
|
||||
return AtomNeighbors(&d_neighbors(i,0),d_numneigh(i),
|
||||
&d_neighbors(i,1)-&d_neighbors(i,0));
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
AtomNeighborsConst get_neighbors_const(const int &i) const {
|
||||
return AtomNeighborsConst(&d_neighbors(i,0),d_numneigh(i),
|
||||
&d_neighbors(i,1)-&d_neighbors(i,0));
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int& num_neighs(const int & i) const {
|
||||
return d_numneigh(i);
|
||||
}
|
||||
void stencil_allocate(int smax, int style);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,269 @@
|
|||
;/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "neighbor_kokkos.h"
|
||||
#include "atom.h"
|
||||
#include "pair.h"
|
||||
#include "neigh_request.h"
|
||||
#include "memory.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
enum{NSQ,BIN,MULTI}; // also in neigh_list.cpp
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NeighborKokkos::NeighborKokkos(LAMMPS *lmp) : Neighbor(lmp)
|
||||
{
|
||||
atoms_per_bin = 16;
|
||||
|
||||
nlist_host = 0;
|
||||
lists_host = NULL;
|
||||
pair_build_host = NULL;
|
||||
stencil_create_host = NULL;
|
||||
nlist_device = 0;
|
||||
lists_device = NULL;
|
||||
pair_build_device = NULL;
|
||||
stencil_create_device = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NeighborKokkos::~NeighborKokkos()
|
||||
{
|
||||
memory->destroy_kokkos(k_cutneighsq,cutneighsq);
|
||||
cutneighsq = NULL;
|
||||
|
||||
for (int i = 0; i < nlist_host; i++) delete lists_host[i];
|
||||
delete [] lists_host;
|
||||
for (int i = 0; i < nlist_device; i++) delete lists_device[i];
|
||||
delete [] lists_device;
|
||||
|
||||
delete [] pair_build_device;
|
||||
delete [] pair_build_host;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::init()
|
||||
{
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
Neighbor::init();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::init_cutneighsq_kokkos(int n)
|
||||
{
|
||||
memory->create_kokkos(k_cutneighsq,cutneighsq,n+1,n+1,"neigh:cutneighsq");
|
||||
k_cutneighsq.modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int NeighborKokkos::init_lists_kokkos()
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nlist_host; i++) delete lists_host[i];
|
||||
delete [] lists_host;
|
||||
delete [] pair_build_host;
|
||||
delete [] stencil_create_host;
|
||||
nlist_host = 0;
|
||||
|
||||
for (i = 0; i < nlist_device; i++) delete lists_device[i];
|
||||
delete [] lists_device;
|
||||
delete [] pair_build_device;
|
||||
delete [] stencil_create_device;
|
||||
nlist_device = 0;
|
||||
|
||||
nlist = 0;
|
||||
for (i = 0; i < nrequest; i++) {
|
||||
if (requests[i]->kokkos_device) nlist_device++;
|
||||
else if (requests[i]->kokkos_host) nlist_host++;
|
||||
else nlist++;
|
||||
}
|
||||
|
||||
lists_host = new NeighListKokkos<LMPHostType>*[nrequest];
|
||||
pair_build_host = new PairPtrHost[nrequest];
|
||||
stencil_create_host = new StencilPtrHost[nrequest];
|
||||
for (i = 0; i < nrequest; i++) {
|
||||
lists_host[i] = NULL;
|
||||
pair_build_host[i] = NULL;
|
||||
stencil_create_host[i] = NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < nrequest; i++) {
|
||||
if (!requests[i]->kokkos_host) continue;
|
||||
lists_host[i] = new NeighListKokkos<LMPHostType>(lmp);
|
||||
lists_host[i]->index = i;
|
||||
lists_host[i]->dnum = requests[i]->dnum;
|
||||
if (requests[i]->pair) {
|
||||
Pair *pair = (Pair *) requests[i]->requestor;
|
||||
pair->init_list(requests[i]->id,lists_host[i]);
|
||||
}
|
||||
}
|
||||
|
||||
lists_device = new NeighListKokkos<LMPDeviceType>*[nrequest];
|
||||
pair_build_device = new PairPtrDevice[nrequest];
|
||||
stencil_create_device = new StencilPtrDevice[nrequest];
|
||||
for (i = 0; i < nrequest; i++) {
|
||||
lists_device[i] = NULL;
|
||||
pair_build_device[i] = NULL;
|
||||
stencil_create_device[i] = NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < nrequest; i++) {
|
||||
if (!requests[i]->kokkos_device) continue;
|
||||
lists_device[i] = new NeighListKokkos<LMPDeviceType>(lmp);
|
||||
lists_device[i]->index = i;
|
||||
lists_device[i]->dnum = requests[i]->dnum;
|
||||
if (requests[i]->pair) {
|
||||
Pair *pair = (Pair *) requests[i]->requestor;
|
||||
pair->init_list(requests[i]->id,lists_device[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// return # of non-Kokkos lists
|
||||
|
||||
return nlist;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::init_list_flags1_kokkos(int i)
|
||||
{
|
||||
if (lists_host[i]) {
|
||||
lists_host[i]->buildflag = 1;
|
||||
if (pair_build_host[i] == NULL) lists_host[i]->buildflag = 0;
|
||||
if (requests[i]->occasional) lists_host[i]->buildflag = 0;
|
||||
|
||||
lists_host[i]->growflag = 1;
|
||||
if (requests[i]->copy) lists_host[i]->growflag = 0;
|
||||
|
||||
lists_host[i]->stencilflag = 1;
|
||||
if (style == NSQ) lists_host[i]->stencilflag = 0;
|
||||
if (stencil_create[i] == NULL) lists_host[i]->stencilflag = 0;
|
||||
|
||||
lists_host[i]->ghostflag = 0;
|
||||
if (requests[i]->ghost) lists_host[i]->ghostflag = 1;
|
||||
if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
|
||||
}
|
||||
|
||||
if (lists_device[i]) {
|
||||
lists_device[i]->buildflag = 1;
|
||||
if (pair_build_device[i] == NULL) lists_device[i]->buildflag = 0;
|
||||
if (requests[i]->occasional) lists_device[i]->buildflag = 0;
|
||||
|
||||
lists_device[i]->growflag = 1;
|
||||
if (requests[i]->copy) lists_device[i]->growflag = 0;
|
||||
|
||||
lists_device[i]->stencilflag = 1;
|
||||
if (style == NSQ) lists_device[i]->stencilflag = 0;
|
||||
if (stencil_create[i] == NULL) lists_device[i]->stencilflag = 0;
|
||||
|
||||
lists_device[i]->ghostflag = 0;
|
||||
if (requests[i]->ghost) lists_device[i]->ghostflag = 1;
|
||||
if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::init_list_flags2_kokkos(int i)
|
||||
{
|
||||
if (lists_host[i]) {
|
||||
if (lists_host[i]->buildflag) blist[nblist++] = i;
|
||||
if (lists_host[i]->growflag && requests[i]->occasional == 0)
|
||||
glist[nglist++] = i;
|
||||
if (lists_host[i]->stencilflag && requests[i]->occasional == 0)
|
||||
slist[nslist++] = i;
|
||||
}
|
||||
|
||||
if (lists_device[i]) {
|
||||
if (lists_device[i]->buildflag) blist[nblist++] = i;
|
||||
if (lists_device[i]->growflag && requests[i]->occasional == 0)
|
||||
glist[nglist++] = i;
|
||||
if (lists_device[i]->stencilflag && requests[i]->occasional == 0)
|
||||
slist[nslist++] = i;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::init_list_grow_kokkos(int i)
|
||||
{
|
||||
if (lists_host[i]!=NULL && lists_host[i]->growflag)
|
||||
lists_host[i]->grow(maxatom);
|
||||
if (lists_device[i]!=NULL && lists_device[i]->growflag)
|
||||
lists_device[i]->grow(maxatom);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::choose_build(int index, NeighRequest *rq)
|
||||
{
|
||||
if (rq->kokkos_host != 0) {
|
||||
PairPtrHost pb = NULL;
|
||||
if (rq->full) pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,0>;
|
||||
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,1>;
|
||||
pair_build_host[index] = pb;
|
||||
return;
|
||||
}
|
||||
if (rq->kokkos_device != 0) {
|
||||
PairPtrDevice pb = NULL;
|
||||
if (rq->full) {
|
||||
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPDeviceType>;
|
||||
else pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,0>;
|
||||
}
|
||||
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,1>;
|
||||
pair_build_device[index] = pb;
|
||||
return;
|
||||
}
|
||||
|
||||
Neighbor::choose_build(index,rq);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::build_kokkos(int i)
|
||||
{
|
||||
if (lists_host[blist[i]])
|
||||
(this->*pair_build_host[blist[i]])(lists_host[blist[i]]);
|
||||
else if (lists_device[blist[i]])
|
||||
(this->*pair_build_device[blist[i]])(lists_device[blist[i]]);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::setup_bins_kokkos(int i)
|
||||
{
|
||||
if (lists_host[slist[i]]) {
|
||||
lists_host[slist[i]]->stencil_allocate(smax,style);
|
||||
(this->*stencil_create[slist[i]])(lists_host[slist[i]],sx,sy,sz);
|
||||
} else if (lists_device[slist[i]]) {
|
||||
lists_device[slist[i]]->stencil_allocate(smax,style);
|
||||
(this->*stencil_create[slist[i]])(lists_device[slist[i]],sx,sy,sz);
|
||||
}
|
||||
|
||||
if (i < nslist-1) return;
|
||||
|
||||
if (maxhead > k_bins.d_view.dimension_0()) {
|
||||
k_bins = DAT::tdual_int_2d("Neighbor::d_bins",maxhead,atoms_per_bin);
|
||||
k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",maxhead);
|
||||
}
|
||||
}
|
||||
|
||||
// include to trigger instantiation of templated functions
|
||||
|
||||
#include "neigh_full_kokkos.h"
|
|
@ -0,0 +1,257 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_NEIGHBOR_KOKKOS_H
|
||||
#define LMP_NEIGHBOR_KOKKOS_H
|
||||
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class Device>
|
||||
class NeighborKokkosExecute
|
||||
{
|
||||
typedef ArrayTypes<Device> AT;
|
||||
|
||||
public:
|
||||
NeighListKokkos<Device> neigh_list;
|
||||
const typename AT::t_xfloat_2d_randomread cutneighsq;
|
||||
const typename AT::t_int_1d bincount;
|
||||
const typename AT::t_int_1d_const c_bincount;
|
||||
typename AT::t_int_2d bins;
|
||||
typename AT::t_int_2d_const c_bins;
|
||||
const typename AT::t_x_array_randomread x;
|
||||
const typename AT::t_int_1d_const type,mask,molecule;
|
||||
|
||||
const int nbinx,nbiny,nbinz;
|
||||
const int mbinx,mbiny,mbinz;
|
||||
const int mbinxlo,mbinylo,mbinzlo;
|
||||
const X_FLOAT bininvx,bininvy,bininvz;
|
||||
X_FLOAT bboxhi[3],bboxlo[3];
|
||||
|
||||
const int nlocal;
|
||||
|
||||
typename AT::t_int_scalar resize;
|
||||
typename AT::t_int_scalar new_maxneighs;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
|
||||
|
||||
NeighborKokkosExecute(
|
||||
const NeighListKokkos<Device> &_neigh_list,
|
||||
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
|
||||
const typename AT::t_int_1d &_bincount,
|
||||
const typename AT::t_int_2d &_bins,
|
||||
const int _nlocal,
|
||||
const typename AT::t_x_array_randomread &_x,
|
||||
const typename AT::t_int_1d_const &_type,
|
||||
const typename AT::t_int_1d_const &_mask,
|
||||
const typename AT::t_int_1d_const &_molecule,
|
||||
const int & _nbinx,const int & _nbiny,const int & _nbinz,
|
||||
const int & _mbinx,const int & _mbiny,const int & _mbinz,
|
||||
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
|
||||
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
|
||||
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo):
|
||||
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
|
||||
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
|
||||
nlocal(_nlocal),
|
||||
x(_x),type(_type),mask(_mask),molecule(_molecule),
|
||||
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
|
||||
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
|
||||
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
|
||||
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz) {
|
||||
|
||||
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
|
||||
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
|
||||
|
||||
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
|
||||
#ifndef KOKKOS_USE_UVM
|
||||
h_resize = Kokkos::create_mirror_view(resize);
|
||||
#else
|
||||
h_resize = resize;
|
||||
#endif
|
||||
h_resize() = 1;
|
||||
new_maxneighs = typename AT::
|
||||
t_int_scalar("NeighborKokkosFunctor::new_maxneighs");
|
||||
#ifndef KOKKOS_USE_UVM
|
||||
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
|
||||
#else
|
||||
h_new_maxneighs = new_maxneighs;
|
||||
#endif
|
||||
h_new_maxneighs() = neigh_list.maxneighs;
|
||||
};
|
||||
|
||||
~NeighborKokkosExecute() {neigh_list.clean_copy();};
|
||||
|
||||
template<int HalfNeigh, int GhostNewton>
|
||||
KOKKOS_FUNCTION
|
||||
void build_Item(const int &i) const;
|
||||
|
||||
template<int ClusterSize>
|
||||
KOKKOS_FUNCTION
|
||||
void build_cluster_Item(const int &i) const;
|
||||
|
||||
#if DEVICE==2
|
||||
template<int HalfNeigh>
|
||||
__device__ inline
|
||||
void build_ItemCuda(Device dev) const;
|
||||
#endif
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void binatomsItem(const int &i) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi[0])
|
||||
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi[1])
|
||||
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi[2])
|
||||
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Device>
|
||||
struct NeighborKokkosBinAtomsFunctor {
|
||||
typedef Device device_type;
|
||||
|
||||
const NeighborKokkosExecute<Device> c;
|
||||
|
||||
NeighborKokkosBinAtomsFunctor(const NeighborKokkosExecute<Device> &_c):
|
||||
c(_c) {};
|
||||
~NeighborKokkosBinAtomsFunctor() {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.binatomsItem(i);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Device,int HALF_NEIGH,int GHOST_NEWTON>
|
||||
struct NeighborKokkosBuildFunctor {
|
||||
typedef Device device_type;
|
||||
|
||||
const NeighborKokkosExecute<Device> c;
|
||||
const size_t sharedsize;
|
||||
|
||||
NeighborKokkosBuildFunctor(const NeighborKokkosExecute<Device> &_c,
|
||||
const size_t _sharedsize):c(_c),
|
||||
sharedsize(_sharedsize) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
|
||||
}
|
||||
#if DEVICE==2
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (Device dev) const {
|
||||
c.template build_ItemCuda<HALF_NEIGH>(dev);
|
||||
}
|
||||
size_t shmem_size() const { return sharedsize; }
|
||||
#endif
|
||||
};
|
||||
|
||||
template<class Device,int ClusterSize>
|
||||
struct NeighborClusterKokkosBuildFunctor {
|
||||
typedef Device device_type;
|
||||
|
||||
const NeighborKokkosExecute<Device> c;
|
||||
const size_t sharedsize;
|
||||
|
||||
NeighborClusterKokkosBuildFunctor(const NeighborKokkosExecute<Device> &_c,
|
||||
const size_t _sharedsize):c(_c),
|
||||
sharedsize(_sharedsize) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.template build_cluster_Item<ClusterSize>(i);
|
||||
}
|
||||
};
|
||||
|
||||
class NeighborKokkos : public Neighbor {
|
||||
public:
|
||||
class AtomKokkos *atomKK;
|
||||
|
||||
int nlist_host; // pairwise neighbor lists on Host
|
||||
NeighListKokkos<LMPHostType> **lists_host;
|
||||
int nlist_device; // pairwise neighbor lists on Device
|
||||
NeighListKokkos<LMPDeviceType> **lists_device;
|
||||
|
||||
NeighborKokkos(class LAMMPS *);
|
||||
~NeighborKokkos();
|
||||
void init();
|
||||
|
||||
private:
|
||||
int atoms_per_bin;
|
||||
DAT::tdual_xfloat_2d k_cutneighsq;
|
||||
DAT::tdual_int_1d k_bincount;
|
||||
DAT::tdual_int_2d k_bins;
|
||||
|
||||
void init_cutneighsq_kokkos(int);
|
||||
int init_lists_kokkos();
|
||||
void init_list_flags1_kokkos(int);
|
||||
void init_list_flags2_kokkos(int);
|
||||
void init_list_grow_kokkos(int);
|
||||
void choose_build(int, NeighRequest *);
|
||||
void build_kokkos(int);
|
||||
void setup_bins_kokkos(int);
|
||||
|
||||
typedef void (NeighborKokkos::*PairPtrHost)
|
||||
(class NeighListKokkos<LMPHostType> *);
|
||||
PairPtrHost *pair_build_host;
|
||||
typedef void (NeighborKokkos::*PairPtrDevice)
|
||||
(class NeighListKokkos<LMPDeviceType> *);
|
||||
PairPtrDevice *pair_build_device;
|
||||
|
||||
template<class DeviceType,int HALF_NEIGH>
|
||||
void full_bin_kokkos(NeighListKokkos<DeviceType> *list);
|
||||
template<class DeviceType>
|
||||
void full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list);
|
||||
|
||||
typedef void (NeighborKokkos::*StencilPtrHost)
|
||||
(class NeighListKokkos<LMPHostType> *, int, int, int);
|
||||
StencilPtrHost *stencil_create_host;
|
||||
typedef void (NeighborKokkos::*StencilPtrDevice)
|
||||
(class NeighListKokkos<LMPDeviceType> *, int, int, int);
|
||||
StencilPtrDevice *stencil_create_device;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -0,0 +1,655 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_KOKKOS_H
|
||||
#define LMP_PAIR_KOKKOS_H
|
||||
|
||||
#include "Kokkos_Macros.hpp"
|
||||
#include "pair.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
#include "Kokkos_Vectorization.hpp"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template <class PairStyle, int NEIGHFLAG, bool STACKPARAMS, class Specialisation = void>
|
||||
struct PairComputeFunctor {
|
||||
typedef typename PairStyle::device_type device_type ;
|
||||
typedef EV_FLOAT value_type;
|
||||
|
||||
PairStyle c;
|
||||
NeighListKokkos<device_type> list;
|
||||
|
||||
PairComputeFunctor(PairStyle* c_ptr,
|
||||
NeighListKokkos<device_type>* list_ptr):
|
||||
c(*c_ptr),list(*list_ptr) {};
|
||||
~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
|
||||
return j >> SBBITS & 3;
|
||||
}
|
||||
|
||||
template<int EVFLAG, int NEWTON_PAIR>
|
||||
KOKKOS_FUNCTION
|
||||
EV_FLOAT compute_item(const int& ii,
|
||||
const NeighListKokkos<device_type> &list) const {
|
||||
EV_FLOAT ev;
|
||||
const int i = list.d_ilist[ii];
|
||||
const X_FLOAT xtmp = c.x(i,0);
|
||||
const X_FLOAT ytmp = c.x(i,1);
|
||||
const X_FLOAT ztmp = c.x(i,2);
|
||||
const int itype = c.type(i);
|
||||
|
||||
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
|
||||
const int jnum = list.d_numneigh[i];
|
||||
|
||||
F_FLOAT fxtmp = 0.0;
|
||||
F_FLOAT fytmp = 0.0;
|
||||
F_FLOAT fztmp = 0.0;
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
int j = neighbors_i(jj);
|
||||
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
const X_FLOAT delx = xtmp - c.x(j,0);
|
||||
const X_FLOAT dely = ytmp - c.x(j,1);
|
||||
const X_FLOAT delz = ztmp - c.x(j,2);
|
||||
const int jtype = c.type(j);
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
||||
|
||||
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
|
||||
fxtmp += delx*fpair;
|
||||
fytmp += dely*fpair;
|
||||
fztmp += delz*fpair;
|
||||
if ((NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
|
||||
Kokkos::atomic_fetch_add(&c.f(j,0),-delx*fpair);
|
||||
Kokkos::atomic_fetch_add(&c.f(j,1),-dely*fpair);
|
||||
Kokkos::atomic_fetch_add(&c.f(j,2),-delz*fpair);
|
||||
}
|
||||
|
||||
if ((NEIGHFLAG==HALF) && (NEWTON_PAIR || j < c.nlocal)) {
|
||||
c.f(j,0) -= delx*fpair;
|
||||
c.f(j,1) -= dely*fpair;
|
||||
c.f(j,2) -= delz*fpair;
|
||||
}
|
||||
|
||||
if (EVFLAG) {
|
||||
if (c.eflag) {
|
||||
ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
|
||||
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
if (c.COUL_FLAG)
|
||||
ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*
|
||||
factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
}
|
||||
|
||||
if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
if (NEIGHFLAG == HALFTHREAD) {
|
||||
Kokkos::atomic_fetch_add(&c.f(i,0),fxtmp);
|
||||
Kokkos::atomic_fetch_add(&c.f(i,1),fytmp);
|
||||
Kokkos::atomic_fetch_add(&c.f(i,2),fztmp);
|
||||
} else {
|
||||
c.f(i,0) += fxtmp;
|
||||
c.f(i,1) += fytmp;
|
||||
c.f(i,2) += fztmp;
|
||||
}
|
||||
|
||||
return ev;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
|
||||
const F_FLOAT &fpair, const F_FLOAT &delx,
|
||||
const F_FLOAT &dely, const F_FLOAT &delz) const
|
||||
{
|
||||
const int EFLAG = c.eflag;
|
||||
const int NEWTON_PAIR = c.newton_pair;
|
||||
const int VFLAG = c.vflag_either;
|
||||
|
||||
if (EFLAG) {
|
||||
if (c.eflag_atom) {
|
||||
const E_FLOAT epairhalf = 0.5 * (ev.evdwl + ev.ecoul);
|
||||
if (NEWTON_PAIR || i < c.nlocal) c.eatom[i] += epairhalf;
|
||||
if (NEWTON_PAIR || j < c.nlocal) c.eatom[j] += epairhalf;
|
||||
}
|
||||
}
|
||||
|
||||
if (VFLAG) {
|
||||
const E_FLOAT v0 = delx*delx*fpair;
|
||||
const E_FLOAT v1 = dely*dely*fpair;
|
||||
const E_FLOAT v2 = delz*delz*fpair;
|
||||
const E_FLOAT v3 = delx*dely*fpair;
|
||||
const E_FLOAT v4 = delx*delz*fpair;
|
||||
const E_FLOAT v5 = dely*delz*fpair;
|
||||
|
||||
if (c.vflag_global) {
|
||||
if (NEIGHFLAG) {
|
||||
if (NEWTON_PAIR) {
|
||||
ev.v[0] += v0;
|
||||
ev.v[1] += v1;
|
||||
ev.v[2] += v2;
|
||||
ev.v[3] += v3;
|
||||
ev.v[4] += v4;
|
||||
ev.v[5] += v5;
|
||||
} else {
|
||||
if (i < c.nlocal) {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
if (j < c.nlocal) {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
}
|
||||
|
||||
if (c.vflag_atom) {
|
||||
if (NEWTON_PAIR || i < c.nlocal) {
|
||||
c.d_vatom(i,0) += 0.5*v0;
|
||||
c.d_vatom(i,1) += 0.5*v1;
|
||||
c.d_vatom(i,2) += 0.5*v2;
|
||||
c.d_vatom(i,3) += 0.5*v3;
|
||||
c.d_vatom(i,4) += 0.5*v4;
|
||||
c.d_vatom(i,5) += 0.5*v5;
|
||||
}
|
||||
if (NEWTON_PAIR || (NEIGHFLAG && j < c.nlocal)) {
|
||||
c.d_vatom(j,0) += 0.5*v0;
|
||||
c.d_vatom(j,1) += 0.5*v1;
|
||||
c.d_vatom(j,2) += 0.5*v2;
|
||||
c.d_vatom(j,3) += 0.5*v3;
|
||||
c.d_vatom(j,4) += 0.5*v4;
|
||||
c.d_vatom(j,5) += 0.5*v5;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
if (c.newton_pair) compute_item<0,1>(i,list);
|
||||
else compute_item<0,0>(i,list);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i, value_type &energy_virial) const {
|
||||
if (c.newton_pair)
|
||||
energy_virial += compute_item<1,1>(i,list);
|
||||
else
|
||||
energy_virial += compute_item<1,0>(i,list);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(volatile value_type &update) {
|
||||
update.evdwl = 0;
|
||||
update.ecoul = 0;
|
||||
update.v[0] = 0;
|
||||
update.v[1] = 0;
|
||||
update.v[2] = 0;
|
||||
update.v[3] = 0;
|
||||
update.v[4] = 0;
|
||||
update.v[5] = 0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(volatile value_type &update,
|
||||
const volatile value_type &source) {
|
||||
update.evdwl += source.evdwl;
|
||||
update.ecoul += source.ecoul;
|
||||
update.v[0] += source.v[0];
|
||||
update.v[1] += source.v[1];
|
||||
update.v[2] += source.v[2];
|
||||
update.v[3] += source.v[3];
|
||||
update.v[4] += source.v[4];
|
||||
update.v[5] += source.v[5];
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
template <class PairStyle, bool STACKPARAMS, class Specialisation>
|
||||
struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
|
||||
typedef typename PairStyle::device_type device_type ;
|
||||
typedef Kokkos::Vectorization<device_type,NeighClusterSize> vectorization;
|
||||
typedef EV_FLOAT value_type;
|
||||
|
||||
PairStyle c;
|
||||
NeighListKokkos<device_type> list;
|
||||
|
||||
PairComputeFunctor(PairStyle* c_ptr,
|
||||
NeighListKokkos<device_type>* list_ptr):
|
||||
c(*c_ptr),list(*list_ptr) {};
|
||||
~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
|
||||
return j >> SBBITS & 3;
|
||||
}
|
||||
|
||||
template<int EVFLAG, int NEWTON_PAIR>
|
||||
KOKKOS_FUNCTION
|
||||
EV_FLOAT compute_item(const device_type& dev,
|
||||
const NeighListKokkos<device_type> &list) const {
|
||||
EV_FLOAT ev;
|
||||
const int i = vectorization::global_thread_rank(dev);
|
||||
|
||||
const X_FLOAT xtmp = c.c_x(i,0);
|
||||
const X_FLOAT ytmp = c.c_x(i,1);
|
||||
const X_FLOAT ztmp = c.c_x(i,2);
|
||||
const int itype = c.type(i);
|
||||
|
||||
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
|
||||
const int jnum = list.d_numneigh[i];
|
||||
|
||||
F_FLOAT fxtmp = 0.0;
|
||||
F_FLOAT fytmp = 0.0;
|
||||
F_FLOAT fztmp = 0.0;
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int jjj = neighbors_i(jj);
|
||||
|
||||
for (int k = vectorization::begin(); k<NeighClusterSize; k+=vectorization::increment) {
|
||||
const F_FLOAT factor_lj = c.special_lj[sbmask(jjj+k)];
|
||||
const int j = (jjj + k)&NEIGHMASK;
|
||||
if((j==i)||(j>=c.nall)) continue;
|
||||
const X_FLOAT delx = xtmp - c.c_x(j,0);
|
||||
const X_FLOAT dely = ytmp - c.c_x(j,1);
|
||||
const X_FLOAT delz = ztmp - c.c_x(j,2);
|
||||
const int jtype = c.type(j);
|
||||
const F_FLOAT rsq = (delx*delx + dely*dely + delz*delz);
|
||||
|
||||
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
||||
|
||||
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
fxtmp += delx*fpair;
|
||||
fytmp += dely*fpair;
|
||||
fztmp += delz*fpair;
|
||||
|
||||
if (EVFLAG) {
|
||||
if (c.eflag) {
|
||||
ev.evdwl += 0.5*
|
||||
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
if (c.COUL_FLAG)
|
||||
ev.ecoul += 0.5*
|
||||
factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
}
|
||||
|
||||
if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const F_FLOAT fx = vectorization::reduce(fxtmp);
|
||||
const F_FLOAT fy = vectorization::reduce(fytmp);
|
||||
const F_FLOAT fz = vectorization::reduce(fztmp);
|
||||
if(vectorization::is_lane_0(dev)) {
|
||||
c.f(i,0) += fx;
|
||||
c.f(i,1) += fy;
|
||||
c.f(i,2) += fz;
|
||||
}
|
||||
|
||||
return ev;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
|
||||
const F_FLOAT &fpair, const F_FLOAT &delx,
|
||||
const F_FLOAT &dely, const F_FLOAT &delz) const
|
||||
{
|
||||
const int EFLAG = c.eflag;
|
||||
const int NEWTON_PAIR = c.newton_pair;
|
||||
const int VFLAG = c.vflag_either;
|
||||
|
||||
if (EFLAG) {
|
||||
if (c.eflag_atom) {
|
||||
const E_FLOAT epairhalf = 0.5 * (ev.evdwl + ev.ecoul);
|
||||
if (NEWTON_PAIR || i < c.nlocal) c.eatom[i] += epairhalf;
|
||||
if (NEWTON_PAIR || j < c.nlocal) c.eatom[j] += epairhalf;
|
||||
}
|
||||
}
|
||||
|
||||
if (VFLAG) {
|
||||
const E_FLOAT v0 = delx*delx*fpair;
|
||||
const E_FLOAT v1 = dely*dely*fpair;
|
||||
const E_FLOAT v2 = delz*delz*fpair;
|
||||
const E_FLOAT v3 = delx*dely*fpair;
|
||||
const E_FLOAT v4 = delx*delz*fpair;
|
||||
const E_FLOAT v5 = dely*delz*fpair;
|
||||
|
||||
if (c.vflag_global) {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
|
||||
if (c.vflag_atom) {
|
||||
if (i < c.nlocal) {
|
||||
c.d_vatom(i,0) += 0.5*v0;
|
||||
c.d_vatom(i,1) += 0.5*v1;
|
||||
c.d_vatom(i,2) += 0.5*v2;
|
||||
c.d_vatom(i,3) += 0.5*v3;
|
||||
c.d_vatom(i,4) += 0.5*v4;
|
||||
c.d_vatom(i,5) += 0.5*v5;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const device_type& dev) const {
|
||||
if (c.newton_pair) compute_item<0,1>(dev,list);
|
||||
else compute_item<0,0>(dev,list);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const device_type& dev, value_type &energy_virial) const {
|
||||
if (c.newton_pair)
|
||||
energy_virial += compute_item<1,1>(dev,list);
|
||||
else
|
||||
energy_virial += compute_item<1,0>(dev,list);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(volatile value_type &update) {
|
||||
update.evdwl = 0;
|
||||
update.ecoul = 0;
|
||||
update.v[0] = 0;
|
||||
update.v[1] = 0;
|
||||
update.v[2] = 0;
|
||||
update.v[3] = 0;
|
||||
update.v[4] = 0;
|
||||
update.v[5] = 0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(volatile value_type &update,
|
||||
const volatile value_type &source) {
|
||||
update.evdwl += source.evdwl;
|
||||
update.ecoul += source.ecoul;
|
||||
update.v[0] += source.v[0];
|
||||
update.v[1] += source.v[1];
|
||||
update.v[2] += source.v[2];
|
||||
update.v[3] += source.v[3];
|
||||
update.v[4] += source.v[4];
|
||||
update.v[5] += source.v[5];
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
template <class PairStyle, bool STACKPARAMS, class Specialisation>
|
||||
struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
|
||||
typedef typename PairStyle::device_type device_type ;
|
||||
typedef EV_FLOAT value_type;
|
||||
|
||||
PairStyle c;
|
||||
NeighListKokkos<device_type> list;
|
||||
|
||||
PairComputeFunctor(PairStyle* c_ptr,
|
||||
NeighListKokkos<device_type>* list_ptr):
|
||||
c(*c_ptr),list(*list_ptr) {};
|
||||
~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
|
||||
return j >> SBBITS & 3;
|
||||
}
|
||||
|
||||
template<int EVFLAG, int NEWTON_PAIR>
|
||||
KOKKOS_FUNCTION
|
||||
EV_FLOAT compute_item(const int& ii,
|
||||
const NeighListKokkos<device_type> &list) const {
|
||||
EV_FLOAT ev;
|
||||
const int i = ii;//list.d_ilist[ii];
|
||||
const X_FLOAT xtmp = c.x(i,0);
|
||||
const X_FLOAT ytmp = c.x(i,1);
|
||||
const X_FLOAT ztmp = c.x(i,2);
|
||||
const int itype = c.type(i);
|
||||
|
||||
//const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
|
||||
const int jnum = c.nall;
|
||||
|
||||
F_FLOAT fxtmp = 0.0;
|
||||
F_FLOAT fytmp = 0.0;
|
||||
F_FLOAT fztmp = 0.0;
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
int j = jj;//neighbors_i(jj);
|
||||
if(i==j) continue;
|
||||
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
const X_FLOAT delx = xtmp - c.x(j,0);
|
||||
const X_FLOAT dely = ytmp - c.x(j,1);
|
||||
const X_FLOAT delz = ztmp - c.x(j,2);
|
||||
const int jtype = c.type(j);
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
||||
|
||||
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
fxtmp += delx*fpair;
|
||||
fytmp += dely*fpair;
|
||||
fztmp += delz*fpair;
|
||||
|
||||
if (EVFLAG) {
|
||||
if (c.eflag) {
|
||||
ev.evdwl += 0.5*
|
||||
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
if (c.COUL_FLAG)
|
||||
ev.ecoul += 0.5*
|
||||
factor_lj * c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
}
|
||||
|
||||
if (c.vflag_either) ev_tally(ev,i,j,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
c.f(i,0) += fxtmp;
|
||||
c.f(i,1) += fytmp;
|
||||
c.f(i,2) += fztmp;
|
||||
|
||||
return ev;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
|
||||
const F_FLOAT &fpair, const F_FLOAT &delx,
|
||||
const F_FLOAT &dely, const F_FLOAT &delz) const
|
||||
{
|
||||
const int EFLAG = c.eflag;
|
||||
const int VFLAG = c.vflag_either;
|
||||
|
||||
if (EFLAG) {
|
||||
if (c.eflag_atom) {
|
||||
const E_FLOAT epairhalf = 0.5 * (ev.evdwl + ev.ecoul);
|
||||
if (i < c.nlocal) c.eatom[i] += epairhalf;
|
||||
if (j < c.nlocal) c.eatom[j] += epairhalf;
|
||||
}
|
||||
}
|
||||
|
||||
if (VFLAG) {
|
||||
const E_FLOAT v0 = delx*delx*fpair;
|
||||
const E_FLOAT v1 = dely*dely*fpair;
|
||||
const E_FLOAT v2 = delz*delz*fpair;
|
||||
const E_FLOAT v3 = delx*dely*fpair;
|
||||
const E_FLOAT v4 = delx*delz*fpair;
|
||||
const E_FLOAT v5 = dely*delz*fpair;
|
||||
|
||||
if (c.vflag_global) {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
|
||||
if (c.vflag_atom) {
|
||||
if (i < c.nlocal) {
|
||||
c.d_vatom(i,0) += 0.5*v0;
|
||||
c.d_vatom(i,1) += 0.5*v1;
|
||||
c.d_vatom(i,2) += 0.5*v2;
|
||||
c.d_vatom(i,3) += 0.5*v3;
|
||||
c.d_vatom(i,4) += 0.5*v4;
|
||||
c.d_vatom(i,5) += 0.5*v5;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
compute_item<0,0>(i,list);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i, value_type &energy_virial) const {
|
||||
energy_virial += compute_item<1,0>(i,list);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(volatile value_type &update) {
|
||||
update.evdwl = 0;
|
||||
update.ecoul = 0;
|
||||
update.v[0] = 0;
|
||||
update.v[1] = 0;
|
||||
update.v[2] = 0;
|
||||
update.v[3] = 0;
|
||||
update.v[4] = 0;
|
||||
update.v[5] = 0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(volatile value_type &update,
|
||||
const volatile value_type &source) {
|
||||
update.evdwl += source.evdwl;
|
||||
update.ecoul += source.ecoul;
|
||||
update.v[0] += source.v[0];
|
||||
update.v[1] += source.v[1];
|
||||
update.v[2] += source.v[2];
|
||||
update.v[3] += source.v[3];
|
||||
update.v[4] += source.v[4];
|
||||
update.v[5] += source.v[5];
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
template<class PairStyle, class Specialisation>
|
||||
EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
|
||||
EV_FLOAT ev;
|
||||
if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
||||
if (fpair->neighflag == FULL) {
|
||||
PairComputeFunctor<PairStyle,FULL,false,Specialisation >
|
||||
ff(fpair, list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
} else if (fpair->neighflag == HALFTHREAD) {
|
||||
PairComputeFunctor<PairStyle,HALFTHREAD,false,Specialisation >
|
||||
ff(fpair, list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
} else if (fpair->neighflag == HALF) {
|
||||
PairComputeFunctor<PairStyle,HALF,false,Specialisation >
|
||||
ff(fpair, list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
} else if (fpair->neighflag == N2) {
|
||||
PairComputeFunctor<PairStyle,N2,false,Specialisation >
|
||||
ff(fpair, list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(fpair->nlocal,ff,ev);
|
||||
else Kokkos::parallel_for(fpair->nlocal,ff);
|
||||
} else if (fpair->neighflag == FULLCLUSTER) {
|
||||
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,false,Specialisation >
|
||||
f_type;
|
||||
f_type ff(fpair, list);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
|
||||
#else
|
||||
const int teamsize = 1;
|
||||
#endif
|
||||
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),ff,ev);
|
||||
else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),ff);
|
||||
}
|
||||
} else {
|
||||
if (fpair->neighflag == FULL) {
|
||||
PairComputeFunctor<PairStyle,FULL,true,Specialisation >
|
||||
ff(fpair, list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
} else if (fpair->neighflag == HALFTHREAD) {
|
||||
PairComputeFunctor<PairStyle,HALFTHREAD,true,Specialisation >
|
||||
ff(fpair, list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
} else if (fpair->neighflag == HALF) {
|
||||
PairComputeFunctor<PairStyle,HALF,true,Specialisation >
|
||||
ff(fpair, list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
} else if (fpair->neighflag == N2) {
|
||||
PairComputeFunctor<PairStyle,N2,true,Specialisation >
|
||||
ff(fpair, list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(fpair->nlocal,ff,ev);
|
||||
else Kokkos::parallel_for(fpair->nlocal,ff);
|
||||
} else if (fpair->neighflag == FULLCLUSTER) {
|
||||
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,true,Specialisation >
|
||||
f_type;
|
||||
f_type ff(fpair, list);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
|
||||
#else
|
||||
const int teamsize = 1;
|
||||
#endif
|
||||
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(Kokkos::ParallelWorkRequest(nteams,teamsize),ff,ev);
|
||||
else Kokkos::parallel_for(Kokkos::ParallelWorkRequest(nteams,teamsize),ff);
|
||||
}
|
||||
}
|
||||
return ev;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -0,0 +1,267 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "math.h"
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "string.h"
|
||||
#include "pair_lj_cut_kokkos.h"
|
||||
#include "kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "math_const.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "atom_masks.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathConst;
|
||||
|
||||
#define KOKKOS_CUDA_MAX_THREADS 256
|
||||
#define KOKKOS_CUDA_MIN_BLOCKS 8
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
PairLJCutKokkos<DeviceType>::PairLJCutKokkos(LAMMPS *lmp) : PairLJCut(lmp)
|
||||
{
|
||||
respa_enable = 0;
|
||||
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK;
|
||||
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
|
||||
cutsq = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
PairLJCutKokkos<DeviceType>::~PairLJCutKokkos()
|
||||
{
|
||||
if (allocated) {
|
||||
k_cutsq = DAT::tdual_ffloat_2d();
|
||||
memory->sfree(cutsq);
|
||||
cutsq = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairLJCutKokkos<DeviceType>::cleanup_copy() {
|
||||
// WHY needed: this prevents parent copy from deallocating any arrays
|
||||
allocated = 0;
|
||||
cutsq = NULL;
|
||||
eatom = NULL;
|
||||
vatom = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
{
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
|
||||
double evdwl = 0.0;
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
k_cutsq.template sync<DeviceType>();
|
||||
k_params.template sync<DeviceType>();
|
||||
if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
|
||||
else atomKK->modified(execution_space,F_MASK);
|
||||
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
c_x = atomKK->k_x.view<DeviceType>();
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
type = atomKK->k_type.view<DeviceType>();
|
||||
nlocal = atom->nlocal;
|
||||
nall = atom->nlocal + atom->nghost;
|
||||
special_lj[0] = force->special_lj[0];
|
||||
special_lj[1] = force->special_lj[1];
|
||||
special_lj[2] = force->special_lj[2];
|
||||
special_lj[3] = force->special_lj[3];
|
||||
newton_pair = force->newton_pair;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
EV_FLOAT ev = pair_compute<PairLJCutKokkos<DeviceType>,void >(this,(NeighListKokkos<DeviceType>*)list);
|
||||
|
||||
DeviceType::fence();
|
||||
|
||||
if (eflag) eng_vdwl += ev.evdwl;
|
||||
if (vflag_global) {
|
||||
virial[0] += ev.v[0];
|
||||
virial[1] += ev.v[1];
|
||||
virial[2] += ev.v[2];
|
||||
virial[3] += ev.v[3];
|
||||
virial[4] += ev.v[4];
|
||||
virial[5] += ev.v[5];
|
||||
}
|
||||
|
||||
if (vflag_fdotr) virial_fdotr_compute();
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairLJCutKokkos<DeviceType>::
|
||||
compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
|
||||
const F_FLOAT r2inv = 1.0/rsq;
|
||||
const F_FLOAT r6inv = r2inv*r2inv*r2inv;
|
||||
|
||||
const F_FLOAT forcelj = r6inv *
|
||||
((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv -
|
||||
(STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2));
|
||||
return forcelj*r2inv;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairLJCutKokkos<DeviceType>::
|
||||
compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
|
||||
const F_FLOAT r2inv = 1.0/rsq;
|
||||
const F_FLOAT r6inv = r2inv*r2inv*r2inv;
|
||||
return r6inv*((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv -
|
||||
(STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)) -
|
||||
(STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allocate all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairLJCutKokkos<DeviceType>::allocate()
|
||||
{
|
||||
PairLJCut::allocate();
|
||||
|
||||
int n = atom->ntypes;
|
||||
memory->destroy(cutsq);
|
||||
memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
|
||||
d_cutsq = k_cutsq.template view<DeviceType>();
|
||||
k_params = Kokkos::DualView<params_lj**,Kokkos::LayoutRight,DeviceType>("PairLJCut::params",n+1,n+1);
|
||||
params = k_params.d_view;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
global settings
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairLJCutKokkos<DeviceType>::settings(int narg, char **arg)
|
||||
{
|
||||
if (narg > 2) error->all(FLERR,"Illegal pair_style command");
|
||||
|
||||
PairLJCut::settings(1,arg);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairLJCutKokkos<DeviceType>::init_style()
|
||||
{
|
||||
PairLJCut::init_style();
|
||||
|
||||
// error if rRESPA with inner levels
|
||||
|
||||
if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
|
||||
int respa = 0;
|
||||
if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
|
||||
if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
|
||||
if (respa)
|
||||
error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
|
||||
}
|
||||
|
||||
// irequest = neigh request made by parent class
|
||||
|
||||
neighflag = lmp->kokkos->neighflag;
|
||||
int irequest = neighbor->nrequest - 1;
|
||||
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
|
||||
!Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk");
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init for one type pair i,j and corresponding j,i
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double PairLJCutKokkos<DeviceType>::init_one(int i, int j)
|
||||
{
|
||||
double cutone = PairLJCut::init_one(i,j);
|
||||
|
||||
k_params.h_view(i,j).lj1 = lj1[i][j];
|
||||
k_params.h_view(i,j).lj2 = lj2[i][j];
|
||||
k_params.h_view(i,j).lj3 = lj3[i][j];
|
||||
k_params.h_view(i,j).lj4 = lj4[i][j];
|
||||
k_params.h_view(i,j).offset = offset[i][j];
|
||||
k_params.h_view(i,j).cutsq = cutone*cutone;
|
||||
k_params.h_view(j,i) = k_params.h_view(i,j);
|
||||
if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
|
||||
m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
|
||||
m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
|
||||
}
|
||||
k_cutsq.h_view(i,j) = cutone*cutone;
|
||||
k_cutsq.template modify<LMPHostType>();
|
||||
k_params.template modify<LMPHostType>();
|
||||
|
||||
return cutone;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template class PairLJCutKokkos<LMPDeviceType>;
|
||||
#if DEVICE==2
|
||||
template class PairLJCutKokkos<LMPHostType>;
|
||||
#endif
|
|
@ -0,0 +1,112 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(lj/cut/kk,PairLJCutKokkos<LMPDeviceType>)
|
||||
PairStyle(lj/cut/kk/device,PairLJCutKokkos<LMPDeviceType>)
|
||||
PairStyle(lj/cut/kk/host,PairLJCutKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_LJ_CUT_KOKKOS_H
|
||||
#define LMP_PAIR_LJ_CUT_KOKKOS_H
|
||||
|
||||
#include "pair_kokkos.h"
|
||||
#include "pair_lj_cut.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
class PairLJCutKokkos : public PairLJCut {
|
||||
public:
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
PairLJCutKokkos(class LAMMPS *);
|
||||
~PairLJCutKokkos();
|
||||
|
||||
void compute(int, int);
|
||||
|
||||
void settings(int, char **);
|
||||
void init_style();
|
||||
double init_one(int, int);
|
||||
|
||||
struct params_lj{
|
||||
params_lj(){cutsq=0,lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
|
||||
params_lj(int i){cutsq=0,lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
|
||||
F_FLOAT cutsq,lj1,lj2,lj3,lj4,offset;
|
||||
};
|
||||
|
||||
protected:
|
||||
void cleanup_copy();
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const;
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const;
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
Kokkos::DualView<params_lj**,Kokkos::LayoutRight,DeviceType> k_params;
|
||||
typename Kokkos::DualView<params_lj**,Kokkos::LayoutRight,DeviceType>::t_dev_const params;
|
||||
params_lj m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; // hardwired to space for 15 atom types
|
||||
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
typename ArrayTypes<DeviceType>::t_x_array_randomread x;
|
||||
typename ArrayTypes<DeviceType>::t_x_array c_x;
|
||||
typename ArrayTypes<DeviceType>::t_f_array f;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
|
||||
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
|
||||
//typename ArrayTypes<DeviceType>::t_ffloat_1d special_lj;
|
||||
|
||||
int newton_pair;
|
||||
|
||||
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
|
||||
|
||||
class AtomKokkos *atomKK;
|
||||
int neighflag;
|
||||
int nlocal,nall,eflag,vflag;
|
||||
|
||||
void allocate();
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,FULL,true>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,HALF,true>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,true>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,N2,true>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,true >;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,FULL,false>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,HALF,false>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,false>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,N2,false>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,false >;
|
||||
friend EV_FLOAT pair_compute<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,352 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(table/kk,PairTableKokkos<LMPDeviceType>)
|
||||
PairStyle(table/kk/device,PairTableKokkos<LMPDeviceType>)
|
||||
PairStyle(table/kk/host,PairTableKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_TABLE_KOKKOS_H
|
||||
#define LMP_PAIR_TABLE_KOKKOS_H
|
||||
|
||||
#include "pair.h"
|
||||
#include "pair_kokkos.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class Device,int TABSTYLE>
|
||||
struct S_TableCompute {
|
||||
enum {TabStyle = TABSTYLE};
|
||||
};
|
||||
|
||||
template <class DeviceType, int NEIGHFLAG, int TABSTYLE>
|
||||
class PairTableComputeFunctor;
|
||||
|
||||
template<class DeviceType>
|
||||
class PairTableKokkos : public Pair {
|
||||
public:
|
||||
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
|
||||
PairTableKokkos(class LAMMPS *);
|
||||
virtual ~PairTableKokkos();
|
||||
|
||||
virtual void compute(int, int);
|
||||
|
||||
template<int TABSTYLE>
|
||||
void compute_style(int, int);
|
||||
|
||||
/*template<int EVFLAG, int NEIGHFLAG, int NEWTON_PAIR, int TABSTYLE>
|
||||
KOKKOS_FUNCTION
|
||||
EV_FLOAT compute_item(const int& i,
|
||||
const NeighListKokkos<DeviceType> &list) const;
|
||||
*/
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
double init_one(int, int);
|
||||
void write_restart(FILE *);
|
||||
void read_restart(FILE *);
|
||||
void write_restart_settings(FILE *);
|
||||
void read_restart_settings(FILE *);
|
||||
double single(int, int, int, int, double, double, double, double &);
|
||||
void *extract(const char *, int &);
|
||||
|
||||
void init_style();
|
||||
|
||||
|
||||
protected:
|
||||
enum{LOOKUP,LINEAR,SPLINE,BITMAP};
|
||||
|
||||
int tabstyle,tablength;
|
||||
/*struct TableDeviceConst {
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread cutsq;
|
||||
typename ArrayTypes<DeviceType>::t_int_2d_randomread tabindex;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_randomread nshiftbits,nmask;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_1d_randomread innersq,invdelta,deltasq6;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2;
|
||||
};*/
|
||||
//Its faster not to use texture fetch if the number of tables is less than 32!
|
||||
struct TableDeviceConst {
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d cutsq;
|
||||
typename ArrayTypes<DeviceType>::t_int_2d tabindex;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d nshiftbits,nmask;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_1d innersq,invdelta,deltasq6;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2;
|
||||
};
|
||||
|
||||
struct TableDevice {
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d cutsq;
|
||||
typename ArrayTypes<DeviceType>::t_int_2d tabindex;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d nshiftbits,nmask;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_1d innersq,invdelta,deltasq6;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2;
|
||||
};
|
||||
|
||||
struct TableHost {
|
||||
typename ArrayTypes<LMPHostType>::t_ffloat_2d cutsq;
|
||||
typename ArrayTypes<LMPHostType>::t_int_2d tabindex;
|
||||
typename ArrayTypes<LMPHostType>::t_int_1d nshiftbits,nmask;
|
||||
typename ArrayTypes<LMPHostType>::t_ffloat_1d innersq,invdelta,deltasq6;
|
||||
typename ArrayTypes<LMPHostType>::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2;
|
||||
};
|
||||
|
||||
struct Table {
|
||||
int ninput,rflag,fpflag,match,ntablebits;
|
||||
int nshiftbits,nmask;
|
||||
double rlo,rhi,fplo,fphi,cut;
|
||||
double *rfile,*efile,*ffile;
|
||||
double *e2file,*f2file;
|
||||
double innersq,delta,invdelta,deltasq6;
|
||||
double *rsq,*drsq,*e,*de,*f,*df,*e2,*f2;
|
||||
};
|
||||
int ntables;
|
||||
Table *tables;
|
||||
TableDeviceConst d_table_const;
|
||||
TableDevice* d_table;
|
||||
TableHost* h_table;
|
||||
|
||||
int **tabindex;
|
||||
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
|
||||
|
||||
void allocate();
|
||||
void read_table(Table *, char *, char *);
|
||||
void param_extract(Table *, char *);
|
||||
void bcast_table(Table *);
|
||||
void spline_table(Table *);
|
||||
void compute_table(Table *);
|
||||
void null_table(Table *);
|
||||
void free_table(Table *);
|
||||
void spline(double *, double *, int, double, double, double *);
|
||||
double splint(double *, double *, double *, int, double);
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_x_array_randomread x;
|
||||
typename ArrayTypes<DeviceType>::t_x_array_const c_x;
|
||||
typename ArrayTypes<DeviceType>::t_f_array f;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
|
||||
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
|
||||
|
||||
protected:
|
||||
int nlocal,nall,eflag,vflag,neighflag,newton_pair;
|
||||
class AtomKokkos *atomKK;
|
||||
int update_table;
|
||||
void create_kokkos_tables();
|
||||
void cleanup_copy();
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const;
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const;
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LINEAR> >;
|
||||
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,SPLINE> >;
|
||||
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
/*template<int FULL_NEIGH>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
|
||||
const F_FLOAT &fpair, const F_FLOAT &delx,
|
||||
const F_FLOAT &dely, const F_FLOAT &delz) const;
|
||||
*/
|
||||
};
|
||||
/*
|
||||
template <class DeviceType, int NEIGHFLAG, int TABSTYLE>
|
||||
struct PairTableComputeFunctor {
|
||||
typedef DeviceType device_type ;
|
||||
typedef EV_FLOAT value_type;
|
||||
|
||||
PairTableKokkos<DeviceType> c;
|
||||
NeighListKokkos<DeviceType> list;
|
||||
|
||||
PairTableComputeFunctor(PairTableKokkos<DeviceType>* c_ptr,
|
||||
NeighListKokkos<DeviceType>* list_ptr):
|
||||
c(*c_ptr),list(*list_ptr) {};
|
||||
~PairTableComputeFunctor() {c.cleanup_copy();list.clean_copy();};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
if (c.newton_pair) c.template compute_item<0,NEIGHFLAG,1,TABSTYLE>(i,list);
|
||||
else c.template compute_item<0,NEIGHFLAG,0,TABSTYLE>(i,list);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i, value_type &energy_virial) const {
|
||||
if (c.newton_pair)
|
||||
energy_virial += c.template compute_item<1,NEIGHFLAG,1,TABSTYLE>(i,list);
|
||||
else
|
||||
energy_virial += c.template compute_item<1,NEIGHFLAG,0,TABSTYLE>(i,list);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init(volatile value_type &update) {
|
||||
update.evdwl = 0;
|
||||
update.ecoul = 0;
|
||||
update.v[0] = 0;
|
||||
update.v[1] = 0;
|
||||
update.v[2] = 0;
|
||||
update.v[3] = 0;
|
||||
update.v[4] = 0;
|
||||
update.v[5] = 0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join(volatile value_type &update,
|
||||
const volatile value_type &source) {
|
||||
update.evdwl += source.evdwl;
|
||||
update.ecoul += source.ecoul;
|
||||
update.v[0] += source.v[0];
|
||||
update.v[1] += source.v[1];
|
||||
update.v[2] += source.v[2];
|
||||
update.v[3] += source.v[3];
|
||||
update.v[4] += source.v[4];
|
||||
update.v[5] += source.v[5];
|
||||
}
|
||||
};
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Pair distance < table inner cutoff
|
||||
|
||||
Two atoms are closer together than the pairwise table allows.
|
||||
|
||||
E: Pair distance > table outer cutoff
|
||||
|
||||
Two atoms are further apart than the pairwise table allows.
|
||||
|
||||
E: Illegal ... command
|
||||
|
||||
Self-explanatory. Check the input script syntax and compare to the
|
||||
documentation for the command. You can use -echo screen as a
|
||||
command-line option when running LAMMPS to see the offending line.
|
||||
|
||||
E: Unknown table style in pair_style command
|
||||
|
||||
Style of table is invalid for use with pair_style table command.
|
||||
|
||||
E: Illegal number of pair table entries
|
||||
|
||||
There must be at least 2 table entries.
|
||||
|
||||
E: Invalid pair table length
|
||||
|
||||
Length of read-in pair table is invalid
|
||||
|
||||
E: Invalid pair table cutoff
|
||||
|
||||
Cutoffs in pair_coeff command are not valid with read-in pair table.
|
||||
|
||||
E: Bitmapped table in file does not match requested table
|
||||
|
||||
Setting for bitmapped table in pair_coeff command must match table
|
||||
in file exactly.
|
||||
|
||||
E: All pair coeffs are not set
|
||||
|
||||
All pair coefficients must be set in the data file or by the
|
||||
pair_coeff command before running a simulation.
|
||||
|
||||
E: Cannot open file %s
|
||||
|
||||
The specified file cannot be opened. Check that the path and name are
|
||||
correct. If the file is a compressed file, also check that the gzip
|
||||
executable can be found and run.
|
||||
|
||||
E: Did not find keyword in table file
|
||||
|
||||
Keyword used in pair_coeff command was not found in table file.
|
||||
|
||||
E: Bitmapped table is incorrect length in table file
|
||||
|
||||
Number of table entries is not a correct power of 2.
|
||||
|
||||
E: Invalid keyword in pair table parameters
|
||||
|
||||
Keyword used in list of table parameters is not recognized.
|
||||
|
||||
E: Pair table parameters did not set N
|
||||
|
||||
List of pair table parameters must include N setting.
|
||||
|
||||
E: Pair table cutoffs must all be equal to use with KSpace
|
||||
|
||||
When using pair style table with a long-range KSpace solver, the
|
||||
cutoffs for all atom type pairs must all be the same, since the
|
||||
long-range solver starts at that cutoff.
|
||||
|
||||
*/
|
|
@ -0,0 +1,443 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "string.h"
|
||||
#include "verlet_kokkos.h"
|
||||
#include "neighbor.h"
|
||||
#include "domain.h"
|
||||
#include "comm.h"
|
||||
#include "atom.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "force.h"
|
||||
#include "pair.h"
|
||||
#include "bond.h"
|
||||
#include "angle.h"
|
||||
#include "dihedral.h"
|
||||
#include "improper.h"
|
||||
#include "kspace.h"
|
||||
#include "output.h"
|
||||
#include "update.h"
|
||||
#include "modify.h"
|
||||
#include "compute.h"
|
||||
#include "fix.h"
|
||||
#include "timer.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
#include <ctime>
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
VerletKokkos::VerletKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
Verlet(lmp, narg, arg)
|
||||
{
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
setup before run
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void VerletKokkos::setup()
|
||||
{
|
||||
if (comm->me == 0 && screen) fprintf(screen,"Setting up run ...\n");
|
||||
update->setupflag = 1;
|
||||
|
||||
// setup domain, communication and neighboring
|
||||
// acquire ghosts
|
||||
// build neighbor lists
|
||||
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
atomKK->setup();
|
||||
modify->setup_pre_exchange();
|
||||
// debug
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
if (triclinic) domain->x2lamda(atomKK->nlocal);
|
||||
domain->pbc();
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
domain->reset_box();
|
||||
comm->setup();
|
||||
if (neighbor->style) neighbor->setup_bins();
|
||||
comm->exchange();
|
||||
if (atomKK->sortfreq > 0) atomKK->sort();
|
||||
comm->borders();
|
||||
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
domain->image_check();
|
||||
domain->box_too_small_check();
|
||||
modify->setup_pre_neighbor();
|
||||
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
neighbor->build();
|
||||
neighbor->ncalls = 0;
|
||||
|
||||
// compute all forces
|
||||
|
||||
ev_set(update->ntimestep);
|
||||
force_clear();
|
||||
modify->setup_pre_force(vflag);
|
||||
|
||||
if (pair_compute_flag) force->pair->compute(eflag,vflag);
|
||||
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
|
||||
|
||||
if (atomKK->molecular) {
|
||||
if (force->bond) force->bond->compute(eflag,vflag);
|
||||
if (force->angle) force->angle->compute(eflag,vflag);
|
||||
if (force->dihedral) force->dihedral->compute(eflag,vflag);
|
||||
if (force->improper) force->improper->compute(eflag,vflag);
|
||||
}
|
||||
|
||||
if (force->kspace) {
|
||||
force->kspace->setup();
|
||||
if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
|
||||
else force->kspace->compute_dummy(eflag,vflag);
|
||||
}
|
||||
|
||||
if (force->newton) comm->reverse_comm();
|
||||
|
||||
modify->setup(vflag);
|
||||
output->setup();
|
||||
update->setupflag = 0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
setup without output
|
||||
flag = 0 = just force calculation
|
||||
flag = 1 = reneighbor and force calculation
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void VerletKokkos::setup_minimal(int flag)
|
||||
{
|
||||
update->setupflag = 1;
|
||||
|
||||
// setup domain, communication and neighboring
|
||||
// acquire ghosts
|
||||
// build neighbor lists
|
||||
|
||||
if (flag) {
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
modify->setup_pre_exchange();
|
||||
// debug
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
if (triclinic) domain->x2lamda(atomKK->nlocal);
|
||||
domain->pbc();
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
domain->reset_box();
|
||||
comm->setup();
|
||||
if (neighbor->style) neighbor->setup_bins();
|
||||
comm->exchange();
|
||||
comm->borders();
|
||||
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
domain->image_check();
|
||||
domain->box_too_small_check();
|
||||
modify->setup_pre_neighbor();
|
||||
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
neighbor->build();
|
||||
neighbor->ncalls = 0;
|
||||
}
|
||||
|
||||
// compute all forces
|
||||
|
||||
ev_set(update->ntimestep);
|
||||
force_clear();
|
||||
modify->setup_pre_force(vflag);
|
||||
|
||||
if (pair_compute_flag) force->pair->compute(eflag,vflag);
|
||||
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
|
||||
|
||||
if (atomKK->molecular) {
|
||||
if (force->bond) force->bond->compute(eflag,vflag);
|
||||
if (force->angle) force->angle->compute(eflag,vflag);
|
||||
if (force->dihedral) force->dihedral->compute(eflag,vflag);
|
||||
if (force->improper) force->improper->compute(eflag,vflag);
|
||||
}
|
||||
|
||||
if (force->kspace) {
|
||||
force->kspace->setup();
|
||||
if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
|
||||
else force->kspace->compute_dummy(eflag,vflag);
|
||||
}
|
||||
|
||||
if (force->newton) comm->reverse_comm();
|
||||
|
||||
modify->setup(vflag);
|
||||
update->setupflag = 0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
run for N steps
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void VerletKokkos::run(int n)
|
||||
{
|
||||
bigint ntimestep;
|
||||
int nflag,sortflag;
|
||||
|
||||
int n_post_integrate = modify->n_post_integrate;
|
||||
int n_pre_exchange = modify->n_pre_exchange;
|
||||
int n_pre_neighbor = modify->n_pre_neighbor;
|
||||
int n_pre_force = modify->n_pre_force;
|
||||
int n_post_force = modify->n_post_force;
|
||||
int n_end_of_step = modify->n_end_of_step;
|
||||
|
||||
if (atomKK->sortfreq > 0) sortflag = 1;
|
||||
else sortflag = 0;
|
||||
|
||||
static double time = 0.0;
|
||||
static int count = 0;
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
Kokkos::Impl::Timer ktimer;
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
|
||||
ntimestep = ++update->ntimestep;
|
||||
ev_set(ntimestep);
|
||||
|
||||
// initial time integration
|
||||
|
||||
ktimer.reset();
|
||||
modify->initial_integrate(vflag);
|
||||
time += ktimer.seconds();
|
||||
if (n_post_integrate) modify->post_integrate();
|
||||
|
||||
// regular communication vs neighbor list rebuild
|
||||
|
||||
nflag = neighbor->decide();
|
||||
|
||||
if (nflag == 0) {
|
||||
timer->stamp();
|
||||
comm->forward_comm();
|
||||
timer->stamp(TIME_COMM);
|
||||
} else {
|
||||
// added debug
|
||||
//atomKK->sync(Host,ALL_MASK);
|
||||
//atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
if (n_pre_exchange) modify->pre_exchange();
|
||||
// debug
|
||||
//atomKK->sync(Host,ALL_MASK);
|
||||
//atomKK->modified(Host,ALL_MASK);
|
||||
if (triclinic) domain->x2lamda(atomKK->nlocal);
|
||||
domain->pbc();
|
||||
if (domain->box_change) {
|
||||
domain->reset_box();
|
||||
comm->setup();
|
||||
if (neighbor->style) neighbor->setup_bins();
|
||||
}
|
||||
timer->stamp();
|
||||
|
||||
// added debug
|
||||
//atomKK->sync(Device,ALL_MASK);
|
||||
//atomKK->modified(Device,ALL_MASK);
|
||||
|
||||
comm->exchange();
|
||||
if (sortflag && ntimestep >= atomKK->nextsort) atomKK->sort();
|
||||
comm->borders();
|
||||
|
||||
// added debug
|
||||
//atomKK->sync(Host,ALL_MASK);
|
||||
//atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
if (triclinic) domain->lamda2x(atomKK->nlocal+atomKK->nghost);
|
||||
|
||||
timer->stamp(TIME_COMM);
|
||||
if (n_pre_neighbor) modify->pre_neighbor();
|
||||
neighbor->build();
|
||||
timer->stamp(TIME_NEIGHBOR);
|
||||
}
|
||||
|
||||
// force computations
|
||||
// important for pair to come before bonded contributions
|
||||
// since some bonded potentials tally pairwise energy/virial
|
||||
// and Pair:ev_tally() needs to be called before any tallying
|
||||
|
||||
force_clear();
|
||||
// added for debug
|
||||
//atomKK->k_x.sync<LMPHostType>();
|
||||
//atomKK->k_f.sync<LMPHostType>();
|
||||
//atomKK->k_f.modify<LMPHostType>();
|
||||
if (n_pre_force) modify->pre_force(vflag);
|
||||
|
||||
timer->stamp();
|
||||
|
||||
if (pair_compute_flag) {
|
||||
force->pair->compute(eflag,vflag);
|
||||
timer->stamp(TIME_PAIR);
|
||||
}
|
||||
|
||||
if (atomKK->molecular) {
|
||||
if (force->bond) force->bond->compute(eflag,vflag);
|
||||
if (force->angle) force->angle->compute(eflag,vflag);
|
||||
if (force->dihedral) force->dihedral->compute(eflag,vflag);
|
||||
if (force->improper) force->improper->compute(eflag,vflag);
|
||||
timer->stamp(TIME_BOND);
|
||||
}
|
||||
|
||||
if (kspace_compute_flag) {
|
||||
force->kspace->compute(eflag,vflag);
|
||||
timer->stamp(TIME_KSPACE);
|
||||
}
|
||||
|
||||
// reverse communication of forces
|
||||
|
||||
if (force->newton) {
|
||||
atomKK->sync(Host,F_MASK);
|
||||
comm->reverse_comm();
|
||||
atomKK->modified(Host,F_MASK);
|
||||
timer->stamp(TIME_COMM);
|
||||
}
|
||||
|
||||
// force modifications, final time integration, diagnostics
|
||||
|
||||
ktimer.reset();
|
||||
|
||||
if (n_post_force) modify->post_force(vflag);
|
||||
modify->final_integrate();
|
||||
if (n_end_of_step) modify->end_of_step();
|
||||
|
||||
time += ktimer.seconds();
|
||||
|
||||
// all output
|
||||
|
||||
if (ntimestep == output->next) {
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
timer->stamp();
|
||||
output->write(ntimestep);
|
||||
timer->stamp(TIME_OUTPUT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
clear force on own & ghost atoms
|
||||
clear other arrays as needed
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void VerletKokkos::force_clear()
|
||||
{
|
||||
int i;
|
||||
|
||||
if (external_force_clear) return;
|
||||
|
||||
// clear force on all particles
|
||||
// if either newton flag is set, also include ghosts
|
||||
// when using threads always clear all forces.
|
||||
|
||||
if (neighbor->includegroup == 0) {
|
||||
int nall;
|
||||
if (force->newton) nall = atomKK->nlocal + atomKK->nghost;
|
||||
else nall = atomKK->nlocal;
|
||||
|
||||
size_t nbytes = sizeof(double) * nall;
|
||||
|
||||
if (nbytes) {
|
||||
if (atomKK->k_f.modified_host > atomKK->k_f.modified_device) {
|
||||
memset_kokkos(atomKK->k_f.view<LMPHostType>());
|
||||
atomKK->modified(Host,F_MASK);
|
||||
} else {
|
||||
memset_kokkos(atomKK->k_f.view<LMPDeviceType>());
|
||||
atomKK->modified(Device,F_MASK);
|
||||
}
|
||||
if (torqueflag) memset(&(atomKK->torque[0][0]),0,3*nbytes);
|
||||
if (erforceflag) memset(&(atomKK->erforce[0]), 0, nbytes);
|
||||
if (e_flag) memset(&(atomKK->de[0]), 0, nbytes);
|
||||
if (rho_flag) memset(&(atomKK->drho[0]), 0, nbytes);
|
||||
}
|
||||
|
||||
// neighbor includegroup flag is set
|
||||
// clear force only on initial nfirst particles
|
||||
// if either newton flag is set, also include ghosts
|
||||
|
||||
} else {
|
||||
int nall = atomKK->nfirst;
|
||||
if (atomKK->k_f.modified_host > atomKK->k_f.modified_device) {
|
||||
memset_kokkos(atomKK->k_f.view<LMPHostType>());
|
||||
atomKK->modified(Host,F_MASK);
|
||||
} else {
|
||||
memset_kokkos(atomKK->k_f.view<LMPDeviceType>());
|
||||
atomKK->modified(Device,F_MASK);
|
||||
}
|
||||
if (torqueflag) {
|
||||
double **torque = atomKK->torque;
|
||||
for (i = 0; i < nall; i++) {
|
||||
torque[i][0] = 0.0;
|
||||
torque[i][1] = 0.0;
|
||||
torque[i][2] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
if (erforceflag) {
|
||||
double *erforce = atomKK->erforce;
|
||||
for (i = 0; i < nall; i++) erforce[i] = 0.0;
|
||||
}
|
||||
|
||||
if (e_flag) {
|
||||
double *de = atomKK->de;
|
||||
for (i = 0; i < nall; i++) de[i] = 0.0;
|
||||
}
|
||||
|
||||
if (rho_flag) {
|
||||
double *drho = atomKK->drho;
|
||||
for (i = 0; i < nall; i++) drho[i] = 0.0;
|
||||
}
|
||||
|
||||
if (force->newton) {
|
||||
nall = atomKK->nlocal + atomKK->nghost;
|
||||
|
||||
if (torqueflag) {
|
||||
double **torque = atomKK->torque;
|
||||
for (i = atomKK->nlocal; i < nall; i++) {
|
||||
torque[i][0] = 0.0;
|
||||
torque[i][1] = 0.0;
|
||||
torque[i][2] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
if (erforceflag) {
|
||||
double *erforce = atomKK->erforce;
|
||||
for (i = atomKK->nlocal; i < nall; i++) erforce[i] = 0.0;
|
||||
}
|
||||
|
||||
if (e_flag) {
|
||||
double *de = atomKK->de;
|
||||
for (i = 0; i < nall; i++) de[i] = 0.0;
|
||||
}
|
||||
|
||||
if (rho_flag) {
|
||||
double *drho = atomKK->drho;
|
||||
for (i = 0; i < nall; i++) drho[i] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef INTEGRATE_CLASS
|
||||
|
||||
IntegrateStyle(verlet/kk,VerletKokkos)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_VERLET_KOKKOS_H
|
||||
#define LMP_VERLET_KOKKOS_H
|
||||
|
||||
#include "verlet.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class VerletKokkos : public Verlet {
|
||||
public:
|
||||
VerletKokkos(class LAMMPS *, int, char **);
|
||||
~VerletKokkos() {}
|
||||
void setup();
|
||||
void setup_minimal(int);
|
||||
void run(int);
|
||||
|
||||
protected:
|
||||
class AtomKokkos *atomKK;
|
||||
|
||||
void force_clear();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -0,0 +1,111 @@
|
|||
# cuda = RedHat Linux box, nvcc for Kokkos, MPICH2, FFTW
|
||||
|
||||
SHELL = /bin/sh
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# compiler/linker settings
|
||||
# specify flags and libraries needed for your compiler
|
||||
|
||||
CC = nvcc
|
||||
CCFLAGS = -g -O3 -arch=sm_20
|
||||
SHFLAGS = -fPIC
|
||||
DEPFLAGS = -M
|
||||
|
||||
LINK = g++
|
||||
LINKFLAGS = -g -O
|
||||
LIB =
|
||||
SIZE = size
|
||||
|
||||
ARCHIVE = ar
|
||||
ARFLAGS = -rc
|
||||
SHLIBFLAGS = -shared
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# LAMMPS-specific settings
|
||||
# specify settings for LAMMPS features you will use
|
||||
# if you change any -D setting, do full re-compile after "make clean"
|
||||
|
||||
# LAMMPS ifdef settings, OPTIONAL
|
||||
# see possible settings in doc/Section_start.html#2_2 (step 4)
|
||||
|
||||
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
|
||||
|
||||
# MPI library, REQUIRED
|
||||
# see discussion in doc/Section_start.html#2_2 (step 5)
|
||||
# can point to dummy MPI library in src/STUBS as in Makefile.serial
|
||||
# INC = path for mpi.h, MPI compiler settings
|
||||
# PATH = path for MPI library
|
||||
# LIB = name of MPI library
|
||||
|
||||
MPI_INC = -DMPICH_SKIP_MPICXX
|
||||
MPI_PATH =
|
||||
MPI_LIB = -lmpich -lmpl -lpthread
|
||||
|
||||
# FFT library, OPTIONAL
|
||||
# see discussion in doc/Section_start.html#2_2 (step 6)
|
||||
# can be left blank to use provided KISS FFT library
|
||||
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
|
||||
# PATH = path for FFT library
|
||||
# LIB = name of FFT library
|
||||
|
||||
FFT_INC = -DFFT_FFTW
|
||||
FFT_PATH =
|
||||
FFT_LIB = -lfftw
|
||||
|
||||
# JPEG and/or PNG library, OPTIONAL
|
||||
# see discussion in doc/Section_start.html#2_2 (step 7)
|
||||
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
|
||||
# INC = path(s) for jpeglib.h and/or png.h
|
||||
# PATH = path(s) for JPEG library and/or PNG library
|
||||
# LIB = name(s) of JPEG library and/or PNG library
|
||||
|
||||
JPG_INC =
|
||||
JPG_PATH =
|
||||
JPG_LIB = -ljpeg
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# build rules and dependencies
|
||||
# no need to edit this section
|
||||
|
||||
include Makefile.package.settings
|
||||
include Makefile.package
|
||||
|
||||
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
|
||||
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
|
||||
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
|
||||
|
||||
# Path to src files
|
||||
|
||||
vpath %.cpp ..
|
||||
vpath %.h ..
|
||||
|
||||
# Link target
|
||||
|
||||
$(EXE): $(OBJ)
|
||||
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
|
||||
$(SIZE) $(EXE)
|
||||
|
||||
# Library targets
|
||||
|
||||
lib: $(OBJ)
|
||||
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
|
||||
|
||||
shlib: $(OBJ)
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
|
||||
$(OBJ) $(EXTRA_LIB) $(LIB)
|
||||
|
||||
# Compilation rules
|
||||
|
||||
%.o:%.cu
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
%.o:%.cpp
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
%.d:%.cpp
|
||||
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
|
||||
|
||||
# Individual dependencies
|
||||
|
||||
DEPENDS = $(OBJ:.o=.d)
|
||||
sinclude $(DEPENDS)
|
|
@ -14,7 +14,7 @@ OBJ = $(SRC:.cpp=.o)
|
|||
# Package variables
|
||||
|
||||
PACKAGE = asphere body class2 colloid dipole fld gpu granular kim \
|
||||
kspace manybody mc meam misc molecule mpiio opt peri poems \
|
||||
kokkos kspace manybody mc meam misc molecule mpiio opt peri poems \
|
||||
reax replica rigid shock srd voronoi xtc
|
||||
|
||||
PACKUSER = user-atc user-awpmd user-cg-cmm user-colvars \
|
||||
|
|
|
@ -80,8 +80,10 @@ void AtomVec::init()
|
|||
deform_groupbit = domain->deform_groupbit;
|
||||
h_rate = domain->h_rate;
|
||||
|
||||
if (lmp->cuda != NULL && cudable == false)
|
||||
if (lmp->cuda != NULL && !cudable)
|
||||
error->all(FLERR,"USER-CUDA package requires a cuda enabled atom_style");
|
||||
if (lmp->kokkos != NULL && !kokkosable)
|
||||
error->all(FLERR,"KOKKOS package requires a kokkos enabled atom_style");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
|
|
@ -334,7 +334,7 @@ void ComputePropertyLocal::compute_local()
|
|||
|
||||
int ComputePropertyLocal::count_pairs(int allflag, int forceflag)
|
||||
{
|
||||
int i,j,m,n,ii,jj,inum,jnum,itype,jtype;
|
||||
int i,j,m,ii,jj,inum,jnum,itype,jtype;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
|
||||
|
@ -358,7 +358,7 @@ int ComputePropertyLocal::count_pairs(int allflag, int forceflag)
|
|||
|
||||
double **cutsq = force->pair->cutsq;
|
||||
|
||||
m = n = 0;
|
||||
m = 0;
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
if (!(mask[i] & groupbit)) continue;
|
||||
|
|
Loading…
Reference in New Issue