Kokkos neighbor refactor

This commit is contained in:
Stan Moore 2016-12-07 13:00:27 -07:00
parent 7c61dbf5e2
commit 8318c67816
60 changed files with 1742 additions and 1279 deletions

View File

@ -105,11 +105,14 @@ action modify_kokkos.cpp
action modify_kokkos.h
action neigh_bond_kokkos.cpp
action neigh_bond_kokkos.h
action neigh_full_kokkos.h
action neigh_list_kokkos.cpp
action neigh_list_kokkos.h
action neighbor_kokkos.cpp
action neighbor_kokkos.h
action npair_kokkos.cpp
action npair_kokkos.h
action nbin_kokkos.cpp
action nbin_kokkos.h
action math_special_kokkos.cpp
action math_special_kokkos.h
action pair_buck_coul_cut_kokkos.cpp
@ -169,8 +172,6 @@ action pair_reax_c_kokkos.cpp pair_reax_c.cpp
action pair_reax_c_kokkos.h pair_reax_c.h
action pair_sw_kokkos.cpp pair_sw.cpp
action pair_sw_kokkos.h pair_sw.h
action pair_vashishta_kokkos.cpp pair_vashishta.cpp
action pair_vashishta_kokkos.h pair_vashishta.h
action pair_table_kokkos.cpp
action pair_table_kokkos.h
action pair_tersoff_kokkos.cpp pair_tersoff.cpp

View File

@ -83,13 +83,8 @@ class AtomVecKokkos : public AtomVec {
std::is_same<typename ViewType::execution_space,LMPDeviceType>::value,
Kokkos::CudaHostPinnedSpace,typename ViewType::memory_space>::type,
Kokkos::MemoryTraits<Kokkos::Unmanaged> > mirror_type;
if (buffer_size == 0) {
buffer = Kokkos::kokkos_malloc<Kokkos::CudaHostPinnedSpace>(src.capacity());
buffer_size = src.capacity();
} else if (buffer_size < src.capacity()) {
if(buffer_size < src.capacity())
buffer = Kokkos::kokkos_realloc<Kokkos::CudaHostPinnedSpace>(buffer,src.capacity());
buffer_size = src.capacity();
}
return mirror_type( buffer ,
src.dimension_0() ,
src.dimension_1() ,
@ -109,13 +104,8 @@ class AtomVecKokkos : public AtomVec {
std::is_same<typename ViewType::execution_space,LMPDeviceType>::value,
Kokkos::CudaHostPinnedSpace,typename ViewType::memory_space>::type,
Kokkos::MemoryTraits<Kokkos::Unmanaged> > mirror_type;
if (buffer_size == 0) {
buffer = Kokkos::kokkos_malloc<Kokkos::CudaHostPinnedSpace>(src.capacity()*sizeof(typename ViewType::value_type));
buffer_size = src.capacity();
} else if (buffer_size < src.capacity()) {
if(buffer_size < src.capacity())
buffer = Kokkos::kokkos_realloc<Kokkos::CudaHostPinnedSpace>(buffer,src.capacity()*sizeof(typename ViewType::value_type));
buffer_size = src.capacity();
}
mirror_type tmp_view( (typename ViewType::value_type*)buffer ,
src.dimension_0() ,
src.dimension_1() ,

View File

@ -125,12 +125,10 @@ void FixQEqReaxKokkos<DeviceType>::init()
neighbor->requests[irequest]->pair = 0;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else { //if (neighflag == HALF || neighflag == HALFTHREAD)
neighbor->requests[irequest]->fix = 1;
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->ghost = 1;
}
}

View File

@ -168,7 +168,6 @@ void KokkosLMP::accelerator(int narg, char **arg)
else
neighflag = HALF;
} else if (strcmp(arg[iarg+1],"n2") == 0) neighflag = N2;
else if (strcmp(arg[iarg+1],"full/cluster") == 0) neighflag = FULLCLUSTER;
else error->all(FLERR,"Illegal package kokkos command");
iarg += 2;
} else if (strcmp(arg[iarg],"binsize") == 0) {
@ -232,20 +231,6 @@ void KokkosLMP::accelerator(int narg, char **arg)
called by Finish
------------------------------------------------------------------------- */
int KokkosLMP::neigh_list_kokkos(int m)
{
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
if (nk->lists_host[m] && nk->lists_host[m]->d_numneigh.dimension_0())
return 1;
if (nk->lists_device[m] && nk->lists_device[m]->d_numneigh.dimension_0())
return 1;
return 0;
}
/* ----------------------------------------------------------------------
called by Finish
------------------------------------------------------------------------- */
int KokkosLMP::neigh_count(int m)
{
int inum;
@ -255,28 +240,30 @@ int KokkosLMP::neigh_count(int m)
ArrayTypes<LMPHostType>::t_int_1d h_numneigh;
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
if (nk->lists_host[m]) {
inum = nk->lists_host[m]->inum;
if (nk->lists[m]->execution_space == Host) {
NeighListKokkos<LMPHostType>* nlistKK = (NeighListKokkos<LMPHostType>*) nk->lists[m];
inum = nlistKK->inum;
#ifndef KOKKOS_USE_CUDA_UVM
h_ilist = Kokkos::create_mirror_view(nk->lists_host[m]->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nk->lists_host[m]->d_numneigh);
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
#else
h_ilist = nk->lists_host[m]->d_ilist;
h_numneigh = nk->lists_host[m]->d_numneigh;
h_ilist = nlistKK->d_ilist;
h_numneigh = nlistKK->d_numneigh;
#endif
Kokkos::deep_copy(h_ilist,nk->lists_host[m]->d_ilist);
Kokkos::deep_copy(h_numneigh,nk->lists_host[m]->d_numneigh);
} else if (nk->lists_device[m]) {
inum = nk->lists_device[m]->inum;
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
} else if (nk->lists[m]->execution_space == Device) {
NeighListKokkos<LMPDeviceType>* nlistKK = (NeighListKokkos<LMPDeviceType>*) nk->lists[m];
inum = nlistKK->inum;
#ifndef KOKKOS_USE_CUDA_UVM
h_ilist = Kokkos::create_mirror_view(nk->lists_device[m]->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nk->lists_device[m]->d_numneigh);
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
#else
h_ilist = nk->lists_device[m]->d_ilist;
h_numneigh = nk->lists_device[m]->d_numneigh;
h_ilist = nlistKK->d_ilist;
h_numneigh = nlistKK->d_numneigh;
#endif
Kokkos::deep_copy(h_ilist,nk->lists_device[m]->d_ilist);
Kokkos::deep_copy(h_numneigh,nk->lists_device[m]->d_numneigh);
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
}
for (int i = 0; i < inum; i++) nneigh += h_numneigh[h_ilist[i]];

View File

@ -34,7 +34,6 @@ class KokkosLMP : protected Pointers {
KokkosLMP(class LAMMPS *, int, char **);
~KokkosLMP();
void accelerator(int, char **);
int neigh_list_kokkos(int);
int neigh_count(int);
private:
static void my_signal_handler(int);

144
src/KOKKOS/nbin_kokkos.cpp Normal file
View File

@ -0,0 +1,144 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "nbin_kokkos.h"
#include "neighbor.h"
#include "atom_kokkos.h"
#include "group.h"
#include "domain.h"
#include "comm.h"
#include "update.h"
#include "error.h"
#include "atom_masks.h"
using namespace LAMMPS_NS;
enum{NSQ,BIN,MULTI}; // also in Neighbor
#define SMALL 1.0e-6
#define CUT2BIN_RATIO 100
/* ---------------------------------------------------------------------- */
template<class DeviceType>
NBinKokkos<DeviceType>::NBinKokkos(LAMMPS *lmp) : NBinStandard(lmp) {
atoms_per_bin = 16;
d_resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(d_resize);
#else
h_resize = d_resize;
#endif
h_resize() = 1;
}
/* ----------------------------------------------------------------------
setup neighbor binning geometry
bin numbering in each dimension is global:
0 = 0.0 to binsize, 1 = binsize to 2*binsize, etc
nbin-1,nbin,etc = bbox-binsize to bbox, bbox to bbox+binsize, etc
-1,-2,etc = -binsize to 0.0, -2*binsize to -binsize, etc
code will work for any binsize
since next(xyz) and stencil extend as far as necessary
binsize = 1/2 of cutoff is roughly optimal
for orthogonal boxes:
a dim must be filled exactly by integer # of bins
in periodic, procs on both sides of PBC must see same bin boundary
in non-periodic, coord2bin() still assumes this by use of nbin xyz
for triclinic boxes:
tilted simulation box cannot contain integer # of bins
stencil & neigh list built differently to account for this
mbinlo = lowest global bin any of my ghost atoms could fall into
mbinhi = highest global bin any of my ghost atoms could fall into
mbin = number of bins I need in a dimension
------------------------------------------------------------------------- */
template<class DeviceType>
void NBinKokkos<DeviceType>::bin_atoms_setup(int nall)
{
if (mbins > k_bins.d_view.dimension_0()) {
k_bins = DAT::tdual_int_2d("Neighbor::d_bins",mbins,atoms_per_bin);
bins = k_bins.view<DeviceType>();
k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",mbins);
bincount = k_bincount.view<DeviceType>();
last_bin_memory = update->ntimestep;
}
last_bin = update->ntimestep;
}
/* ----------------------------------------------------------------------
bin owned and ghost atoms
------------------------------------------------------------------------- */
template<class DeviceType>
void NBinKokkos<DeviceType>::bin_atoms()
{
h_resize() = 1;
while(h_resize() > 0) {
h_resize() = 0;
deep_copy(d_resize, h_resize);
MemsetZeroFunctor<DeviceType> f_zero;
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
Kokkos::parallel_for(mbins, f_zero);
DeviceType::fence();
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
x = atomKK->k_x.view<DeviceType>();
bboxlo_[0] = bboxlo[0]; bboxlo_[1] = bboxlo[1]; bboxlo_[2] = bboxlo[2];
bboxhi_[0] = bboxhi[0]; bboxhi_[1] = bboxhi[1]; bboxhi_[2] = bboxhi[2];
NPairKokkosBinAtomsFunctor<DeviceType> f(*this);
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
DeviceType::fence();
deep_copy(h_resize, d_resize);
if(h_resize()) {
atoms_per_bin += 16;
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
bins = k_bins.view<DeviceType>();
c_bins = bins;
}
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void NBinKokkos<DeviceType>::binatomsItem(const int &i) const
{
const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2));
const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1);
if(ac < bins.dimension_1()) {
bins(ibin, ac) = i;
} else {
d_resize() = 1;
}
}
namespace LAMMPS_NS {
template class NBinKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class NBinKokkos<LMPHostType>;
#endif
}

153
src/KOKKOS/nbin_kokkos.h Normal file
View File

@ -0,0 +1,153 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NBIN_CLASS
NBinStyle(kk/host,
NBinKokkos<LMPHostType>,
NB_KOKKOS_HOST)
NBinStyle(kk/device,
NBinKokkos<LMPDeviceType>,
NB_KOKKOS_DEVICE)
#else
#ifndef LMP_NBIN_KOKKOS_H
#define LMP_NBIN_KOKKOS_H
#include "nbin_standard.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
template<class DeviceType>
class NBinKokkos : public NBinStandard {
public:
typedef ArrayTypes<DeviceType> AT;
NBinKokkos(class LAMMPS *);
~NBinKokkos() {}
void bin_atoms_setup(int);
void bin_atoms();
int atoms_per_bin;
DAT::tdual_int_1d k_bincount;
DAT::tdual_int_2d k_bins;
typename AT::t_int_1d bincount;
const typename AT::t_int_1d_const c_bincount;
typename AT::t_int_2d bins;
typename AT::t_int_2d_const c_bins;
typename AT::t_int_scalar d_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
typename AT::t_x_array_randomread x;
KOKKOS_INLINE_FUNCTION
void binatomsItem(const int &i) const;
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
{
int ix,iy,iz;
if (x >= bboxhi_[0])
ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
else if (x >= bboxlo_[0]) {
ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
if (y >= bboxhi_[1])
iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
else if (y >= bboxlo_[1]) {
iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
if (z >= bboxhi_[2])
iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
else if (z >= bboxlo_[2]) {
iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
{
int ix,iy,iz;
if (x >= bboxhi_[0])
ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
else if (x >= bboxlo_[0]) {
ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
if (y >= bboxhi_[1])
iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
else if (y >= bboxlo_[1]) {
iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
if (z >= bboxhi_[2])
iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
else if (z >= bboxlo_[2]) {
iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
i[0] = ix - mbinxlo;
i[1] = iy - mbinylo;
i[2] = iz - mbinzlo;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
private:
double bboxlo_[3],bboxhi_[3];
};
template<class DeviceType>
struct NPairKokkosBinAtomsFunctor {
typedef DeviceType device_type;
const NBinKokkos<DeviceType> c;
NPairKokkosBinAtomsFunctor(const NBinKokkos<DeviceType> &_c):
c(_c) {};
~NPairKokkosBinAtomsFunctor() {}
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.binatomsItem(i);
}
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -34,9 +34,8 @@ void NeighListKokkos<Device>::clean_copy()
ipage = NULL;
dpage = NULL;
maxstencil = 0;
ghostflag = 0;
maxstencil_multi = 0;
maxatoms = 0;
}
/* ---------------------------------------------------------------------- */
@ -70,49 +69,6 @@ void NeighListKokkos<Device>::grow(int nmax)
/* ---------------------------------------------------------------------- */
template<class Device>
void NeighListKokkos<Device>::stencil_allocate(int smax, int style)
{
int i;
if (style == BIN) {
if (smax > maxstencil) {
maxstencil = smax;
d_stencil =
memory->create_kokkos(d_stencil,h_stencil,stencil,maxstencil,
"neighlist:stencil");
if (ghostflag) {
memory->create_kokkos(d_stencilxyz,h_stencilxyz,stencilxyz,maxstencil,
3,"neighlist:stencilxyz");
}
}
} else {
int n = atom->ntypes;
if (maxstencil_multi == 0) {
nstencil_multi = new int[n+1];
stencil_multi = new int*[n+1];
distsq_multi = new double*[n+1];
for (i = 1; i <= n; i++) {
nstencil_multi[i] = 0;
stencil_multi[i] = NULL;
distsq_multi[i] = NULL;
}
}
if (smax > maxstencil_multi) {
maxstencil_multi = smax;
for (i = 1; i <= n; i++) {
memory->destroy(stencil_multi[i]);
memory->destroy(distsq_multi[i]);
memory->create(stencil_multi[i],maxstencil_multi,
"neighlist:stencil_multi");
memory->create(distsq_multi[i],maxstencil_multi,
"neighlist:distsq_multi");
}
}
}
}
namespace LAMMPS_NS {
template class NeighListKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA

View File

@ -20,7 +20,7 @@
namespace LAMMPS_NS {
enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u,FULLCLUSTER=16u};
enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u};
class AtomNeighbors
{
@ -74,14 +74,12 @@ public:
typename DAT::tdual_int_1d k_ilist; // local indices of I atoms
typename ArrayTypes<Device>::t_int_1d d_ilist;
typename ArrayTypes<Device>::t_int_1d d_numneigh; // # of J neighs for each I
typename ArrayTypes<Device>::t_int_1d d_stencil; // # of J neighs for each I
typename ArrayTypes<LMPHostType>::t_int_1d h_stencil; // # of J neighs per I
typename ArrayTypes<Device>::t_int_1d_3 d_stencilxyz;
typename ArrayTypes<LMPHostType>::t_int_1d_3 h_stencilxyz;
NeighListKokkos(class LAMMPS *lmp):
NeighList(lmp) {_stride = 1; maxneighs = 16;};
~NeighListKokkos() {stencil = NULL; numneigh = NULL; ilist = NULL;};
NeighList(lmp) {_stride = 1; maxneighs = 16; kokkos = 1;
execution_space = ExecutionSpaceFromDevice<Device>::space;
};
~NeighListKokkos() {numneigh = NULL; ilist = NULL;};
KOKKOS_INLINE_FUNCTION
AtomNeighbors get_neighbors(const int &i) const {
@ -99,7 +97,8 @@ public:
int& num_neighs(const int & i) const {
return d_numneigh(i);
}
void stencil_allocate(int smax, int style);
private:
int maxatoms;
};
}

View File

@ -21,11 +21,10 @@
#include "atom_masks.h"
#include "error.h"
#include "kokkos.h"
#include "force.h"
#include "bond.h"
#include "angle.h"
#include "dihedral.h"
#include "improper.h"
#include "style_nbin.h"
#include "style_nstencil.h"
#include "style_npair.h"
#include "style_ntopo.h"
using namespace LAMMPS_NS;
@ -36,18 +35,11 @@ enum{NSQ,BIN,MULTI}; // also in neigh_list.cpp
NeighborKokkos::NeighborKokkos(LAMMPS *lmp) : Neighbor(lmp),
neighbond_host(lmp),neighbond_device(lmp)
{
atoms_per_bin = 16;
nlist_host = 0;
lists_host = NULL;
pair_build_host = NULL;
stencil_create_host = NULL;
nlist_device = 0;
lists_device = NULL;
pair_build_device = NULL;
stencil_create_device = NULL;
device_flag = 0;
bondlist = NULL;
anglelist = NULL;
dihedrallist = NULL;
improperlist = NULL;
}
/* ---------------------------------------------------------------------- */
@ -58,14 +50,6 @@ NeighborKokkos::~NeighborKokkos()
memory->destroy_kokkos(k_cutneighsq,cutneighsq);
cutneighsq = NULL;
for (int i = 0; i < nlist_host; i++) delete lists_host[i];
delete [] lists_host;
for (int i = 0; i < nlist_device; i++) delete lists_device[i];
delete [] lists_device;
delete [] pair_build_device;
delete [] pair_build_host;
memory->destroy_kokkos(k_ex_type,ex_type);
memory->destroy_kokkos(k_ex1_type,ex1_type);
memory->destroy_kokkos(k_ex2_type,ex2_type);
@ -89,6 +73,11 @@ void NeighborKokkos::init()
{
atomKK = (AtomKokkos *) atom;
Neighbor::init();
// 1st time allocation of xhold
if (dist_check)
xhold = DAT::tdual_x_array("neigh:xhold",maxhold);
}
/* ---------------------------------------------------------------------- */
@ -101,158 +90,13 @@ void NeighborKokkos::init_cutneighsq_kokkos(int n)
/* ---------------------------------------------------------------------- */
int NeighborKokkos::init_lists_kokkos()
void NeighborKokkos::create_kokkos_list(int i)
{
int i;
for (i = 0; i < nlist_host; i++) delete lists_host[i];
delete [] lists_host;
delete [] pair_build_host;
delete [] stencil_create_host;
nlist_host = 0;
for (i = 0; i < nlist_device; i++) delete lists_device[i];
delete [] lists_device;
delete [] pair_build_device;
delete [] stencil_create_device;
nlist_device = 0;
nlist = 0;
for (i = 0; i < nrequest; i++) {
if (requests[i]->kokkos_device) nlist_device++;
else if (requests[i]->kokkos_host) nlist_host++;
else nlist++;
}
lists_host = new NeighListKokkos<LMPHostType>*[nrequest];
pair_build_host = new PairPtrHost[nrequest];
stencil_create_host = new StencilPtrHost[nrequest];
for (i = 0; i < nrequest; i++) {
lists_host[i] = NULL;
pair_build_host[i] = NULL;
stencil_create_host[i] = NULL;
}
for (i = 0; i < nrequest; i++) {
if (!requests[i]->kokkos_host) continue;
lists_host[i] = new NeighListKokkos<LMPHostType>(lmp);
lists_host[i]->index = i;
lists_host[i]->dnum = requests[i]->dnum;
if (requests[i]->pair) {
Pair *pair = (Pair *) requests[i]->requestor;
pair->init_list(requests[i]->id,lists_host[i]);
}
if (requests[i]->fix) {
Fix *fix = (Fix *) requests[i]->requestor;
fix->init_list(requests[i]->id,lists_host[i]);
}
}
lists_device = new NeighListKokkos<LMPDeviceType>*[nrequest];
pair_build_device = new PairPtrDevice[nrequest];
stencil_create_device = new StencilPtrDevice[nrequest];
for (i = 0; i < nrequest; i++) {
lists_device[i] = NULL;
pair_build_device[i] = NULL;
stencil_create_device[i] = NULL;
}
for (i = 0; i < nrequest; i++) {
if (!requests[i]->kokkos_device) continue;
lists_device[i] = new NeighListKokkos<LMPDeviceType>(lmp);
lists_device[i]->index = i;
lists_device[i]->dnum = requests[i]->dnum;
if (requests[i]->pair) {
Pair *pair = (Pair *) requests[i]->requestor;
pair->init_list(requests[i]->id,lists_device[i]);
}
if (requests[i]->fix) {
Fix *fix = (Fix *) requests[i]->requestor;
fix->init_list(requests[i]->id,lists_device[i]);
}
}
// 1st time allocation of xhold
if (dist_check)
xhold = DAT::tdual_x_array("neigh:xhold",maxhold);
// return # of non-Kokkos lists
return nlist;
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_list_flags1_kokkos(int i)
{
if (style != BIN)
error->all(FLERR,"KOKKOS package only supports 'bin' neighbor lists");
if (lists_host[i]) {
lists_host[i]->buildflag = 1;
if (pair_build_host[i] == NULL) lists_host[i]->buildflag = 0;
if (requests[i]->occasional) lists_host[i]->buildflag = 0;
lists_host[i]->growflag = 1;
if (requests[i]->copy) lists_host[i]->growflag = 0;
lists_host[i]->stencilflag = 1;
if (style == NSQ) lists_host[i]->stencilflag = 0;
if (stencil_create[i] == NULL) lists_host[i]->stencilflag = 0;
lists_host[i]->ghostflag = 0;
if (requests[i]->ghost) lists_host[i]->ghostflag = 1;
if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
}
if (lists_device[i]) {
lists_device[i]->buildflag = 1;
if (pair_build_device[i] == NULL) lists_device[i]->buildflag = 0;
if (requests[i]->occasional) lists_device[i]->buildflag = 0;
lists_device[i]->growflag = 1;
if (requests[i]->copy) lists_device[i]->growflag = 0;
lists_device[i]->stencilflag = 1;
if (style == NSQ) lists_device[i]->stencilflag = 0;
if (stencil_create[i] == NULL) lists_device[i]->stencilflag = 0;
lists_device[i]->ghostflag = 0;
if (requests[i]->ghost) lists_device[i]->ghostflag = 1;
if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
}
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_list_flags2_kokkos(int i)
{
if (lists_host[i]) {
if (lists_host[i]->buildflag) blist[nblist++] = i;
if (lists_host[i]->growflag && requests[i]->occasional == 0)
glist[nglist++] = i;
if (lists_host[i]->stencilflag && requests[i]->occasional == 0)
slist[nslist++] = i;
}
if (lists_device[i]) {
if (lists_device[i]->buildflag) blist[nblist++] = i;
if (lists_device[i]->growflag && requests[i]->occasional == 0)
glist[nglist++] = i;
if (lists_device[i]->stencilflag && requests[i]->occasional == 0)
slist[nslist++] = i;
}
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_list_grow_kokkos(int i)
{
if (lists_host[i]!=NULL && lists_host[i]->growflag)
lists_host[i]->grow(maxatom);
if (lists_device[i]!=NULL && lists_device[i]->growflag)
lists_device[i]->grow(maxatom);
if (requests[i]->kokkos_device) {
lists[i] = new NeighListKokkos<LMPDeviceType>(lmp);
device_flag = 1;
} else if (requests[i]->kokkos_host)
lists[i] = new NeighListKokkos<LMPHostType>(lmp);
}
/* ---------------------------------------------------------------------- */
@ -281,49 +125,6 @@ void NeighborKokkos::init_ex_mol_bit_kokkos()
k_ex_mol_bit.modify<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::choose_build(int index, NeighRequest *rq)
{
if (rq->kokkos_host != 0) {
PairPtrHost pb = NULL;
if (rq->ghost) {
if (rq->full) {
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPHostType>;
else pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,0,1>;
}
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,1,1>;
} else {
if (rq->full) {
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPHostType>;
else pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,0,0>;
}
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,1,0>;
}
pair_build_host[index] = pb;
}
if (rq->kokkos_device != 0) {
PairPtrDevice pb = NULL;
if (rq->ghost) {
if (rq->full) {
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPDeviceType>;
else pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,0,1>;
}
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,1,1>;
} else {
if (rq->full) {
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPDeviceType>;
else pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,0,0>;
}
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,1,0>;
}
pair_build_device[index] = pb;
return;
}
Neighbor::choose_build(index,rq);
}
/* ----------------------------------------------------------------------
if any atom moved trigger distance (half of neighbor skin) return 1
shrink trigger distance if box size has changed
@ -337,7 +138,7 @@ void NeighborKokkos::choose_build(int index, NeighRequest *rq)
int NeighborKokkos::check_distance()
{
if (nlist_device)
if (device_flag)
check_distance_kokkos<LMPDeviceType>();
else
check_distance_kokkos<LMPHostType>();
@ -417,7 +218,7 @@ void NeighborKokkos::operator()(TagNeighborCheckDistance<DeviceType>, const int
void NeighborKokkos::build(int topoflag)
{
if (nlist_device)
if (device_flag)
build_kokkos<LMPDeviceType>(topoflag);
else
build_kokkos<LMPHostType>(topoflag);
@ -426,20 +227,30 @@ void NeighborKokkos::build(int topoflag)
template<class DeviceType>
void NeighborKokkos::build_kokkos(int topoflag)
{
if (style != BIN)
error->all(FLERR,"KOKKOS package only supports 'bin' neighbor lists");
typedef DeviceType device_type;
int i;
int i,m;
ago = 0;
ncalls++;
lastcall = update->ntimestep;
int nlocal = atom->nlocal;
int nall = nlocal + atom->nghost;
// check that using special bond flags will not overflow neigh lists
if (nall > NEIGHMASK)
error->one(FLERR,"Too many local+ghost atoms for neighbor list");
// store current atom positions and box size if needed
if (dist_check) {
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
x = atomKK->k_x;
int nlocal = atom->nlocal;
if (includegroup) nlocal = atom->nfirst;
int maxhold_kokkos = xhold.view<DeviceType>().dimension_0();
if (atom->nmax > maxhold || maxhold_kokkos < maxhold) {
@ -471,54 +282,33 @@ void NeighborKokkos::build_kokkos(int topoflag)
}
}
// if any lists store neighbors of ghosts:
// invoke grow() if nlocal+nghost exceeds previous list size
// else only invoke grow() if nlocal exceeds previous list size
// only for lists with growflag set and which are perpetual (glist)
// bin atoms for all NBin instances
// not just NBin associated with perpetual lists
// b/c cannot wait to bin occasional lists in build_one() call
// if bin then, atoms may have moved outside of proc domain & bin extent,
// leading to errors or even a crash
if (anyghostlist && atom->nmax > maxatom) {
maxatom = atom->nmax;
for (i = 0; i < nglist; i++)
if (lists[glist[i]]) lists[glist[i]]->grow(maxatom);
else init_list_grow_kokkos(glist[i]);
} else if (atom->nmax > maxatom) {
maxatom = atom->nmax;
for (i = 0; i < nglist; i++)
if (lists[glist[i]]) lists[glist[i]]->grow(maxatom);
else init_list_grow_kokkos(glist[i]);
}
// extend atom bin list if necessary
if (style != NSQ && atom->nmax > maxbin) {
maxbin = atom->nmax;
memory->destroy(bins);
memory->create(bins,maxbin,"bins");
}
// check that using special bond flags will not overflow neigh lists
if (atom->nlocal+atom->nghost > NEIGHMASK)
error->one(FLERR,"Too many local+ghost atoms for neighbor list");
// invoke building of pair and molecular topology neighbor lists
// only for pairwise lists with buildflag set
// blist is for standard neigh lists, otherwise is a Kokkos list
for (i = 0; i < nblist; i++) {
if (lists[blist[i]]) {
atomKK->sync(Host,ALL_MASK);
(this->*pair_build[blist[i]])(lists[blist[i]]);
} else {
if (lists_host[blist[i]])
(this->*pair_build_host[blist[i]])(lists_host[blist[i]]);
else if (lists_device[blist[i]])
(this->*pair_build_device[blist[i]])(lists_device[blist[i]]);
if (style != NSQ) {
for (int i = 0; i < nbin; i++) {
neigh_bin[i]->bin_atoms_setup(nall);
neigh_bin[i]->bin_atoms();
}
}
if (atom->molecular && topoflag)
build_topology_kokkos();
// build pairwise lists for all perpetual NPair/NeighList
// grow() with nlocal/nall args so that only realloc if have to
atomKK->sync(Host,ALL_MASK);
for (i = 0; i < npair_perpetual; i++) {
m = plist[i];
lists[m]->grow(nlocal,nall);
neigh_pair[m]->build_setup();
neigh_pair[m]->build(lists[m]);
}
// build topology lists for bonds/angles/etc
if (atom->molecular && topoflag) build_topology();
}
template<class DeviceType>
@ -532,26 +322,6 @@ void NeighborKokkos::operator()(TagNeighborXhold<DeviceType>, const int &i) cons
/* ---------------------------------------------------------------------- */
void NeighborKokkos::setup_bins_kokkos(int i)
{
if (lists_host[slist[i]]) {
lists_host[slist[i]]->stencil_allocate(smax,style);
(this->*stencil_create[slist[i]])(lists_host[slist[i]],sx,sy,sz);
} else if (lists_device[slist[i]]) {
lists_device[slist[i]]->stencil_allocate(smax,style);
(this->*stencil_create[slist[i]])(lists_device[slist[i]],sx,sy,sz);
}
//if (i < nslist-1) return; // this won't work if a non-kokkos neighbor list is last
if (maxhead > k_bins.d_view.dimension_0()) {
k_bins = DAT::tdual_int_2d("Neighbor::d_bins",maxhead,atoms_per_bin);
k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",maxhead);
}
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::modify_ex_type_grow_kokkos(){
memory->grow_kokkos(k_ex1_type,ex1_type,maxex_type,"neigh:ex1_type");
k_ex1_type.modify<LMPHostType>();
@ -575,8 +345,8 @@ void NeighborKokkos::modify_mol_group_grow_kokkos(){
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_topology_kokkos() {
if (nlist_device) {
void NeighborKokkos::init_topology() {
if (device_flag) {
neighbond_device.init_topology_kk();
} else {
neighbond_host.init_topology_kk();
@ -588,8 +358,8 @@ void NeighborKokkos::init_topology_kokkos() {
normally built with pair lists, but USER-CUDA separates them
------------------------------------------------------------------------- */
void NeighborKokkos::build_topology_kokkos() {
if (nlist_device) {
void NeighborKokkos::build_topology() {
if (device_flag) {
neighbond_device.build_topology_kk();
k_bondlist = neighbond_device.k_bondlist;
@ -606,38 +376,22 @@ void NeighborKokkos::build_topology_kokkos() {
k_anglelist.modify<LMPDeviceType>();
k_dihedrallist.modify<LMPDeviceType>();
k_improperlist.modify<LMPDeviceType>();
// Transfer topology neighbor lists to Host for non-Kokkos styles
if (force->bond && force->bond->execution_space == Host)
k_bondlist.sync<LMPHostType>();
if (force->angle && force->angle->execution_space == Host)
k_anglelist.sync<LMPHostType>();
if (force->dihedral && force->dihedral->execution_space == Host)
k_dihedrallist.sync<LMPHostType>();
if (force->improper && force->improper->execution_space == Host)
k_improperlist.sync<LMPHostType>();
} else {
} else {
neighbond_host.build_topology_kk();
k_bondlist = neighbond_host.k_bondlist;
k_anglelist = neighbond_host.k_anglelist;
k_dihedrallist = neighbond_host.k_dihedrallist;
k_improperlist = neighbond_host.k_improperlist;
k_bondlist.sync<LMPHostType>();
k_anglelist.sync<LMPHostType>();
k_dihedrallist.sync<LMPHostType>();
k_improperlist.sync<LMPHostType>();
k_bondlist.modify<LMPHostType>();
k_anglelist.modify<LMPHostType>();
k_dihedrallist.modify<LMPHostType>();
k_improperlist.modify<LMPHostType>();
}
}
// include to trigger instantiation of templated functions
#include "neigh_full_kokkos.h"

View File

@ -22,316 +22,6 @@
namespace LAMMPS_NS {
template<class Device>
class NeighborKokkosExecute
{
typedef ArrayTypes<Device> AT;
public:
NeighListKokkos<Device> neigh_list;
const typename AT::t_xfloat_2d_randomread cutneighsq;
const typename AT::t_int_1d bincount;
const typename AT::t_int_1d_const c_bincount;
typename AT::t_int_2d bins;
typename AT::t_int_2d_const c_bins;
const typename AT::t_x_array_randomread x;
const typename AT::t_int_1d_const type,mask,molecule;
const typename AT::t_tagint_1d_const tag;
const typename AT::t_tagint_2d_const special;
const typename AT::t_int_2d_const nspecial;
const int molecular;
int moltemplate;
int special_flag[4];
const int nbinx,nbiny,nbinz;
const int mbinx,mbiny,mbinz;
const int mbinxlo,mbinylo,mbinzlo;
const X_FLOAT bininvx,bininvy,bininvz;
X_FLOAT bboxhi[3],bboxlo[3];
const int nlocal;
const int exclude;
const int nex_type;
const int maxex_type;
const typename AT::t_int_1d_const ex1_type,ex2_type;
const typename AT::t_int_2d_const ex_type;
const int nex_group;
const int maxex_group;
const typename AT::t_int_1d_const ex1_group,ex2_group;
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
const int nex_mol;
const int maxex_mol;
const typename AT::t_int_1d_const ex_mol_group;
const typename AT::t_int_1d_const ex_mol_bit;
typename AT::t_int_scalar resize;
typename AT::t_int_scalar new_maxneighs;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
const int xperiodic, yperiodic, zperiodic;
const int xprd_half, yprd_half, zprd_half;
NeighborKokkosExecute(
const NeighListKokkos<Device> &_neigh_list,
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
const typename AT::t_int_1d &_bincount,
const typename AT::t_int_2d &_bins,
const int _nlocal,
const typename AT::t_x_array_randomread &_x,
const typename AT::t_int_1d_const &_type,
const typename AT::t_int_1d_const &_mask,
const typename AT::t_int_1d_const &_molecule,
const typename AT::t_tagint_1d_const &_tag,
const typename AT::t_tagint_2d_const &_special,
const typename AT::t_int_2d_const &_nspecial,
const int &_molecular,
const int & _nbinx,const int & _nbiny,const int & _nbinz,
const int & _mbinx,const int & _mbiny,const int & _mbinz,
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
const int & _exclude,const int & _nex_type,const int & _maxex_type,
const typename AT::t_int_1d_const & _ex1_type,
const typename AT::t_int_1d_const & _ex2_type,
const typename AT::t_int_2d_const & _ex_type,
const int & _nex_group,const int & _maxex_group,
const typename AT::t_int_1d_const & _ex1_group,
const typename AT::t_int_1d_const & _ex2_group,
const typename AT::t_int_1d_const & _ex1_bit,
const typename AT::t_int_1d_const & _ex2_bit,
const int & _nex_mol,const int & _maxex_mol,
const typename AT::t_int_1d_const & _ex_mol_group,
const typename AT::t_int_1d_const & _ex_mol_bit,
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
nlocal(_nlocal),
x(_x),type(_type),mask(_mask),molecule(_molecule),
tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
exclude(_exclude),nex_type(_nex_type),maxex_type(_maxex_type),
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
nex_group(_nex_group),maxex_group(_maxex_group),
ex1_group(_ex1_group),ex2_group(_ex2_group),
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),maxex_mol(_maxex_mol),
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half){
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(resize);
#else
h_resize = resize;
#endif
h_resize() = 1;
new_maxneighs = typename AT::
t_int_scalar("NeighborKokkosFunctor::new_maxneighs");
#ifndef KOKKOS_USE_CUDA_UVM
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
#else
h_new_maxneighs = new_maxneighs;
#endif
h_new_maxneighs() = neigh_list.maxneighs;
};
~NeighborKokkosExecute() {neigh_list.clean_copy();};
template<int HalfNeigh, int GhostNewton>
KOKKOS_FUNCTION
void build_Item(const int &i) const;
template<int HalfNeigh>
KOKKOS_FUNCTION
void build_Item_Ghost(const int &i) const;
template<int ClusterSize>
KOKKOS_FUNCTION
void build_cluster_Item(const int &i) const;
#ifdef KOKKOS_HAVE_CUDA
template<int HalfNeigh, int GhostNewton>
__device__ inline
void build_ItemCuda(typename Kokkos::TeamPolicy<Device>::member_type dev) const;
#endif
KOKKOS_INLINE_FUNCTION
void binatomsItem(const int &i) const;
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
{
int ix,iy,iz;
if (x >= bboxhi[0])
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
else if (x >= bboxlo[0]) {
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
if (y >= bboxhi[1])
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
else if (y >= bboxlo[1]) {
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
if (z >= bboxhi[2])
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
else if (z >= bboxlo[2]) {
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
{
int ix,iy,iz;
if (x >= bboxhi[0])
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
else if (x >= bboxlo[0]) {
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
if (y >= bboxhi[1])
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
else if (y >= bboxlo[1]) {
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
if (z >= bboxhi[2])
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
else if (z >= bboxlo[2]) {
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
i[0] = ix - mbinxlo;
i[1] = iy - mbinylo;
i[2] = iz - mbinzlo;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
KOKKOS_INLINE_FUNCTION
int find_special(const int &i, const int &j) const;
KOKKOS_INLINE_FUNCTION
int minimum_image_check(double dx, double dy, double dz) const {
if (xperiodic && fabs(dx) > xprd_half) return 1;
if (yperiodic && fabs(dy) > yprd_half) return 1;
if (zperiodic && fabs(dz) > zprd_half) return 1;
return 0;
}
};
template<class Device>
struct NeighborKokkosBinAtomsFunctor {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
NeighborKokkosBinAtomsFunctor(const NeighborKokkosExecute<Device> &_c):
c(_c) {};
~NeighborKokkosBinAtomsFunctor() {}
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.binatomsItem(i);
}
};
template<class Device,int HALF_NEIGH,int GHOST_NEWTON>
struct NeighborKokkosBuildFunctor {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
const size_t sharedsize;
NeighborKokkosBuildFunctor(const NeighborKokkosExecute<Device> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
}
#ifdef KOKKOS_HAVE_CUDA
KOKKOS_INLINE_FUNCTION
void operator() (typename Kokkos::TeamPolicy<Device>::member_type dev) const {
c.template build_ItemCuda<HALF_NEIGH,GHOST_NEWTON>(dev);
}
size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; }
#endif
};
template<class Device,int HALF_NEIGH>
struct NeighborKokkosBuildFunctorGhost {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
const size_t sharedsize;
NeighborKokkosBuildFunctorGhost(const NeighborKokkosExecute<Device> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item_Ghost<HALF_NEIGH>(i);
}
};
template<class Device,int ClusterSize>
struct NeighborClusterKokkosBuildFunctor {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
const size_t sharedsize;
NeighborClusterKokkosBuildFunctor(const NeighborKokkosExecute<Device> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_cluster_Item<ClusterSize>(i);
}
};
template<class DeviceType>
struct TagNeighborCheckDistance{};
@ -342,24 +32,11 @@ class NeighborKokkos : public Neighbor {
public:
typedef int value_type;
int nlist_host; // pairwise neighbor lists on Host
NeighListKokkos<LMPHostType> **lists_host;
int nlist_device; // pairwise neighbor lists on Device
NeighListKokkos<LMPDeviceType> **lists_device;
NeighBondKokkos<LMPHostType> neighbond_host;
NeighBondKokkos<LMPDeviceType> neighbond_device;
DAT::tdual_int_2d k_bondlist;
DAT::tdual_int_2d k_anglelist;
DAT::tdual_int_2d k_dihedrallist;
DAT::tdual_int_2d k_improperlist;
NeighborKokkos(class LAMMPS *);
~NeighborKokkos();
void init();
void init_topology();
void build_topology();
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
@ -369,11 +46,7 @@ class NeighborKokkos : public Neighbor {
KOKKOS_INLINE_FUNCTION
void operator()(TagNeighborXhold<DeviceType>, const int&) const;
private:
int atoms_per_bin;
DAT::tdual_xfloat_2d k_cutneighsq;
DAT::tdual_int_1d k_bincount;
DAT::tdual_int_2d k_bins;
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
DAT::tdual_int_2d k_ex_type;
@ -382,6 +55,16 @@ class NeighborKokkos : public Neighbor {
DAT::tdual_int_1d k_ex_mol_group;
DAT::tdual_int_1d k_ex_mol_bit;
NeighBondKokkos<LMPHostType> neighbond_host;
NeighBondKokkos<LMPDeviceType> neighbond_device;
DAT::tdual_int_2d k_bondlist;
DAT::tdual_int_2d k_anglelist;
DAT::tdual_int_2d k_dihedrallist;
DAT::tdual_int_2d k_improperlist;
private:
DAT::tdual_x_array x;
DAT::tdual_x_array xhold;
@ -389,14 +72,10 @@ class NeighborKokkos : public Neighbor {
int device_flag;
void init_cutneighsq_kokkos(int);
int init_lists_kokkos();
void init_list_flags1_kokkos(int);
void init_list_flags2_kokkos(int);
void init_list_grow_kokkos(int);
void create_kokkos_list(int);
void init_ex_type_kokkos(int);
void init_ex_bit_kokkos();
void init_ex_mol_bit_kokkos();
void choose_build(int, NeighRequest *);
virtual int check_distance();
template<class DeviceType> int check_distance_kokkos();
virtual void build(int);
@ -405,27 +84,6 @@ class NeighborKokkos : public Neighbor {
void modify_ex_type_grow_kokkos();
void modify_ex_group_grow_kokkos();
void modify_mol_group_grow_kokkos();
void init_topology_kokkos();
void build_topology_kokkos();
typedef void (NeighborKokkos::*PairPtrHost)
(class NeighListKokkos<LMPHostType> *);
PairPtrHost *pair_build_host;
typedef void (NeighborKokkos::*PairPtrDevice)
(class NeighListKokkos<LMPDeviceType> *);
PairPtrDevice *pair_build_device;
template<class DeviceType,int HALF_NEIGH, int GHOST>
void full_bin_kokkos(NeighListKokkos<DeviceType> *list);
template<class DeviceType>
void full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list);
typedef void (NeighborKokkos::*StencilPtrHost)
(class NeighListKokkos<LMPHostType> *, int, int, int);
StencilPtrHost *stencil_create_host;
typedef void (NeighborKokkos::*StencilPtrDevice)
(class NeighListKokkos<LMPDeviceType> *, int, int, int);
StencilPtrDevice *stencil_create_device;
};
}

746
src/KOKKOS/npair_kokkos.cpp Normal file
View File

@ -0,0 +1,746 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "npair_kokkos.h"
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "domain_kokkos.h"
#include "neighbor_kokkos.h"
#include "nbin_kokkos.h"
#include "nstencil.h"
#include "force.h"
namespace LAMMPS_NS {
/* ---------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::NPairKokkos(LAMMPS *lmp) : NPair(lmp) {
}
/* ----------------------------------------------------------------------
copy needed info from Neighbor class to this build class
------------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_neighbor_info()
{
NPair::copy_neighbor_info();
NeighborKokkos* neighborKK = (NeighborKokkos*) neighbor;
// general params
newton_pair = force->newton_pair;
k_cutneighsq = neighborKK->k_cutneighsq;
// exclusion info
k_ex1_type = neighborKK->k_ex1_type;
k_ex2_type = neighborKK->k_ex2_type;
k_ex_type = neighborKK->k_ex_type;
k_ex1_group = neighborKK->k_ex1_group;
k_ex2_group = neighborKK->k_ex1_group;
k_ex1_bit = neighborKK->k_ex1_group;
k_ex2_bit = neighborKK->k_ex1_group;
k_ex_mol_group = neighborKK->k_ex_mol_group;
k_ex_mol_bit = neighborKK->k_ex_mol_bit;
}
/* ----------------------------------------------------------------------
copy per-atom and per-bin vectors from NBin class to this build class
------------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_bin_info()
{
NPair::copy_bin_info();
NBinKokkos<DeviceType>* nbKK = (NBinKokkos<DeviceType>*) nb;
atoms_per_bin = nbKK->atoms_per_bin;
k_bincount = nbKK->k_bincount;
k_bins = nbKK->k_bins;
}
/* ----------------------------------------------------------------------
copy needed info from NStencil class to this build class
------------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_stencil_info()
{
NPair::copy_stencil_info();
nstencil = ns->nstencil;
int maxstencil = ns->get_maxstencil();
k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil);
for (int k = 0; k < maxstencil; k++)
k_stencil.h_view(k) = ns->stencil[k];
k_stencil.modify<LMPHostType>();
k_stencil.sync<DeviceType>();
if (GHOST) {
k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil);
for (int k = 0; k < maxstencil; k++) {
k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0];
k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1];
k_stencilxyz.h_view(k,2) = ns->stencilxyz[k][2];
}
k_stencilxyz.modify<LMPHostType>();
k_stencilxyz.sync<DeviceType>();
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::build(NeighList *list_)
{
NeighListKokkos<DeviceType>* list = (NeighListKokkos<DeviceType>*) list_;
const int nlocal = includegroup?atom->nfirst:atom->nlocal;
int nall = nlocal;
if (GHOST)
nall += atom->nghost;
list->grow(nall);
NeighborKokkosExecute<DeviceType>
data(*list,
k_cutneighsq.view<DeviceType>(),
k_bincount.view<DeviceType>(),
k_bins.view<DeviceType>(),
nstencil,
k_stencil.view<DeviceType>(),
k_stencilxyz.view<DeviceType>(),
nlocal,
atomKK->k_x.view<DeviceType>(),
atomKK->k_type.view<DeviceType>(),
atomKK->k_mask.view<DeviceType>(),
atomKK->k_molecule.view<DeviceType>(),
atomKK->k_tag.view<DeviceType>(),
atomKK->k_special.view<DeviceType>(),
atomKK->k_nspecial.view<DeviceType>(),
atomKK->molecular,
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
bininvx,bininvy,bininvz,
exclude, nex_type,maxex_type,
k_ex1_type.view<DeviceType>(),
k_ex2_type.view<DeviceType>(),
k_ex_type.view<DeviceType>(),
nex_group,maxex_group,
k_ex1_group.view<DeviceType>(),
k_ex2_group.view<DeviceType>(),
k_ex1_bit.view<DeviceType>(),
k_ex2_bit.view<DeviceType>(),
nex_mol, maxex_mol,
k_ex_mol_group.view<DeviceType>(),
k_ex_mol_bit.view<DeviceType>(),
bboxhi,bboxlo,
domain->xperiodic,domain->yperiodic,domain->zperiodic,
domain->xprd_half,domain->yprd_half,domain->zprd_half);
k_cutneighsq.sync<DeviceType>();
k_ex1_type.sync<DeviceType>();
k_ex2_type.sync<DeviceType>();
k_ex_type.sync<DeviceType>();
k_ex1_group.sync<DeviceType>();
k_ex2_group.sync<DeviceType>();
k_ex1_bit.sync<DeviceType>();
k_ex2_bit.sync<DeviceType>();
k_ex_mol_group.sync<DeviceType>();
k_ex_mol_bit.sync<DeviceType>();
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
data.special_flag[0] = special_flag[0];
data.special_flag[1] = special_flag[1];
data.special_flag[2] = special_flag[2];
data.special_flag[3] = special_flag[3];
if(list->d_neighbors.dimension_0()<nall) {
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
data.neigh_list.d_neighbors = list->d_neighbors;
data.neigh_list.d_numneigh = list->d_numneigh;
}
data.h_resize()=1;
while(data.h_resize()) {
data.h_new_maxneighs() = list->maxneighs;
data.h_resize() = 0;
Kokkos::deep_copy(data.resize, data.h_resize);
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
#ifdef KOKKOS_HAVE_CUDA
#define BINS_PER_BLOCK 2
const int factor = atoms_per_bin<64?2:1;
Kokkos::TeamPolicy<DeviceType> config((mbins+factor-1)/factor,atoms_per_bin*factor);
#else
const int factor = 1;
#endif
if (GHOST) {
NPairKokkosBuildFunctorGhost<DeviceType,HALF_NEIGH> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
Kokkos::parallel_for(nall, f);
} else {
if (newton_pair) {
NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#ifdef KOKKOS_HAVE_CUDA
Kokkos::parallel_for(config, f);
#else
Kokkos::parallel_for(nall, f);
#endif
} else {
NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#ifdef KOKKOS_HAVE_CUDA
Kokkos::parallel_for(config, f);
#else
Kokkos::parallel_for(nall, f);
#endif
}
}
DeviceType::fence();
deep_copy(data.h_resize, data.resize);
if(data.h_resize()) {
deep_copy(data.h_new_maxneighs, data.new_maxneighs);
list->maxneighs = data.h_new_maxneighs() * 1.2;
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs);
data.neigh_list.d_neighbors = list->d_neighbors;
data.neigh_list.maxneighs = list->maxneighs;
}
}
if (GHOST) {
list->inum = atom->nlocal;
list->gnum = nall - atom->nlocal;
} else {
list->inum = nall;
list->gnum = 0;
}
list->k_ilist.template modify<DeviceType>();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
int NeighborKokkosExecute<DeviceType>::find_special(const int &i, const int &j) const
{
const int n1 = nspecial(i,0);
const int n2 = nspecial(i,1);
const int n3 = nspecial(i,2);
for (int k = 0; k < n3; k++) {
if (special(i,k) == tag(j)) {
if (k < n1) {
if (special_flag[1] == 0) return -1;
else if (special_flag[1] == 1) return 0;
else return 1;
} else if (k < n2) {
if (special_flag[2] == 0) return -1;
else if (special_flag[2] == 1) return 0;
else return 2;
} else {
if (special_flag[3] == 0) return -1;
else if (special_flag[3] == 1) return 0;
else return 3;
}
}
}
return 0;
};
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
int NeighborKokkosExecute<DeviceType>::exclusion(const int &i,const int &j,
const int &itype,const int &jtype) const
{
int m;
if (nex_type && ex_type(itype,jtype)) return 1;
if (nex_group) {
for (m = 0; m < nex_group; m++) {
if (mask(i) & ex1_bit(m) && mask(j) & ex2_bit(m)) return 1;
if (mask(i) & ex2_bit(m) && mask(j) & ex1_bit(m)) return 1;
}
}
if (nex_mol) {
for (m = 0; m < nex_mol; m++)
if (mask(i) & ex_mol_bit(m) && mask(j) & ex_mol_bit(m) &&
molecule(i) == molecule(j)) return 1;
}
return 0;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType> template<int HalfNeigh,int Newton>
void NeighborKokkosExecute<DeviceType>::
build_Item(const int &i) const
{
/* if necessary, goto next page and add pages */
int n = 0;
int which = 0;
int moltemplate;
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
// get subview of neighbors of i
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
const X_FLOAT xtmp = x(i, 0);
const X_FLOAT ytmp = x(i, 1);
const X_FLOAT ztmp = x(i, 2);
const int itype = type(i);
const int ibin = coord2bin(xtmp, ytmp, ztmp);
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= d_stencil;
// loop over all bins in neighborhood (includes ibin)
if(HalfNeigh)
for(int m = 0; m < c_bincount(ibin); m++) {
const int j = c_bins(ibin,m);
const int jtype = type(j);
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
if((j == i) || (HalfNeigh && !Newton && (j < i)) ||
(HalfNeigh && Newton && ((j < i) || ((j >= nlocal) &&
((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
) continue;
if(exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq <= cutneighsq(itype,jtype)) {
if (molecular) {
if (!moltemplate)
which = find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
else n++;
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
}
}
for(int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
// get subview of jbin
if(HalfNeigh&&(ibin==jbin)) continue;
//const ArrayTypes<DeviceType>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
for(int m = 0; m < c_bincount(jbin); m++) {
const int j = c_bins(jbin,m);
const int jtype = type(j);
if(HalfNeigh && !Newton && (j < i)) continue;
if(!HalfNeigh && j==i) continue;
if(exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq <= cutneighsq(itype,jtype)) {
if (molecular) {
if (!moltemplate)
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
else n++;
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
}
}
}
neigh_list.d_numneigh(i) = n;
if(n >= neigh_list.maxneighs) {
resize() = 1;
if(n >= new_maxneighs()) new_maxneighs() = n;
}
neigh_list.d_ilist(i) = i;
}
/* ---------------------------------------------------------------------- */
#ifdef KOKKOS_HAVE_CUDA
extern __shared__ X_FLOAT sharedmem[];
/* ---------------------------------------------------------------------- */
template<class DeviceType> template<int HalfNeigh,int Newton>
__device__ inline
void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const
{
/* loop over atoms in i's bin,
*/
const int atoms_per_bin = c_bins.dimension_1();
const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin<1?1:dev.team_size()/atoms_per_bin;
const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size();
const int MY_BIN = dev.team_rank()/atoms_per_bin;
const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN;
if(ibin >=c_bincount.dimension_0()) return;
X_FLOAT* other_x = sharedmem;
other_x = other_x + 5*atoms_per_bin*MY_BIN;
int* other_id = (int*) &other_x[4 * atoms_per_bin];
int bincount_current = c_bincount[ibin];
for(int kk = 0; kk < TEAMS_PER_BIN; kk++) {
const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
/* if necessary, goto next page and add pages */
int n = 0;
X_FLOAT xtmp;
X_FLOAT ytmp;
X_FLOAT ztmp;
int itype;
const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i<nlocal)?i:0);
if(i >= 0) {
xtmp = x(i, 0);
ytmp = x(i, 1);
ztmp = x(i, 2);
itype = type(i);
other_x[MY_II] = xtmp;
other_x[MY_II + atoms_per_bin] = ytmp;
other_x[MY_II + 2 * atoms_per_bin] = ztmp;
other_x[MY_II + 3 * atoms_per_bin] = itype;
}
other_id[MY_II] = i;
int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0);
if(test) return;
if(i >= 0 && i < nlocal) {
#pragma unroll 4
for(int m = 0; m < bincount_current; m++) {
int j = other_id[m];
const int jtype = other_x[m + 3 * atoms_per_bin];
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
if((j == i) ||
(HalfNeigh && !Newton && (j < i)) ||
(HalfNeigh && Newton &&
((j < i) ||
((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
) continue;
if(exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - other_x[m];
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq <= cutneighsq(itype,jtype)) {
if (molecular) {
int which = 0;
if (!moltemplate)
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
else n++;
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
}
}
}
__syncthreads();
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= d_stencil;
for(int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
if(ibin == jbin) continue;
bincount_current = c_bincount[jbin];
int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1;
if(j >= 0) {
other_x[MY_II] = x(j, 0);
other_x[MY_II + atoms_per_bin] = x(j, 1);
other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
other_x[MY_II + 3 * atoms_per_bin] = type(j);
}
other_id[MY_II] = j;
__syncthreads();
if(i >= 0 && i < nlocal) {
#pragma unroll 8
for(int m = 0; m < bincount_current; m++) {
const int j = other_id[m];
const int jtype = other_x[m + 3 * atoms_per_bin];
//if(HalfNeigh && (j < i)) continue;
if(HalfNeigh && !Newton && (j < i)) continue;
if(!HalfNeigh && j==i) continue;
if(exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - other_x[m];
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq <= cutneighsq(itype,jtype)) {
if (molecular) {
int which = 0;
if (!moltemplate)
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
else n++;
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
}
}
}
__syncthreads();
}
if(i >= 0 && i < nlocal) {
neigh_list.d_numneigh(i) = n;
neigh_list.d_ilist(i) = i;
}
if(n >= neigh_list.maxneighs) {
resize() = 1;
if(n >= new_maxneighs()) new_maxneighs() = n;
}
}
}
#endif
/* ---------------------------------------------------------------------- */
template<class DeviceType> template<int HalfNeigh>
void NeighborKokkosExecute<DeviceType>::
build_Item_Ghost(const int &i) const
{
/* if necessary, goto next page and add pages */
int n = 0;
int which = 0;
int moltemplate;
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
// get subview of neighbors of i
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
const X_FLOAT xtmp = x(i, 0);
const X_FLOAT ytmp = x(i, 1);
const X_FLOAT ztmp = x(i, 2);
const int itype = type(i);
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= d_stencil;
const typename ArrayTypes<DeviceType>::t_int_1d_3_const_um stencilxyz
= d_stencilxyz;
// loop over all atoms in surrounding bins in stencil including self
// when i is a ghost atom, must check if stencil bin is out of bounds
// skip i = j
// no molecular test when i = ghost atom
if (i < nlocal) {
const int ibin = coord2bin(xtmp, ytmp, ztmp);
for (int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
for(int m = 0; m < c_bincount(jbin); m++) {
const int j = c_bins(jbin,m);
if (HalfNeigh && j <= i) continue;
else if (j == i) continue;
const int jtype = type[j];
if(exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j,0);
const X_FLOAT dely = ytmp - x(j,1);
const X_FLOAT delz = ztmp - x(j,2);
const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq(itype,jtype)) {
if (molecular) {
if (!moltemplate)
which = find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
else n++;
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
}
}
}
} else {
int binxyz[3];
const int ibin = coord2bin(xtmp, ytmp, ztmp, binxyz);
const int xbin = binxyz[0];
const int ybin = binxyz[1];
const int zbin = binxyz[2];
for (int k = 0; k < nstencil; k++) {
const X_FLOAT xbin2 = xbin + stencilxyz(k,0);
const X_FLOAT ybin2 = ybin + stencilxyz(k,1);
const X_FLOAT zbin2 = zbin + stencilxyz(k,2);
if (xbin2 < 0 || xbin2 >= mbinx ||
ybin2 < 0 || ybin2 >= mbiny ||
zbin2 < 0 || zbin2 >= mbinz) continue;
const int jbin = ibin + stencil[k];
for(int m = 0; m < c_bincount(jbin); m++) {
const int j = c_bins(jbin,m);
if (HalfNeigh && j <= i) continue;
else if (j == i) continue;
const int jtype = type[j];
if(exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j,0);
const X_FLOAT dely = ytmp - x(j,1);
const X_FLOAT delz = ztmp - x(j,2);
const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq(itype,jtype)) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
}
}
}
neigh_list.d_numneigh(i) = n;
if(n >= neigh_list.maxneighs) {
resize() = 1;
if(n >= new_maxneighs()) new_maxneighs() = n;
}
neigh_list.d_ilist(i) = i;
}
}
namespace LAMMPS_NS {
template class NPairKokkos<LMPDeviceType,0,0>;
template class NPairKokkos<LMPDeviceType,0,1>;
template class NPairKokkos<LMPDeviceType,1,0>;
template class NPairKokkos<LMPDeviceType,1,1>;
#ifdef KOKKOS_HAVE_CUDA
template class NPairKokkos<LMPHostType,0,0>;
template class NPairKokkos<LMPHostType,0,1>;
template class NPairKokkos<LMPHostType,1,0>;
template class NPairKokkos<LMPHostType,1,1>;
#endif
}

435
src/KOKKOS/npair_kokkos.h Normal file
View File

@ -0,0 +1,435 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
typedef NPairKokkos<LMPHostType,0,0> NPairKokkosFullBinHost;
NPairStyle(full/bin/kk/host,
NPairKokkosFullBinHost,
NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,0,0> NPairKokkosFullBinDevice;
NPairStyle(full/bin/kk/device,
NPairKokkosFullBinDevice,
NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPHostType,0,1> NPairKokkosFullBinGhostHost;
NPairStyle(full/bin/ghost/kk/host,
NPairKokkosFullBinGhostHost,
NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,0,1> NPairKokkosFullBinGhostDevice;
NPairStyle(full/bin/ghost/kk/device,
NPairKokkosFullBinGhostDevice,
NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPHostType,1,0> NPairKokkosHalfBinHost;
NPairStyle(half/bin/kk/host,
NPairKokkosHalfBinHost,
NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,1,0> NPairKokkosHalfBinDevice;
NPairStyle(half/bin/kk/device,
NPairKokkosHalfBinDevice,
NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPHostType,1,1> NPairKokkosHalfBinGhostHost;
NPairStyle(half/bin/ghost/kk/host,
NPairKokkosHalfBinGhostHost,
NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,1,1> NPairKokkosHalfBinGhostDevice;
NPairStyle(half/bin/ghost/kk/device,
NPairKokkosHalfBinGhostDevice,
NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
#else
#ifndef LMP_NPAIR_KOKKOS_H
#define LMP_NPAIR_KOKKOS_H
#include "npair.h"
#include "neigh_list_kokkos.h"
namespace LAMMPS_NS {
template<class DeviceType, int HALF_NEIGH, int GHOST>
class NPairKokkos : public NPair {
public:
NPairKokkos(class LAMMPS *);
~NPairKokkos() {}
void copy_neighbor_info();
void copy_bin_info();
void copy_stencil_info();
void build(class NeighList *);
private:
int newton_pair;
int nex_type;
int maxex_type;
int nex_group;
int maxex_group;
int nex_mol;
int maxex_mol;
// data from Neighbor class
DAT::tdual_xfloat_2d k_cutneighsq;
// exclusion data from Neighbor class
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
DAT::tdual_int_2d k_ex_type;
DAT::tdual_int_1d k_ex1_group,k_ex2_group;
DAT::tdual_int_1d k_ex1_bit,k_ex2_bit;
DAT::tdual_int_1d k_ex_mol_group;
DAT::tdual_int_1d k_ex_mol_bit;
// data from NBin class
int atoms_per_bin;
DAT::tdual_int_1d k_bincount;
DAT::tdual_int_2d k_bins;
// data from NStencil class
int nstencil;
DAT::tdual_int_1d k_stencil; // # of J neighs for each I
DAT::tdual_int_1d_3 k_stencilxyz;
};
template<class DeviceType>
class NeighborKokkosExecute
{
typedef ArrayTypes<DeviceType> AT;
public:
NeighListKokkos<DeviceType> neigh_list;
// data from Neighbor class
const typename AT::t_xfloat_2d_randomread cutneighsq;
// exclusion data from Neighbor class
const int exclude;
const int nex_type;
const int maxex_type;
const typename AT::t_int_1d_const ex1_type,ex2_type;
const typename AT::t_int_2d_const ex_type;
const int nex_group;
const int maxex_group;
const typename AT::t_int_1d_const ex1_group,ex2_group;
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
const int nex_mol;
const int maxex_mol;
const typename AT::t_int_1d_const ex_mol_group;
const typename AT::t_int_1d_const ex_mol_bit;
// data from NBin class
const typename AT::t_int_1d bincount;
const typename AT::t_int_1d_const c_bincount;
typename AT::t_int_2d bins;
typename AT::t_int_2d_const c_bins;
// data from NStencil class
int nstencil;
typename AT::t_int_1d d_stencil; // # of J neighs for each I
typename AT::t_int_1d_3 d_stencilxyz;
// data from Atom class
const typename AT::t_x_array_randomread x;
const typename AT::t_int_1d_const type,mask,molecule;
const typename AT::t_tagint_1d_const tag;
const typename AT::t_tagint_2d_const special;
const typename AT::t_int_2d_const nspecial;
const int molecular;
int moltemplate;
int special_flag[4];
const int nbinx,nbiny,nbinz;
const int mbinx,mbiny,mbinz;
const int mbinxlo,mbinylo,mbinzlo;
const X_FLOAT bininvx,bininvy,bininvz;
X_FLOAT bboxhi[3],bboxlo[3];
const int nlocal;
typename AT::t_int_scalar resize;
typename AT::t_int_scalar new_maxneighs;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
const int xperiodic, yperiodic, zperiodic;
const int xprd_half, yprd_half, zprd_half;
NeighborKokkosExecute(
const NeighListKokkos<DeviceType> &_neigh_list,
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
const typename AT::t_int_1d &_bincount,
const typename AT::t_int_2d &_bins,
const int _nstencil,
const typename AT::t_int_1d &_d_stencil,
const typename AT::t_int_1d_3 &_d_stencilxyz,
const int _nlocal,
const typename AT::t_x_array_randomread &_x,
const typename AT::t_int_1d_const &_type,
const typename AT::t_int_1d_const &_mask,
const typename AT::t_int_1d_const &_molecule,
const typename AT::t_tagint_1d_const &_tag,
const typename AT::t_tagint_2d_const &_special,
const typename AT::t_int_2d_const &_nspecial,
const int &_molecular,
const int & _nbinx,const int & _nbiny,const int & _nbinz,
const int & _mbinx,const int & _mbiny,const int & _mbinz,
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
const int & _exclude,const int & _nex_type,const int & _maxex_type,
const typename AT::t_int_1d_const & _ex1_type,
const typename AT::t_int_1d_const & _ex2_type,
const typename AT::t_int_2d_const & _ex_type,
const int & _nex_group,const int & _maxex_group,
const typename AT::t_int_1d_const & _ex1_group,
const typename AT::t_int_1d_const & _ex2_group,
const typename AT::t_int_1d_const & _ex1_bit,
const typename AT::t_int_1d_const & _ex2_bit,
const int & _nex_mol,const int & _maxex_mol,
const typename AT::t_int_1d_const & _ex_mol_group,
const typename AT::t_int_1d_const & _ex_mol_bit,
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
nstencil(_nstencil),d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz),
nlocal(_nlocal),
x(_x),type(_type),mask(_mask),molecule(_molecule),
tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
exclude(_exclude),nex_type(_nex_type),maxex_type(_maxex_type),
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
nex_group(_nex_group),maxex_group(_maxex_group),
ex1_group(_ex1_group),ex2_group(_ex2_group),
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),maxex_mol(_maxex_mol),
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) {
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(resize);
#else
h_resize = resize;
#endif
h_resize() = 1;
new_maxneighs = typename AT::
t_int_scalar("NeighborKokkosFunctor::new_maxneighs");
#ifndef KOKKOS_USE_CUDA_UVM
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
#else
h_new_maxneighs = new_maxneighs;
#endif
h_new_maxneighs() = neigh_list.maxneighs;
};
~NeighborKokkosExecute() {neigh_list.clean_copy();};
template<int HalfNeigh, int Newton>
KOKKOS_FUNCTION
void build_Item(const int &i) const;
template<int HalfNeigh>
KOKKOS_FUNCTION
void build_Item_Ghost(const int &i) const;
#ifdef KOKKOS_HAVE_CUDA
template<int HalfNeigh, int Newton>
__device__ inline
void build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const;
#endif
KOKKOS_INLINE_FUNCTION
void binatomsItem(const int &i) const;
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
{
int ix,iy,iz;
if (x >= bboxhi[0])
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
else if (x >= bboxlo[0]) {
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
if (y >= bboxhi[1])
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
else if (y >= bboxlo[1]) {
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
if (z >= bboxhi[2])
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
else if (z >= bboxlo[2]) {
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
{
int ix,iy,iz;
if (x >= bboxhi[0])
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
else if (x >= bboxlo[0]) {
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
if (y >= bboxhi[1])
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
else if (y >= bboxlo[1]) {
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
if (z >= bboxhi[2])
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
else if (z >= bboxlo[2]) {
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
i[0] = ix - mbinxlo;
i[1] = iy - mbinylo;
i[2] = iz - mbinzlo;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
KOKKOS_INLINE_FUNCTION
int find_special(const int &i, const int &j) const;
KOKKOS_INLINE_FUNCTION
int minimum_image_check(double dx, double dy, double dz) const {
if (xperiodic && fabs(dx) > xprd_half) return 1;
if (yperiodic && fabs(dy) > yprd_half) return 1;
if (zperiodic && fabs(dz) > zprd_half) return 1;
return 0;
}
};
template<class DeviceType,int HALF_NEIGH,int GHOST_NEWTON>
struct NPairKokkosBuildFunctor {
typedef DeviceType device_type;
const NeighborKokkosExecute<DeviceType> c;
const size_t sharedsize;
NPairKokkosBuildFunctor(const NeighborKokkosExecute<DeviceType> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
}
#ifdef KOKKOS_HAVE_CUDA
__device__ inline
void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const {
c.template build_ItemCuda<HALF_NEIGH,GHOST_NEWTON>(dev);
}
size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; }
#endif
};
template<int HALF_NEIGH,int GHOST_NEWTON>
struct NPairKokkosBuildFunctor<LMPHostType,HALF_NEIGH,GHOST_NEWTON> {
typedef LMPHostType device_type;
const NeighborKokkosExecute<LMPHostType> c;
const size_t sharedsize;
NPairKokkosBuildFunctor(const NeighborKokkosExecute<LMPHostType> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
}
void operator() (typename Kokkos::TeamPolicy<LMPHostType>::member_type dev) const {}
};
template<class DeviceType,int HALF_NEIGH>
struct NPairKokkosBuildFunctorGhost {
typedef DeviceType device_type;
const NeighborKokkosExecute<DeviceType> c;
const size_t sharedsize;
NPairKokkosBuildFunctorGhost(const NeighborKokkosExecute<DeviceType> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item_Ghost<HALF_NEIGH>(i);
}
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -90,7 +90,7 @@ void PairBuckCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -309,19 +309,12 @@ void PairBuckCoulCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 1;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/cut/kk");
}

View File

@ -109,7 +109,7 @@ void PairBuckCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -458,11 +458,9 @@ void PairBuckCoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/long/kk");
}

View File

@ -79,7 +79,7 @@ void PairBuckKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -233,19 +233,12 @@ void PairBuckKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with buck/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairBuckKokkos : public PairBuck {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairBuckKokkos(class LAMMPS *);
@ -96,17 +96,14 @@ class PairBuckKokkos : public PairBuck {
friend class PairComputeFunctor<PairBuckKokkos,HALF,true>;
friend class PairComputeFunctor<PairBuckKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairBuckKokkos,N2,true>;
friend class PairComputeFunctor<PairBuckKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairBuckKokkos,FULL,false>;
friend class PairComputeFunctor<PairBuckKokkos,HALF,false>;
friend class PairComputeFunctor<PairBuckKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairBuckKokkos,N2,false>;
friend class PairComputeFunctor<PairBuckKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,FULL,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALF,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALFTHREAD,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,N2,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairBuckKokkos,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairBuckKokkos,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairBuckKokkos>(PairBuckKokkos*);
};

View File

@ -78,7 +78,7 @@ void PairCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -215,11 +215,9 @@ void PairCoulCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/cut/kk");
}

View File

@ -85,7 +85,7 @@ void PairCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -257,19 +257,12 @@ void PairCoulDebyeKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/debye/kk");
}

View File

@ -221,11 +221,9 @@ void PairCoulDSFKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/dsf/kk");
}

View File

@ -102,7 +102,7 @@ void PairCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -408,11 +408,9 @@ void PairCoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/long/kk");
}

View File

@ -222,11 +222,9 @@ void PairCoulWolfKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/wolf/kk");
}

View File

@ -286,11 +286,9 @@ void PairEAMAlloyKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/alloy");
}

View File

@ -291,11 +291,9 @@ void PairEAMFSKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/fs");
}

View File

@ -281,11 +281,9 @@ void PairEAMKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk");
}

View File

@ -333,145 +333,6 @@ struct PairComputeFunctor {
}
};
template <class PairStyle, bool STACKPARAMS, class Specialisation>
struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
typedef typename PairStyle::device_type device_type ;
typedef EV_FLOAT value_type;
PairStyle c;
NeighListKokkos<device_type> list;
PairComputeFunctor(PairStyle* c_ptr,
NeighListKokkos<device_type>* list_ptr):
c(*c_ptr),list(*list_ptr) {};
~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
return j >> SBBITS & 3;
}
template<int EVFLAG, int NEWTON_PAIR>
KOKKOS_FUNCTION
EV_FLOAT compute_item(const typename Kokkos::TeamPolicy<device_type>::member_type& dev,
const NeighListKokkos<device_type> &list, const NoCoulTag& ) const {
EV_FLOAT ev;
int i = dev.league_rank()*dev.team_size() + dev.team_rank();
const X_FLOAT xtmp = c.c_x(i,0);
const X_FLOAT ytmp = c.c_x(i,1);
const X_FLOAT ztmp = c.c_x(i,2);
int itype = c.type(i);
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
const int jnum = list.d_numneigh[i];
F_FLOAT3 ftmp;
for (int jj = 0; jj < jnum; jj++) {
int jjj = neighbors_i(jj);
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(dev,NeighClusterSize),[&] (const int& k, F_FLOAT3& fftmp) {
const F_FLOAT factor_lj = c.special_lj[sbmask(jjj+k)];
const int j = (jjj + k)&NEIGHMASK;
if((j==i)||(j>=c.nall)) return;
const X_FLOAT delx = xtmp - c.c_x(j,0);
const X_FLOAT dely = ytmp - c.c_x(j,1);
const X_FLOAT delz = ztmp - c.c_x(j,2);
const int jtype = c.type(j);
const F_FLOAT rsq = (delx*delx + dely*dely + delz*delz);
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
fftmp.x += delx*fpair;
fftmp.y += dely*fpair;
fftmp.z += delz*fpair;
if (EVFLAG) {
F_FLOAT evdwl = 0.0;
if (c.eflag) {
evdwl = 0.5*
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
ev.evdwl += evdwl;
}
if (c.vflag_either || c.eflag_atom) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz);
}
}
},ftmp);
}
Kokkos::single(Kokkos::PerThread(dev), [&]() {
c.f(i,0) += ftmp.x;
c.f(i,1) += ftmp.y;
c.f(i,2) += ftmp.z;
});
return ev;
}
KOKKOS_INLINE_FUNCTION
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
const F_FLOAT &dely, const F_FLOAT &delz) const
{
const int EFLAG = c.eflag;
const int NEWTON_PAIR = c.newton_pair;
const int VFLAG = c.vflag_either;
if (EFLAG) {
if (c.eflag_atom) {
const E_FLOAT epairhalf = 0.5 * epair;
if (NEWTON_PAIR || i < c.nlocal) c.d_eatom[i] += epairhalf;
if (NEWTON_PAIR || j < c.nlocal) c.d_eatom[j] += epairhalf;
}
}
if (VFLAG) {
const E_FLOAT v0 = delx*delx*fpair;
const E_FLOAT v1 = dely*dely*fpair;
const E_FLOAT v2 = delz*delz*fpair;
const E_FLOAT v3 = delx*dely*fpair;
const E_FLOAT v4 = delx*delz*fpair;
const E_FLOAT v5 = dely*delz*fpair;
if (c.vflag_global) {
ev.v[0] += 0.5*v0;
ev.v[1] += 0.5*v1;
ev.v[2] += 0.5*v2;
ev.v[3] += 0.5*v3;
ev.v[4] += 0.5*v4;
ev.v[5] += 0.5*v5;
}
if (c.vflag_atom) {
if (i < c.nlocal) {
c.d_vatom(i,0) += 0.5*v0;
c.d_vatom(i,1) += 0.5*v1;
c.d_vatom(i,2) += 0.5*v2;
c.d_vatom(i,3) += 0.5*v3;
c.d_vatom(i,4) += 0.5*v4;
c.d_vatom(i,5) += 0.5*v5;
}
}
}
}
KOKKOS_INLINE_FUNCTION
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev) const {
if (c.newton_pair) compute_item<0,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
else compute_item<0,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
}
KOKKOS_INLINE_FUNCTION
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev, value_type &energy_virial) const {
if (c.newton_pair)
energy_virial += compute_item<1,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
else
energy_virial += compute_item<1,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
}
};
template <class PairStyle, bool STACKPARAMS, class Specialisation>
struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
typedef typename PairStyle::device_type device_type ;
@ -607,8 +468,8 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
// The enable_if clause will invalidate the last parameter of the function, so that
// a match is only achieved, if PairStyle supports the specific neighborlist variant.
// This uses the fact that failure to match template parameters is not an error.
// By having the enable_if with a ! and without it, exactly one of the two versions of the functions
// pair_compute_neighlist and pair_compute_fullcluster will match - either the dummy version
// By having the enable_if with a ! and without it, exactly one of the functions
// pair_compute_neighlist will match - either the dummy version
// or the real one further below.
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<!((NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
@ -619,15 +480,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable
return ev;
}
template<class PairStyle, class Specialisation>
EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<!((FULLCLUSTER&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
EV_FLOAT ev;
(void) fpair;
(void) list;
printf("ERROR: calling pair_compute with invalid neighbor list style: requested %i available %i \n",FULLCLUSTER,PairStyle::EnabledNeighFlags);
return ev;
}
// Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL,N2
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
@ -644,41 +496,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable
return ev;
}
// Submit ParallelFor for NEIGHFLAG=FULLCLUSTER
template<class PairStyle, class Specialisation>
EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<(FULLCLUSTER&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
EV_FLOAT ev;
if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,false,Specialisation >
f_type;
f_type ff(fpair, list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*+teamsize-1)/teamsize;
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize,NeighClusterSize);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
else Kokkos::parallel_for(config,ff);
} else {
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,true,Specialisation >
f_type;
f_type ff(fpair, list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*+teamsize-1)/teamsize;
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize,NeighClusterSize);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
else Kokkos::parallel_for(config,ff);
}
return ev;
}
template<class PairStyle, class Specialisation>
EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
EV_FLOAT ev;
@ -690,8 +507,6 @@ EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::dev
ev = pair_compute_neighlist<PairStyle,HALF,Specialisation> (fpair,list);
} else if (fpair->neighflag == N2) {
ev = pair_compute_neighlist<PairStyle,N2,Specialisation> (fpair,list);
} else if (fpair->neighflag == FULLCLUSTER) {
ev = pair_compute_fullcluster<PairStyle,Specialisation> (fpair,list);
}
return ev;
}

View File

@ -110,7 +110,7 @@ void PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::compute(int eflag_in, int
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -455,11 +455,9 @@ void PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/charmm/implicit/kk");
}

View File

@ -110,7 +110,7 @@ void PairLJCharmmCoulCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_i
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -456,11 +456,9 @@ void PairLJCharmmCoulCharmmKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/charmm/kk");
}

View File

@ -110,7 +110,7 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -486,11 +486,9 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/long/kk");
}

View File

@ -87,7 +87,7 @@ void PairLJClass2CoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -289,19 +289,12 @@ void PairLJClass2CoulCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/coul/cut/kk");
}

View File

@ -95,7 +95,7 @@ void PairLJClass2CoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -445,11 +445,9 @@ void PairLJClass2CoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/coul/long/kk");
}

View File

@ -87,7 +87,7 @@ void PairLJClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -227,19 +227,12 @@ void PairLJClass2Kokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairLJClass2Kokkos : public PairLJClass2 {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJClass2Kokkos(class LAMMPS *);
@ -99,17 +99,14 @@ class PairLJClass2Kokkos : public PairLJClass2 {
friend class PairComputeFunctor<PairLJClass2Kokkos,HALF,true>;
friend class PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJClass2Kokkos,N2,true>;
friend class PairComputeFunctor<PairLJClass2Kokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairLJClass2Kokkos,FULL,false>;
friend class PairComputeFunctor<PairLJClass2Kokkos,HALF,false>;
friend class PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJClass2Kokkos,N2,false>;
friend class PairComputeFunctor<PairLJClass2Kokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,FULL,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALF,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALFTHREAD,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,N2,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairLJClass2Kokkos,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJClass2Kokkos,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJClass2Kokkos>(PairLJClass2Kokkos*);
};

View File

@ -87,7 +87,7 @@ void PairLJCutCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -280,19 +280,12 @@ void PairLJCutCoulCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk");
}

View File

@ -91,7 +91,7 @@ void PairLJCutCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -310,19 +310,12 @@ void PairLJCutCoulDebyeKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/debye/kk");
}

View File

@ -99,7 +99,7 @@ void PairLJCutCoulDSFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -301,19 +301,12 @@ void PairLJCutCoulDSFKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk");
}

View File

@ -99,7 +99,7 @@ void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -464,11 +464,9 @@ void PairLJCutCoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/long/kk");
}

View File

@ -87,7 +87,7 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -245,19 +245,12 @@ void PairLJCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairLJCutKokkos : public PairLJCut {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJCutKokkos(class LAMMPS *);
@ -99,17 +99,14 @@ class PairLJCutKokkos : public PairLJCut {
friend class PairComputeFunctor<PairLJCutKokkos,HALF,true>;
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJCutKokkos,N2,true>;
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairLJCutKokkos,FULL,false>;
friend class PairComputeFunctor<PairLJCutKokkos,HALF,false>;
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJCutKokkos,N2,false>;
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,FULL,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALF,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALFTHREAD,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,N2,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJCutKokkos>(PairLJCutKokkos*);
};

View File

@ -86,7 +86,7 @@ void PairLJExpandKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -230,19 +230,12 @@ void PairLJExpandKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/expand/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairLJExpandKokkos : public PairLJExpand {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJExpandKokkos(class LAMMPS *);
@ -100,17 +100,14 @@ class PairLJExpandKokkos : public PairLJExpand {
friend class PairComputeFunctor<PairLJExpandKokkos,HALF,true>;
friend class PairComputeFunctor<PairLJExpandKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJExpandKokkos,N2,true>;
friend class PairComputeFunctor<PairLJExpandKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairLJExpandKokkos,FULL,false>;
friend class PairComputeFunctor<PairLJExpandKokkos,HALF,false>;
friend class PairComputeFunctor<PairLJExpandKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJExpandKokkos,N2,false>;
friend class PairComputeFunctor<PairLJExpandKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,FULL,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALF,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALFTHREAD,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,N2,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairLJExpandKokkos,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJExpandKokkos,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJExpandKokkos>(PairLJExpandKokkos*);
};

View File

@ -101,7 +101,7 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -439,11 +439,9 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/gromacs/coul/gromacs/kk");
}

View File

@ -98,7 +98,7 @@ void PairLJGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -277,11 +277,9 @@ void PairLJGromacsKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/gromacs/kk");
}

View File

@ -86,7 +86,7 @@ void PairLJSDKKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -258,19 +258,12 @@ void PairLJSDKKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/sdk/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairLJSDKKokkos : public PairLJSDK {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJSDKKokkos(class LAMMPS *);
@ -97,17 +97,14 @@ class PairLJSDKKokkos : public PairLJSDK {
friend class PairComputeFunctor<PairLJSDKKokkos,HALF,true>;
friend class PairComputeFunctor<PairLJSDKKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJSDKKokkos,N2,true>;
friend class PairComputeFunctor<PairLJSDKKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairLJSDKKokkos,FULL,false>;
friend class PairComputeFunctor<PairLJSDKKokkos,HALF,false>;
friend class PairComputeFunctor<PairLJSDKKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJSDKKokkos,N2,false>;
friend class PairComputeFunctor<PairLJSDKKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,FULL,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,HALF,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,HALFTHREAD,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,N2,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairLJSDKKokkos,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJSDKKokkos,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJSDKKokkos>(PairLJSDKKokkos*);
};

View File

@ -146,12 +146,10 @@ void PairReaxCKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->ghost = 1;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->ghost = 1;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk");

View File

@ -601,7 +601,6 @@ void PairSWKokkos<DeviceType>::init_style()
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
if (neighflag == FULL)
neighbor->requests[irequest]->ghost = 1;
else

View File

@ -96,7 +96,7 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -142,19 +142,6 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
f(this,(NeighListKokkos<DeviceType>*) list);
if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev);
else Kokkos::parallel_for(nlocal,f);
} else if (neighflag == FULLCLUSTER) {
typedef PairComputeFunctor<PairTableKokkos<DeviceType>,FULLCLUSTER,false,S_TableCompute<DeviceType,TABSTYLE> >
f_type;
f_type f(this,(NeighListKokkos<DeviceType>*) list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<DeviceType, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*+teamsize-1)/teamsize;
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize,NeighClusterSize);
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
else Kokkos::parallel_for(config,f);
}
} else {
if (neighflag == FULL) {
@ -177,19 +164,6 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
f(this,(NeighListKokkos<DeviceType>*) list);
if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev);
else Kokkos::parallel_for(nlocal,f);
} else if (neighflag == FULLCLUSTER) {
typedef PairComputeFunctor<PairTableKokkos<DeviceType>,FULLCLUSTER,true,S_TableCompute<DeviceType,TABSTYLE> >
f_type;
f_type f(this,(NeighListKokkos<DeviceType>*) list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<DeviceType, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*+teamsize-1)/teamsize;
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize,NeighClusterSize);
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
else Kokkos::parallel_for(config,f);
}
}
@ -1261,19 +1235,12 @@ void PairTableKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk");
}

View File

@ -41,7 +41,7 @@ template<class DeviceType>
class PairTableKokkos : public Pair {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
@ -170,45 +170,37 @@ class PairTableKokkos : public Pair {
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,BITMAP> >;
friend void pair_virial_fdotr_compute<PairTableKokkos>(PairTableKokkos*);
};

View File

@ -103,7 +103,6 @@ void PairTersoffKokkos<DeviceType>::init_style()
//if (neighflag == FULL || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
if (neighflag == FULL)
neighbor->requests[irequest]->ghost = 1;
else
@ -126,26 +125,26 @@ void PairTersoffKokkos<DeviceType>::setup_params()
for (i = 1; i <= n; i++)
for (j = 1; j <= n; j++)
for (k = 1; k <= n; k++) {
m = elem2param[map[i]][map[j]][map[k]];
k_params.h_view(i,j,k).powerm = params[m].powerm;
k_params.h_view(i,j,k).gamma = params[m].gamma;
k_params.h_view(i,j,k).lam3 = params[m].lam3;
k_params.h_view(i,j,k).c = params[m].c;
k_params.h_view(i,j,k).d = params[m].d;
k_params.h_view(i,j,k).h = params[m].h;
k_params.h_view(i,j,k).powern = params[m].powern;
k_params.h_view(i,j,k).beta = params[m].beta;
k_params.h_view(i,j,k).lam2 = params[m].lam2;
k_params.h_view(i,j,k).bigb = params[m].bigb;
k_params.h_view(i,j,k).bigr = params[m].bigr;
k_params.h_view(i,j,k).bigd = params[m].bigd;
k_params.h_view(i,j,k).lam1 = params[m].lam1;
k_params.h_view(i,j,k).biga = params[m].biga;
k_params.h_view(i,j,k).cutsq = params[m].cutsq;
k_params.h_view(i,j,k).c1 = params[m].c1;
k_params.h_view(i,j,k).c2 = params[m].c2;
k_params.h_view(i,j,k).c3 = params[m].c3;
k_params.h_view(i,j,k).c4 = params[m].c4;
m = elem2param[i-1][j-1][k-1];
k_params.h_view(i,j,k).powerm = params[m].powerm;
k_params.h_view(i,j,k).gamma = params[m].gamma;
k_params.h_view(i,j,k).lam3 = params[m].lam3;
k_params.h_view(i,j,k).c = params[m].c;
k_params.h_view(i,j,k).d = params[m].d;
k_params.h_view(i,j,k).h = params[m].h;
k_params.h_view(i,j,k).powern = params[m].powern;
k_params.h_view(i,j,k).beta = params[m].beta;
k_params.h_view(i,j,k).lam2 = params[m].lam2;
k_params.h_view(i,j,k).bigb = params[m].bigb;
k_params.h_view(i,j,k).bigr = params[m].bigr;
k_params.h_view(i,j,k).bigd = params[m].bigd;
k_params.h_view(i,j,k).lam1 = params[m].lam1;
k_params.h_view(i,j,k).biga = params[m].biga;
k_params.h_view(i,j,k).cutsq = params[m].cutsq;
k_params.h_view(i,j,k).c1 = params[m].c1;
k_params.h_view(i,j,k).c2 = params[m].c2;
k_params.h_view(i,j,k).c3 = params[m].c3;
k_params.h_view(i,j,k).c4 = params[m].c4;
}
k_params.template modify<LMPHostType>();

View File

@ -102,7 +102,6 @@ void PairTersoffMODKokkos<DeviceType>::init_style()
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
if (neighflag == FULL)
neighbor->requests[irequest]->ghost = 1;
else
@ -125,27 +124,27 @@ void PairTersoffMODKokkos<DeviceType>::setup_params()
for (i = 1; i <= n; i++)
for (j = 1; j <= n; j++)
for (k = 1; k <= n; k++) {
m = elem2param[map[i]][map[j]][map[k]];
k_params.h_view(i,j,k).powerm = params[m].powerm;
k_params.h_view(i,j,k).lam3 = params[m].lam3;
k_params.h_view(i,j,k).h = params[m].h;
k_params.h_view(i,j,k).powern = params[m].powern;
k_params.h_view(i,j,k).beta = params[m].beta;
k_params.h_view(i,j,k).lam2 = params[m].lam2;
k_params.h_view(i,j,k).bigb = params[m].bigb;
k_params.h_view(i,j,k).bigr = params[m].bigr;
k_params.h_view(i,j,k).bigd = params[m].bigd;
k_params.h_view(i,j,k).lam1 = params[m].lam1;
k_params.h_view(i,j,k).biga = params[m].biga;
k_params.h_view(i,j,k).cutsq = params[m].cutsq;
k_params.h_view(i,j,k).c1 = params[m].c1;
k_params.h_view(i,j,k).c2 = params[m].c2;
k_params.h_view(i,j,k).c3 = params[m].c3;
k_params.h_view(i,j,k).c4 = params[m].c4;
k_params.h_view(i,j,k).c5 = params[m].c5;
k_params.h_view(i,j,k).ca1 = params[m].ca1;
k_params.h_view(i,j,k).ca4 = params[m].ca4;
k_params.h_view(i,j,k).powern_del = params[m].powern_del;
m = elem2param[i-1][j-1][k-1];
k_params.h_view(i,j,k).powerm = params[m].powerm;
k_params.h_view(i,j,k).lam3 = params[m].lam3;
k_params.h_view(i,j,k).h = params[m].h;
k_params.h_view(i,j,k).powern = params[m].powern;
k_params.h_view(i,j,k).beta = params[m].beta;
k_params.h_view(i,j,k).lam2 = params[m].lam2;
k_params.h_view(i,j,k).bigb = params[m].bigb;
k_params.h_view(i,j,k).bigr = params[m].bigr;
k_params.h_view(i,j,k).bigd = params[m].bigd;
k_params.h_view(i,j,k).lam1 = params[m].lam1;
k_params.h_view(i,j,k).biga = params[m].biga;
k_params.h_view(i,j,k).cutsq = params[m].cutsq;
k_params.h_view(i,j,k).c1 = params[m].c1;
k_params.h_view(i,j,k).c2 = params[m].c2;
k_params.h_view(i,j,k).c3 = params[m].c3;
k_params.h_view(i,j,k).c4 = params[m].c4;
k_params.h_view(i,j,k).c5 = params[m].c5;
k_params.h_view(i,j,k).ca1 = params[m].ca1;
k_params.h_view(i,j,k).ca4 = params[m].ca4;
k_params.h_view(i,j,k).powern_del = params[m].powern_del;
}
k_params.template modify<LMPHostType>();

View File

@ -113,7 +113,6 @@ void PairTersoffZBLKokkos<DeviceType>::init_style()
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
if (neighflag == FULL)
neighbor->requests[irequest]->ghost = 1;
else
@ -136,30 +135,30 @@ void PairTersoffZBLKokkos<DeviceType>::setup_params()
for (i = 1; i <= n; i++)
for (j = 1; j <= n; j++)
for (k = 1; k <= n; k++) {
m = elem2param[map[i]][map[j]][map[k]];
k_params.h_view(i,j,k).powerm = params[m].powerm;
k_params.h_view(i,j,k).gamma = params[m].gamma;
k_params.h_view(i,j,k).lam3 = params[m].lam3;
k_params.h_view(i,j,k).c = params[m].c;
k_params.h_view(i,j,k).d = params[m].d;
k_params.h_view(i,j,k).h = params[m].h;
k_params.h_view(i,j,k).powern = params[m].powern;
k_params.h_view(i,j,k).beta = params[m].beta;
k_params.h_view(i,j,k).lam2 = params[m].lam2;
k_params.h_view(i,j,k).bigb = params[m].bigb;
k_params.h_view(i,j,k).bigr = params[m].bigr;
k_params.h_view(i,j,k).bigd = params[m].bigd;
k_params.h_view(i,j,k).lam1 = params[m].lam1;
k_params.h_view(i,j,k).biga = params[m].biga;
k_params.h_view(i,j,k).cutsq = params[m].cutsq;
k_params.h_view(i,j,k).c1 = params[m].c1;
k_params.h_view(i,j,k).c2 = params[m].c2;
k_params.h_view(i,j,k).c3 = params[m].c3;
k_params.h_view(i,j,k).c4 = params[m].c4;
k_params.h_view(i,j,k).Z_i = params[m].Z_i;
k_params.h_view(i,j,k).Z_j = params[m].Z_j;
k_params.h_view(i,j,k).ZBLcut = params[m].ZBLcut;
k_params.h_view(i,j,k).ZBLexpscale = params[m].ZBLexpscale;
m = elem2param[i-1][j-1][k-1];
k_params.h_view(i,j,k).powerm = params[m].powerm;
k_params.h_view(i,j,k).gamma = params[m].gamma;
k_params.h_view(i,j,k).lam3 = params[m].lam3;
k_params.h_view(i,j,k).c = params[m].c;
k_params.h_view(i,j,k).d = params[m].d;
k_params.h_view(i,j,k).h = params[m].h;
k_params.h_view(i,j,k).powern = params[m].powern;
k_params.h_view(i,j,k).beta = params[m].beta;
k_params.h_view(i,j,k).lam2 = params[m].lam2;
k_params.h_view(i,j,k).bigb = params[m].bigb;
k_params.h_view(i,j,k).bigr = params[m].bigr;
k_params.h_view(i,j,k).bigd = params[m].bigd;
k_params.h_view(i,j,k).lam1 = params[m].lam1;
k_params.h_view(i,j,k).biga = params[m].biga;
k_params.h_view(i,j,k).cutsq = params[m].cutsq;
k_params.h_view(i,j,k).c1 = params[m].c1;
k_params.h_view(i,j,k).c2 = params[m].c2;
k_params.h_view(i,j,k).c3 = params[m].c3;
k_params.h_view(i,j,k).c4 = params[m].c4;
k_params.h_view(i,j,k).Z_i = params[m].Z_i;
k_params.h_view(i,j,k).Z_j = params[m].Z_j;
k_params.h_view(i,j,k).ZBLcut = params[m].ZBLcut;
k_params.h_view(i,j,k).ZBLexpscale = params[m].ZBLexpscale;
}
k_params.template modify<LMPHostType>();

View File

@ -33,10 +33,10 @@ template<class DeviceType>
class RegBlockKokkos : public RegBlock {
friend class FixPour;
public:
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
public:
RegBlockKokkos(class LAMMPS *, int, char **);
~RegBlockKokkos();
void match_all_kokkos(int, DAT::t_int_1d);

View File

@ -630,22 +630,17 @@ void Finish::end(int flag)
// count neighbors in that list for stats purposes
// allow it to be Kokkos neigh list as well
for (m = 0; m < neighbor->old_nrequest; m++) {
for (m = 0; m < neighbor->old_nrequest; m++)
if ((neighbor->old_requests[m]->half ||
neighbor->old_requests[m]->gran ||
neighbor->old_requests[m]->respaouter ||
neighbor->old_requests[m]->half_from_full) &&
neighbor->old_requests[m]->skip == 0 &&
neighbor->lists[m] && neighbor->lists[m]->numneigh) {
if (!neighbor->lists[m] && lmp->kokkos &&
lmp->kokkos->neigh_list_kokkos(m)) break;
else break;
}
}
neighbor->lists[m] && neighbor->lists[m]->numneigh) break;
nneigh = 0;
if (m < neighbor->old_nrequest) {
if (neighbor->lists[m]) {
if (!neighbor->lists[m]->kokkos) {
int inum = neighbor->lists[m]->inum;
int *ilist = neighbor->lists[m]->ilist;
int *numneigh = neighbor->lists[m]->numneigh;
@ -675,23 +670,19 @@ void Finish::end(int flag)
// count neighbors in that list for stats purposes
// allow it to be Kokkos neigh list as well
for (m = 0; m < neighbor->old_nrequest; m++) {
for (m = 0; m < neighbor->old_nrequest; m++)
if (neighbor->old_requests[m]->full &&
neighbor->old_requests[m]->skip == 0) {
if (lmp->kokkos && lmp->kokkos->neigh_list_kokkos(m)) break;
else break;
}
}
neighbor->old_requests[m]->skip == 0) break;
nneighfull = 0;
if (m < neighbor->old_nrequest) {
if (neighbor->lists[m] && neighbor->lists[m]->numneigh) {
if (!neighbor->lists[m]->kokkos && neighbor->lists[m]->numneigh) {
int inum = neighbor->lists[m]->inum;
int *ilist = neighbor->lists[m]->ilist;
int *numneigh = neighbor->lists[m]->numneigh;
for (i = 0; i < inum; i++)
nneighfull += numneigh[ilist[i]];
} else if (!neighbor->lists[m] && lmp->kokkos)
} else if (lmp->kokkos)
nneighfull = lmp->kokkos->neigh_count(m);
tmp = nneighfull;
@ -865,7 +856,7 @@ void mpi_timings(const char *label, Timer *t, enum Timer::ttype tt,
time_cpu = tmp/nprocs*100.0;
// % variance from the average as measure of load imbalance
if ((time_sq/time - time) > 1.0e-10)
if (time > 1.0e-10)
time_sq = sqrt(time_sq/time - time)*100.0;
else
time_sq = 0.0;
@ -917,7 +908,7 @@ void omp_times(FixOMP *fix, const char *label, enum Timer::ttype which,
time_std /= nthreads;
time_total /= nthreads;
if ((time_std/time_avg -time_avg) > 1.0e-10)
if (time_avg > 1.0e-10)
time_std = sqrt(time_std/time_avg - time_avg)*100.0;
else
time_std = 0.0;

View File

@ -67,6 +67,11 @@ NeighList::NeighList(LAMMPS *lmp) : Pointers(lmp)
ipage = NULL;
dpage = NULL;
// Kokkos package
kokkos = 0;
execution_space = Host;
// USER-DPD package
ndxAIR_ssa = NULL;

View File

@ -69,6 +69,11 @@ class NeighList : protected Pointers {
NeighList *listcopy; // me = copy list, point to list I copy from
NeighList *listskip; // me = skip list, point to list I skip from
// Kokkos package
int kokkos; // 1 if list stores Kokkos data
ExecutionSpace execution_space;
// USER-DPD package and Shardlow Splitting Algorithm (SSA) support
uint16_t (*ndxAIR_ssa)[8]; // for each atom, last neighbor index of each AIR
@ -80,7 +85,6 @@ class NeighList : protected Pointers {
void post_constructor(class NeighRequest *);
void setup_pages(int, int); // setup page data structures
void grow(int,int); // grow all data structs
void stencil_allocate(int, int); // allocate stencil arrays
void print_attributes(); // debug routine
int get_maxlocal() {return maxatom;}
bigint memory_usage();

View File

@ -621,8 +621,7 @@ void Neighbor::init_pair()
delete [] neigh_stencil;
delete [] neigh_pair;
if (lmp->kokkos) nlist = init_lists_kokkos();
else nlist = nrequest;
nlist = nrequest;
lists = new NeighList*[nrequest];
neigh_bin = new NBin*[nrequest];
@ -634,11 +633,10 @@ void Neighbor::init_pair()
// wait to allocate initial pages until copy lists are detected
for (i = 0; i < nrequest; i++) {
if (requests[i]->kokkos_host || requests[i]->kokkos_device) {
lists[i] = NULL;
continue;
}
lists[i] = new NeighList(lmp);
if (requests[i]->kokkos_host || requests[i]->kokkos_device)
create_kokkos_list(i);
else
lists[i] = new NeighList(lmp);
lists[i]->index = i;
if (requests[i]->pair) {
@ -680,10 +678,8 @@ void Neighbor::init_pair()
// would be useful when reax/c used in hybrid mode, e.g. with airebo
for (i = 0; i < nrequest; i++) {
if (lists[i] == NULL) continue; // Kokkos
if (requests[i]->pair && requests[i]->half && requests[i]->newton != 2) {
for (j = 0; j < nrequest; j++) {
if (lists[j] == NULL) continue; // Kokkos
if (requests[j]->full && requests[j]->occasional == 0 &&
!requests[j]->skip && !requests[j]->copy) break;
}
@ -708,10 +704,8 @@ void Neighbor::init_pair()
// for 1st or 2nd check, parent can be copy list or pair or fix
for (i = 0; i < nrequest; i++) {
if (lists[i] == NULL) continue; // Kokkos
if (!requests[i]->fix && !requests[i]->compute) continue;
for (j = 0; j < nrequest; j++) {
if (lists[j] == NULL) continue; // Kokkos
if (requests[i]->half && requests[j]->pair &&
!requests[j]->skip && requests[j]->half && !requests[j]->copy)
break;
@ -733,7 +727,6 @@ void Neighbor::init_pair()
continue;
}
for (j = 0; j < nrequest; j++) {
if (lists[j] == NULL) continue; // Kokkos
if (requests[i]->half && requests[j]->pair &&
!requests[j]->skip && requests[j]->full && !requests[j]->copy)
break;
@ -844,7 +837,6 @@ void Neighbor::init_pair()
int dnummax = 0;
for (i = 0; i < nlist; i++) {
if (lists[i] == NULL) continue; // Kokkos
if (lists[i]->copy) continue;
lists[i]->setup_pages(pgsize,oneatom);
dnummax = MAX(dnummax,lists[i]->dnum);
@ -864,14 +856,8 @@ void Neighbor::init_pair()
// also Kokkos list initialization
int maxatom = atom->nmax;
for (i = 0; i < nlist; i++) {
if (lists[i]) {
if (neigh_pair[i] && !lists[i]->copy) lists[i]->grow(maxatom,maxatom);
} else {
init_list_flags1_kokkos(i);
init_list_grow_kokkos(i);
}
}
for (i = 0; i < nlist; i++)
if (neigh_pair[i] && !lists[i]->copy) lists[i]->grow(maxatom,maxatom);
// plist = indices of perpetual NPair classes
// perpetual = non-occasional, re-built at every reneighboring
@ -885,10 +871,8 @@ void Neighbor::init_pair()
plist = new int[nlist];
for (i = 0; i < nlist; i++) {
if (lists[i]) {
if (lists[i]->occasional == 0 && lists[i]->pair_method)
plist[npair_perpetual++] = i;
} else init_list_flags2_kokkos(i);
if (lists[i]->occasional == 0 && lists[i]->pair_method)
plist[npair_perpetual++] = i;
}
for (i = 0; i < nstencil; i++) {
@ -910,7 +894,6 @@ void Neighbor::init_pair()
while (!done) {
done = 1;
for (i = 0; i < npair_perpetual; i++) {
if (!lists[plist[i]]) continue; // Kokkos check
ptr = NULL;
if (lists[plist[i]]->listcopy) ptr = lists[plist[i]]->listcopy;
if (lists[plist[i]]->listskip) ptr = lists[plist[i]]->listskip;
@ -1154,15 +1137,14 @@ void Neighbor::print_pairwise_info()
else if (requests[i]->respamiddle) kind = "respa/middle";
else if (requests[i]->respaouter) kind = "respa/outer";
else if (requests[i]->half_from_full) kind = "half/from/full";
else if (requests[i]->full_cluster) kind = "full/cluster"; // Kokkos
fprintf(out," kind: %s",kind);
if (requests[i]->occasional) fprintf(out,", occasional");
else fprintf(out,", perpetual");
if (requests[i]->ghost) fprintf(out,", ghost");
if (requests[i]->ssa) fprintf(out,", ssa");
if (requests[i]->omp) fprintf(out,", omp");
if (requests[i]->intel) fprintf(out,", intel");
if (requests[i]->kokkos_device) fprintf(out,", kokkos_device");
if (requests[i]->kokkos_host) fprintf(out,", kokkos_host");
if (requests[i]->copy)
fprintf(out,", copy from (%d)",requests[i]->otherlist+1);
if (requests[i]->skip)
@ -1237,13 +1219,17 @@ int Neighbor::choose_bin(NeighRequest *rq)
// flags for settings the request + system requires of NBin class
// ssaflag = no/yes ssa request
// intelflag = no/yes intel request
// kokkos_device_flag = no/yes kokkos device request
// kokkos_host_flag = no/yes kokkos host request
int ssaflag,intelflag;
int ssaflag,intelflag,kokkos_device_flag,kokkos_host_flag;
ssaflag = intelflag = 0;
ssaflag = intelflag = kokkos_device_flag = kokkos_host_flag = 0;
if (rq->ssa) ssaflag = NB_SSA;
if (rq->intel) intelflag = NB_INTEL;
if (rq->kokkos_device) kokkos_device_flag = NB_KOKKOS_DEVICE;
if (rq->kokkos_host) kokkos_host_flag = NB_KOKKOS_HOST;
// use flags to match exactly one of NBin class masks, bit by bit
@ -1254,6 +1240,8 @@ int Neighbor::choose_bin(NeighRequest *rq)
if (ssaflag != (mask & NB_SSA)) continue;
if (intelflag != (mask & NB_INTEL)) continue;
if (kokkos_device_flag != (mask & NB_KOKKOS_DEVICE)) continue;
if (kokkos_host_flag != (mask & NB_KOKKOS_HOST)) continue;
return i+1;
}
@ -1308,6 +1296,7 @@ int Neighbor::choose_stencil(NeighRequest *rq)
else if (rq->newton == 1) newtflag = 1;
else if (rq->newton == 2) newtflag = 0;
// use flags to match exactly one of NStencil class masks, bit by bit
// exactly one of halfflag,fullflag is set and thus must match
@ -1381,16 +1370,18 @@ int Neighbor::choose_pair(NeighRequest *rq)
// ssaflag = no/yes request
// ompflag = no/yes omp request
// intelflag = no/yes intel request
// kokkos_device_flag = no/yes Kokkos device request
// kokkos_host_flag = no/yes Kokkos host request
// newtflag = newton off/on request
// style = NSQ/BIN/MULTI neighbor style
// triclinic = orthgonal/triclinic box
int copyflag,skipflag,halfflag,fullflag,halffullflag,sizeflag,respaflag,
ghostflag,off2onflag,onesideflag,ssaflag,ompflag,intelflag;
ghostflag,off2onflag,onesideflag,ssaflag,ompflag,intelflag,kokkos_device_flag,kokkos_host_flag;
copyflag = skipflag = halfflag = fullflag = halffullflag = sizeflag =
ghostflag = respaflag = off2onflag = onesideflag = ssaflag =
ompflag = intelflag = 0;
ompflag = intelflag = kokkos_device_flag = kokkos_host_flag = 0;
if (rq->copy) copyflag = NP_COPY;
if (rq->skip) skipflag = NP_SKIP;
@ -1420,6 +1411,8 @@ int Neighbor::choose_pair(NeighRequest *rq)
if (rq->ssa) ssaflag = NP_SSA;
if (rq->omp) ompflag = NP_OMP;
if (rq->intel) intelflag = NP_INTEL;
if (rq->kokkos_device) kokkos_device_flag = NP_KOKKOS_DEVICE;
if (rq->kokkos_host) kokkos_host_flag = NP_KOKKOS_HOST;
int newtflag;
if (rq->newton == 0 && newton_pair) newtflag = 1;
@ -1460,6 +1453,8 @@ int Neighbor::choose_pair(NeighRequest *rq)
if (ssaflag != (mask & NP_SSA)) continue;
if (ompflag != (mask & NP_OMP)) continue;
if (intelflag != (mask & NP_INTEL)) continue;
if (kokkos_device_flag != (mask & NP_KOKKOS_DEVICE)) continue;
if (kokkos_host_flag != (mask & NP_KOKKOS_HOST)) continue;
if (style == NSQ && !(mask & NP_NSQ)) continue;
if (style == BIN && !(mask & NP_BIN)) continue;
@ -1802,6 +1797,7 @@ void Neighbor::build_one(class NeighList *mylist, int preflag)
ns->create();
}
// build the list
np->build_setup();

View File

@ -201,7 +201,7 @@ class Neighbor : protected Pointers {
void init_styles();
void init_pair();
void init_topology();
virtual void init_topology();
void print_pairwise_info();
void requests_new2old();
@ -220,18 +220,17 @@ class Neighbor : protected Pointers {
int copymode;
virtual void init_cutneighsq_kokkos(int) {}
virtual int init_lists_kokkos() {return 0;}
virtual void init_list_flags1_kokkos(int) {}
virtual void init_list_flags2_kokkos(int) {}
virtual void create_kokkos_list(int) {}
virtual void init_ex_type_kokkos(int) {}
virtual void init_ex_bit_kokkos() {}
virtual void init_ex_mol_bit_kokkos() {}
virtual void init_list_grow_kokkos(int) {}
};
namespace NeighConst {
static const int NB_SSA = 1<<0;
static const int NB_INTEL = 1<<1;
static const int NB_KOKKOS_DEVICE = 1<<2;
static const int NB_KOKKOS_HOST = 1<<3;
static const int NS_HALF = 1<<0;
static const int NS_FULL = 1<<1;
@ -266,6 +265,8 @@ namespace NeighConst {
static const int NP_NEWTOFF = 1<<17;
static const int NP_ORTHO = 1<<18;
static const int NP_TRI = 1<<19;
static const int NP_KOKKOS_DEVICE = 1<<20;
static const int NP_KOKKOS_HOST = 1<<21;
}
}

View File

@ -31,7 +31,7 @@ class NPair : protected Pointers {
NPair(class LAMMPS *);
virtual ~NPair() {}
void copy_neighbor_info();
virtual void copy_neighbor_info();
void build_setup();
virtual void build(class NeighList *) = 0;
@ -94,8 +94,8 @@ class NPair : protected Pointers {
// methods for all NPair variants
void copy_bin_setup_info();
void copy_bin_info();
void copy_stencil_info();
virtual void copy_bin_info();
virtual void copy_stencil_info();
int exclusion(int, int, int,
int, int *, tagint *) const; // test for pair exclusion

View File

@ -37,7 +37,7 @@ class NStencil : protected Pointers {
NStencil(class LAMMPS *);
virtual ~NStencil();
void copy_neighbor_info();
void create_setup();
virtual void create_setup();
bigint memory_usage();
virtual void create() = 0;