From 8318c6781697cdb8dd88a3208758c367295a99d2 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 7 Dec 2016 13:00:27 -0700 Subject: [PATCH 1/4] Kokkos neighbor refactor --- src/KOKKOS/Install.sh | 7 +- src/KOKKOS/atom_vec_kokkos.h | 14 +- src/KOKKOS/fix_qeq_reax_kokkos.cpp | 2 - src/KOKKOS/kokkos.cpp | 49 +- src/KOKKOS/kokkos.h | 1 - src/KOKKOS/nbin_kokkos.cpp | 144 ++++ src/KOKKOS/nbin_kokkos.h | 153 ++++ src/KOKKOS/neigh_list_kokkos.cpp | 48 +- src/KOKKOS/neigh_list_kokkos.h | 15 +- src/KOKKOS/neighbor_kokkos.cpp | 374 ++------- src/KOKKOS/neighbor_kokkos.h | 368 +-------- src/KOKKOS/npair_kokkos.cpp | 746 ++++++++++++++++++ src/KOKKOS/npair_kokkos.h | 435 ++++++++++ src/KOKKOS/pair_buck_coul_cut_kokkos.cpp | 9 +- src/KOKKOS/pair_buck_coul_long_kokkos.cpp | 4 +- src/KOKKOS/pair_buck_kokkos.cpp | 9 +- src/KOKKOS/pair_buck_kokkos.h | 5 +- src/KOKKOS/pair_coul_cut_kokkos.cpp | 4 +- src/KOKKOS/pair_coul_debye_kokkos.cpp | 9 +- src/KOKKOS/pair_coul_dsf_kokkos.cpp | 2 - src/KOKKOS/pair_coul_long_kokkos.cpp | 4 +- src/KOKKOS/pair_coul_wolf_kokkos.cpp | 2 - src/KOKKOS/pair_eam_alloy_kokkos.cpp | 2 - src/KOKKOS/pair_eam_fs_kokkos.cpp | 2 - src/KOKKOS/pair_eam_kokkos.cpp | 2 - src/KOKKOS/pair_kokkos.h | 189 +---- ..._lj_charmm_coul_charmm_implicit_kokkos.cpp | 4 +- .../pair_lj_charmm_coul_charmm_kokkos.cpp | 4 +- .../pair_lj_charmm_coul_long_kokkos.cpp | 4 +- src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp | 9 +- .../pair_lj_class2_coul_long_kokkos.cpp | 4 +- src/KOKKOS/pair_lj_class2_kokkos.cpp | 9 +- src/KOKKOS/pair_lj_class2_kokkos.h | 5 +- src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp | 9 +- src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp | 9 +- src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp | 9 +- src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp | 4 +- src/KOKKOS/pair_lj_cut_kokkos.cpp | 9 +- src/KOKKOS/pair_lj_cut_kokkos.h | 5 +- src/KOKKOS/pair_lj_expand_kokkos.cpp | 9 +- src/KOKKOS/pair_lj_expand_kokkos.h | 5 +- .../pair_lj_gromacs_coul_gromacs_kokkos.cpp | 4 +- src/KOKKOS/pair_lj_gromacs_kokkos.cpp | 4 +- src/KOKKOS/pair_lj_sdk_kokkos.cpp | 9 +- src/KOKKOS/pair_lj_sdk_kokkos.h | 5 +- src/KOKKOS/pair_reax_c_kokkos.cpp | 2 - src/KOKKOS/pair_sw_kokkos.cpp | 1 - src/KOKKOS/pair_table_kokkos.cpp | 35 +- src/KOKKOS/pair_table_kokkos.h | 10 +- src/KOKKOS/pair_tersoff_kokkos.cpp | 41 +- src/KOKKOS/pair_tersoff_mod_kokkos.cpp | 43 +- src/KOKKOS/pair_tersoff_zbl_kokkos.cpp | 49 +- src/KOKKOS/region_block_kokkos.h | 2 +- src/finish.cpp | 27 +- src/neigh_list.cpp | 5 + src/neigh_list.h | 6 +- src/neighbor.cpp | 62 +- src/neighbor.h | 11 +- src/npair.h | 6 +- src/nstencil.h | 2 +- 60 files changed, 1742 insertions(+), 1279 deletions(-) create mode 100644 src/KOKKOS/nbin_kokkos.cpp create mode 100644 src/KOKKOS/nbin_kokkos.h create mode 100644 src/KOKKOS/npair_kokkos.cpp create mode 100644 src/KOKKOS/npair_kokkos.h diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 93adf58ef5..ebafb87466 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -105,11 +105,14 @@ action modify_kokkos.cpp action modify_kokkos.h action neigh_bond_kokkos.cpp action neigh_bond_kokkos.h -action neigh_full_kokkos.h action neigh_list_kokkos.cpp action neigh_list_kokkos.h action neighbor_kokkos.cpp action neighbor_kokkos.h +action npair_kokkos.cpp +action npair_kokkos.h +action nbin_kokkos.cpp +action nbin_kokkos.h action math_special_kokkos.cpp action math_special_kokkos.h action pair_buck_coul_cut_kokkos.cpp @@ -169,8 +172,6 @@ action pair_reax_c_kokkos.cpp pair_reax_c.cpp action pair_reax_c_kokkos.h pair_reax_c.h action pair_sw_kokkos.cpp pair_sw.cpp action pair_sw_kokkos.h pair_sw.h -action pair_vashishta_kokkos.cpp pair_vashishta.cpp -action pair_vashishta_kokkos.h pair_vashishta.h action pair_table_kokkos.cpp action pair_table_kokkos.h action pair_tersoff_kokkos.cpp pair_tersoff.cpp diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 7ac66f1626..fbeeaf96be 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -83,13 +83,8 @@ class AtomVecKokkos : public AtomVec { std::is_same::value, Kokkos::CudaHostPinnedSpace,typename ViewType::memory_space>::type, Kokkos::MemoryTraits > mirror_type; - if (buffer_size == 0) { - buffer = Kokkos::kokkos_malloc(src.capacity()); - buffer_size = src.capacity(); - } else if (buffer_size < src.capacity()) { + if(buffer_size < src.capacity()) buffer = Kokkos::kokkos_realloc(buffer,src.capacity()); - buffer_size = src.capacity(); - } return mirror_type( buffer , src.dimension_0() , src.dimension_1() , @@ -109,13 +104,8 @@ class AtomVecKokkos : public AtomVec { std::is_same::value, Kokkos::CudaHostPinnedSpace,typename ViewType::memory_space>::type, Kokkos::MemoryTraits > mirror_type; - if (buffer_size == 0) { - buffer = Kokkos::kokkos_malloc(src.capacity()*sizeof(typename ViewType::value_type)); - buffer_size = src.capacity(); - } else if (buffer_size < src.capacity()) { + if(buffer_size < src.capacity()) buffer = Kokkos::kokkos_realloc(buffer,src.capacity()*sizeof(typename ViewType::value_type)); - buffer_size = src.capacity(); - } mirror_type tmp_view( (typename ViewType::value_type*)buffer , src.dimension_0() , src.dimension_1() , diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 0c0039a18a..844d48dae0 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -125,12 +125,10 @@ void FixQEqReaxKokkos::init() neighbor->requests[irequest]->pair = 0; neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else { //if (neighflag == HALF || neighflag == HALFTHREAD) neighbor->requests[irequest]->fix = 1; neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; neighbor->requests[irequest]->ghost = 1; } } diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 72bf094e4b..763c97d69b 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -168,7 +168,6 @@ void KokkosLMP::accelerator(int narg, char **arg) else neighflag = HALF; } else if (strcmp(arg[iarg+1],"n2") == 0) neighflag = N2; - else if (strcmp(arg[iarg+1],"full/cluster") == 0) neighflag = FULLCLUSTER; else error->all(FLERR,"Illegal package kokkos command"); iarg += 2; } else if (strcmp(arg[iarg],"binsize") == 0) { @@ -232,20 +231,6 @@ void KokkosLMP::accelerator(int narg, char **arg) called by Finish ------------------------------------------------------------------------- */ -int KokkosLMP::neigh_list_kokkos(int m) -{ - NeighborKokkos *nk = (NeighborKokkos *) neighbor; - if (nk->lists_host[m] && nk->lists_host[m]->d_numneigh.dimension_0()) - return 1; - if (nk->lists_device[m] && nk->lists_device[m]->d_numneigh.dimension_0()) - return 1; - return 0; -} - -/* ---------------------------------------------------------------------- - called by Finish -------------------------------------------------------------------------- */ - int KokkosLMP::neigh_count(int m) { int inum; @@ -255,28 +240,30 @@ int KokkosLMP::neigh_count(int m) ArrayTypes::t_int_1d h_numneigh; NeighborKokkos *nk = (NeighborKokkos *) neighbor; - if (nk->lists_host[m]) { - inum = nk->lists_host[m]->inum; + if (nk->lists[m]->execution_space == Host) { + NeighListKokkos* nlistKK = (NeighListKokkos*) nk->lists[m]; + inum = nlistKK->inum; #ifndef KOKKOS_USE_CUDA_UVM - h_ilist = Kokkos::create_mirror_view(nk->lists_host[m]->d_ilist); - h_numneigh = Kokkos::create_mirror_view(nk->lists_host[m]->d_numneigh); + h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist); + h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh); #else - h_ilist = nk->lists_host[m]->d_ilist; - h_numneigh = nk->lists_host[m]->d_numneigh; + h_ilist = nlistKK->d_ilist; + h_numneigh = nlistKK->d_numneigh; #endif - Kokkos::deep_copy(h_ilist,nk->lists_host[m]->d_ilist); - Kokkos::deep_copy(h_numneigh,nk->lists_host[m]->d_numneigh); - } else if (nk->lists_device[m]) { - inum = nk->lists_device[m]->inum; + Kokkos::deep_copy(h_ilist,nlistKK->d_ilist); + Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh); + } else if (nk->lists[m]->execution_space == Device) { + NeighListKokkos* nlistKK = (NeighListKokkos*) nk->lists[m]; + inum = nlistKK->inum; #ifndef KOKKOS_USE_CUDA_UVM - h_ilist = Kokkos::create_mirror_view(nk->lists_device[m]->d_ilist); - h_numneigh = Kokkos::create_mirror_view(nk->lists_device[m]->d_numneigh); + h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist); + h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh); #else - h_ilist = nk->lists_device[m]->d_ilist; - h_numneigh = nk->lists_device[m]->d_numneigh; + h_ilist = nlistKK->d_ilist; + h_numneigh = nlistKK->d_numneigh; #endif - Kokkos::deep_copy(h_ilist,nk->lists_device[m]->d_ilist); - Kokkos::deep_copy(h_numneigh,nk->lists_device[m]->d_numneigh); + Kokkos::deep_copy(h_ilist,nlistKK->d_ilist); + Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh); } for (int i = 0; i < inum; i++) nneigh += h_numneigh[h_ilist[i]]; diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index 1058affcfc..3b91a56ea7 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -34,7 +34,6 @@ class KokkosLMP : protected Pointers { KokkosLMP(class LAMMPS *, int, char **); ~KokkosLMP(); void accelerator(int, char **); - int neigh_list_kokkos(int); int neigh_count(int); private: static void my_signal_handler(int); diff --git a/src/KOKKOS/nbin_kokkos.cpp b/src/KOKKOS/nbin_kokkos.cpp new file mode 100644 index 0000000000..feec72f45a --- /dev/null +++ b/src/KOKKOS/nbin_kokkos.cpp @@ -0,0 +1,144 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "nbin_kokkos.h" +#include "neighbor.h" +#include "atom_kokkos.h" +#include "group.h" +#include "domain.h" +#include "comm.h" +#include "update.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; + +enum{NSQ,BIN,MULTI}; // also in Neighbor + +#define SMALL 1.0e-6 +#define CUT2BIN_RATIO 100 + +/* ---------------------------------------------------------------------- */ + +template +NBinKokkos::NBinKokkos(LAMMPS *lmp) : NBinStandard(lmp) { + atoms_per_bin = 16; + + d_resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize"); +#ifndef KOKKOS_USE_CUDA_UVM + h_resize = Kokkos::create_mirror_view(d_resize); +#else + h_resize = d_resize; +#endif + h_resize() = 1; + +} + +/* ---------------------------------------------------------------------- + setup neighbor binning geometry + bin numbering in each dimension is global: + 0 = 0.0 to binsize, 1 = binsize to 2*binsize, etc + nbin-1,nbin,etc = bbox-binsize to bbox, bbox to bbox+binsize, etc + -1,-2,etc = -binsize to 0.0, -2*binsize to -binsize, etc + code will work for any binsize + since next(xyz) and stencil extend as far as necessary + binsize = 1/2 of cutoff is roughly optimal + for orthogonal boxes: + a dim must be filled exactly by integer # of bins + in periodic, procs on both sides of PBC must see same bin boundary + in non-periodic, coord2bin() still assumes this by use of nbin xyz + for triclinic boxes: + tilted simulation box cannot contain integer # of bins + stencil & neigh list built differently to account for this + mbinlo = lowest global bin any of my ghost atoms could fall into + mbinhi = highest global bin any of my ghost atoms could fall into + mbin = number of bins I need in a dimension +------------------------------------------------------------------------- */ + +template +void NBinKokkos::bin_atoms_setup(int nall) +{ + if (mbins > k_bins.d_view.dimension_0()) { + k_bins = DAT::tdual_int_2d("Neighbor::d_bins",mbins,atoms_per_bin); + bins = k_bins.view(); + + k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",mbins); + bincount = k_bincount.view(); + last_bin_memory = update->ntimestep; + } + + last_bin = update->ntimestep; +} + +/* ---------------------------------------------------------------------- + bin owned and ghost atoms +------------------------------------------------------------------------- */ + +template +void NBinKokkos::bin_atoms() +{ + h_resize() = 1; + + while(h_resize() > 0) { + h_resize() = 0; + deep_copy(d_resize, h_resize); + + MemsetZeroFunctor f_zero; + f_zero.ptr = (void*) k_bincount.view().ptr_on_device(); + Kokkos::parallel_for(mbins, f_zero); + DeviceType::fence(); + + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); + x = atomKK->k_x.view(); + + bboxlo_[0] = bboxlo[0]; bboxlo_[1] = bboxlo[1]; bboxlo_[2] = bboxlo[2]; + bboxhi_[0] = bboxhi[0]; bboxhi_[1] = bboxhi[1]; bboxhi_[2] = bboxhi[2]; + + NPairKokkosBinAtomsFunctor f(*this); + + Kokkos::parallel_for(atom->nlocal+atom->nghost, f); + DeviceType::fence(); + + deep_copy(h_resize, d_resize); + if(h_resize()) { + + atoms_per_bin += 16; + k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin); + bins = k_bins.view(); + c_bins = bins; + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void NBinKokkos::binatomsItem(const int &i) const +{ + const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2)); + + const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1); + if(ac < bins.dimension_1()) { + bins(ibin, ac) = i; + } else { + d_resize() = 1; + } +} + +namespace LAMMPS_NS { +template class NBinKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class NBinKokkos; +#endif +} diff --git a/src/KOKKOS/nbin_kokkos.h b/src/KOKKOS/nbin_kokkos.h new file mode 100644 index 0000000000..de3cf41d19 --- /dev/null +++ b/src/KOKKOS/nbin_kokkos.h @@ -0,0 +1,153 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef NBIN_CLASS + +NBinStyle(kk/host, + NBinKokkos, + NB_KOKKOS_HOST) + +NBinStyle(kk/device, + NBinKokkos, + NB_KOKKOS_DEVICE) + +#else + +#ifndef LMP_NBIN_KOKKOS_H +#define LMP_NBIN_KOKKOS_H + +#include "nbin_standard.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +class NBinKokkos : public NBinStandard { + public: + typedef ArrayTypes AT; + + NBinKokkos(class LAMMPS *); + ~NBinKokkos() {} + void bin_atoms_setup(int); + void bin_atoms(); + + int atoms_per_bin; + DAT::tdual_int_1d k_bincount; + DAT::tdual_int_2d k_bins; + + typename AT::t_int_1d bincount; + const typename AT::t_int_1d_const c_bincount; + typename AT::t_int_2d bins; + typename AT::t_int_2d_const c_bins; + typename AT::t_int_scalar d_resize; + typename ArrayTypes::t_int_scalar h_resize; + typename AT::t_x_array_randomread x; + + KOKKOS_INLINE_FUNCTION + void binatomsItem(const int &i) const; + + KOKKOS_INLINE_FUNCTION + int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const + { + int ix,iy,iz; + + if (x >= bboxhi_[0]) + ix = static_cast ((x-bboxhi_[0])*bininvx) + nbinx; + else if (x >= bboxlo_[0]) { + ix = static_cast ((x-bboxlo_[0])*bininvx); + ix = MIN(ix,nbinx-1); + } else + ix = static_cast ((x-bboxlo_[0])*bininvx) - 1; + + if (y >= bboxhi_[1]) + iy = static_cast ((y-bboxhi_[1])*bininvy) + nbiny; + else if (y >= bboxlo_[1]) { + iy = static_cast ((y-bboxlo_[1])*bininvy); + iy = MIN(iy,nbiny-1); + } else + iy = static_cast ((y-bboxlo_[1])*bininvy) - 1; + + if (z >= bboxhi_[2]) + iz = static_cast ((z-bboxhi_[2])*bininvz) + nbinz; + else if (z >= bboxlo_[2]) { + iz = static_cast ((z-bboxlo_[2])*bininvz); + iz = MIN(iz,nbinz-1); + } else + iz = static_cast ((z-bboxlo_[2])*bininvz) - 1; + + return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); + } + + KOKKOS_INLINE_FUNCTION + int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const + { + int ix,iy,iz; + + if (x >= bboxhi_[0]) + ix = static_cast ((x-bboxhi_[0])*bininvx) + nbinx; + else if (x >= bboxlo_[0]) { + ix = static_cast ((x-bboxlo_[0])*bininvx); + ix = MIN(ix,nbinx-1); + } else + ix = static_cast ((x-bboxlo_[0])*bininvx) - 1; + + if (y >= bboxhi_[1]) + iy = static_cast ((y-bboxhi_[1])*bininvy) + nbiny; + else if (y >= bboxlo_[1]) { + iy = static_cast ((y-bboxlo_[1])*bininvy); + iy = MIN(iy,nbiny-1); + } else + iy = static_cast ((y-bboxlo_[1])*bininvy) - 1; + + if (z >= bboxhi_[2]) + iz = static_cast ((z-bboxhi_[2])*bininvz) + nbinz; + else if (z >= bboxlo_[2]) { + iz = static_cast ((z-bboxlo_[2])*bininvz); + iz = MIN(iz,nbinz-1); + } else + iz = static_cast ((z-bboxlo_[2])*bininvz) - 1; + + i[0] = ix - mbinxlo; + i[1] = iy - mbinylo; + i[2] = iz - mbinzlo; + + return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); + } + + private: + double bboxlo_[3],bboxhi_[3]; +}; + +template +struct NPairKokkosBinAtomsFunctor { + typedef DeviceType device_type; + + const NBinKokkos c; + + NPairKokkosBinAtomsFunctor(const NBinKokkos &_c): + c(_c) {}; + ~NPairKokkosBinAtomsFunctor() {} + KOKKOS_INLINE_FUNCTION + void operator() (const int & i) const { + c.binatomsItem(i); + } +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/neigh_list_kokkos.cpp b/src/KOKKOS/neigh_list_kokkos.cpp index cbba2120bd..b1b4e4467a 100644 --- a/src/KOKKOS/neigh_list_kokkos.cpp +++ b/src/KOKKOS/neigh_list_kokkos.cpp @@ -34,9 +34,8 @@ void NeighListKokkos::clean_copy() ipage = NULL; dpage = NULL; - maxstencil = 0; - ghostflag = 0; - maxstencil_multi = 0; + + maxatoms = 0; } /* ---------------------------------------------------------------------- */ @@ -70,49 +69,6 @@ void NeighListKokkos::grow(int nmax) /* ---------------------------------------------------------------------- */ -template -void NeighListKokkos::stencil_allocate(int smax, int style) -{ - int i; - - if (style == BIN) { - if (smax > maxstencil) { - maxstencil = smax; - d_stencil = - memory->create_kokkos(d_stencil,h_stencil,stencil,maxstencil, - "neighlist:stencil"); - if (ghostflag) { - memory->create_kokkos(d_stencilxyz,h_stencilxyz,stencilxyz,maxstencil, - 3,"neighlist:stencilxyz"); - } - } - - } else { - int n = atom->ntypes; - if (maxstencil_multi == 0) { - nstencil_multi = new int[n+1]; - stencil_multi = new int*[n+1]; - distsq_multi = new double*[n+1]; - for (i = 1; i <= n; i++) { - nstencil_multi[i] = 0; - stencil_multi[i] = NULL; - distsq_multi[i] = NULL; - } - } - if (smax > maxstencil_multi) { - maxstencil_multi = smax; - for (i = 1; i <= n; i++) { - memory->destroy(stencil_multi[i]); - memory->destroy(distsq_multi[i]); - memory->create(stencil_multi[i],maxstencil_multi, - "neighlist:stencil_multi"); - memory->create(distsq_multi[i],maxstencil_multi, - "neighlist:distsq_multi"); - } - } - } -} - namespace LAMMPS_NS { template class NeighListKokkos; #ifdef KOKKOS_HAVE_CUDA diff --git a/src/KOKKOS/neigh_list_kokkos.h b/src/KOKKOS/neigh_list_kokkos.h index 85f0f38d2c..393fa478a1 100644 --- a/src/KOKKOS/neigh_list_kokkos.h +++ b/src/KOKKOS/neigh_list_kokkos.h @@ -20,7 +20,7 @@ namespace LAMMPS_NS { -enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u,FULLCLUSTER=16u}; +enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u}; class AtomNeighbors { @@ -74,14 +74,12 @@ public: typename DAT::tdual_int_1d k_ilist; // local indices of I atoms typename ArrayTypes::t_int_1d d_ilist; typename ArrayTypes::t_int_1d d_numneigh; // # of J neighs for each I - typename ArrayTypes::t_int_1d d_stencil; // # of J neighs for each I - typename ArrayTypes::t_int_1d h_stencil; // # of J neighs per I - typename ArrayTypes::t_int_1d_3 d_stencilxyz; - typename ArrayTypes::t_int_1d_3 h_stencilxyz; NeighListKokkos(class LAMMPS *lmp): - NeighList(lmp) {_stride = 1; maxneighs = 16;}; - ~NeighListKokkos() {stencil = NULL; numneigh = NULL; ilist = NULL;}; + NeighList(lmp) {_stride = 1; maxneighs = 16; kokkos = 1; + execution_space = ExecutionSpaceFromDevice::space; + }; + ~NeighListKokkos() {numneigh = NULL; ilist = NULL;}; KOKKOS_INLINE_FUNCTION AtomNeighbors get_neighbors(const int &i) const { @@ -99,7 +97,8 @@ public: int& num_neighs(const int & i) const { return d_numneigh(i); } - void stencil_allocate(int smax, int style); + private: + int maxatoms; }; } diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp index 31fa1859f9..ff154c9919 100644 --- a/src/KOKKOS/neighbor_kokkos.cpp +++ b/src/KOKKOS/neighbor_kokkos.cpp @@ -21,11 +21,10 @@ #include "atom_masks.h" #include "error.h" #include "kokkos.h" -#include "force.h" -#include "bond.h" -#include "angle.h" -#include "dihedral.h" -#include "improper.h" +#include "style_nbin.h" +#include "style_nstencil.h" +#include "style_npair.h" +#include "style_ntopo.h" using namespace LAMMPS_NS; @@ -36,18 +35,11 @@ enum{NSQ,BIN,MULTI}; // also in neigh_list.cpp NeighborKokkos::NeighborKokkos(LAMMPS *lmp) : Neighbor(lmp), neighbond_host(lmp),neighbond_device(lmp) { - atoms_per_bin = 16; - - nlist_host = 0; - lists_host = NULL; - pair_build_host = NULL; - stencil_create_host = NULL; - nlist_device = 0; - lists_device = NULL; - pair_build_device = NULL; - stencil_create_device = NULL; - device_flag = 0; + bondlist = NULL; + anglelist = NULL; + dihedrallist = NULL; + improperlist = NULL; } /* ---------------------------------------------------------------------- */ @@ -58,14 +50,6 @@ NeighborKokkos::~NeighborKokkos() memory->destroy_kokkos(k_cutneighsq,cutneighsq); cutneighsq = NULL; - for (int i = 0; i < nlist_host; i++) delete lists_host[i]; - delete [] lists_host; - for (int i = 0; i < nlist_device; i++) delete lists_device[i]; - delete [] lists_device; - - delete [] pair_build_device; - delete [] pair_build_host; - memory->destroy_kokkos(k_ex_type,ex_type); memory->destroy_kokkos(k_ex1_type,ex1_type); memory->destroy_kokkos(k_ex2_type,ex2_type); @@ -89,6 +73,11 @@ void NeighborKokkos::init() { atomKK = (AtomKokkos *) atom; Neighbor::init(); + + // 1st time allocation of xhold + + if (dist_check) + xhold = DAT::tdual_x_array("neigh:xhold",maxhold); } /* ---------------------------------------------------------------------- */ @@ -101,158 +90,13 @@ void NeighborKokkos::init_cutneighsq_kokkos(int n) /* ---------------------------------------------------------------------- */ -int NeighborKokkos::init_lists_kokkos() +void NeighborKokkos::create_kokkos_list(int i) { - int i; - - for (i = 0; i < nlist_host; i++) delete lists_host[i]; - delete [] lists_host; - delete [] pair_build_host; - delete [] stencil_create_host; - nlist_host = 0; - - for (i = 0; i < nlist_device; i++) delete lists_device[i]; - delete [] lists_device; - delete [] pair_build_device; - delete [] stencil_create_device; - nlist_device = 0; - - nlist = 0; - for (i = 0; i < nrequest; i++) { - if (requests[i]->kokkos_device) nlist_device++; - else if (requests[i]->kokkos_host) nlist_host++; - else nlist++; - } - - lists_host = new NeighListKokkos*[nrequest]; - pair_build_host = new PairPtrHost[nrequest]; - stencil_create_host = new StencilPtrHost[nrequest]; - for (i = 0; i < nrequest; i++) { - lists_host[i] = NULL; - pair_build_host[i] = NULL; - stencil_create_host[i] = NULL; - } - - for (i = 0; i < nrequest; i++) { - if (!requests[i]->kokkos_host) continue; - lists_host[i] = new NeighListKokkos(lmp); - lists_host[i]->index = i; - lists_host[i]->dnum = requests[i]->dnum; - if (requests[i]->pair) { - Pair *pair = (Pair *) requests[i]->requestor; - pair->init_list(requests[i]->id,lists_host[i]); - } - if (requests[i]->fix) { - Fix *fix = (Fix *) requests[i]->requestor; - fix->init_list(requests[i]->id,lists_host[i]); - } - } - - lists_device = new NeighListKokkos*[nrequest]; - pair_build_device = new PairPtrDevice[nrequest]; - stencil_create_device = new StencilPtrDevice[nrequest]; - for (i = 0; i < nrequest; i++) { - lists_device[i] = NULL; - pair_build_device[i] = NULL; - stencil_create_device[i] = NULL; - } - - for (i = 0; i < nrequest; i++) { - if (!requests[i]->kokkos_device) continue; - lists_device[i] = new NeighListKokkos(lmp); - lists_device[i]->index = i; - lists_device[i]->dnum = requests[i]->dnum; - if (requests[i]->pair) { - Pair *pair = (Pair *) requests[i]->requestor; - pair->init_list(requests[i]->id,lists_device[i]); - } - if (requests[i]->fix) { - Fix *fix = (Fix *) requests[i]->requestor; - fix->init_list(requests[i]->id,lists_device[i]); - } - } - - // 1st time allocation of xhold - - if (dist_check) - xhold = DAT::tdual_x_array("neigh:xhold",maxhold); - - // return # of non-Kokkos lists - - return nlist; -} - -/* ---------------------------------------------------------------------- */ - -void NeighborKokkos::init_list_flags1_kokkos(int i) -{ - if (style != BIN) - error->all(FLERR,"KOKKOS package only supports 'bin' neighbor lists"); - - if (lists_host[i]) { - lists_host[i]->buildflag = 1; - if (pair_build_host[i] == NULL) lists_host[i]->buildflag = 0; - if (requests[i]->occasional) lists_host[i]->buildflag = 0; - - lists_host[i]->growflag = 1; - if (requests[i]->copy) lists_host[i]->growflag = 0; - - lists_host[i]->stencilflag = 1; - if (style == NSQ) lists_host[i]->stencilflag = 0; - if (stencil_create[i] == NULL) lists_host[i]->stencilflag = 0; - - lists_host[i]->ghostflag = 0; - if (requests[i]->ghost) lists_host[i]->ghostflag = 1; - if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1; - } - - if (lists_device[i]) { - lists_device[i]->buildflag = 1; - if (pair_build_device[i] == NULL) lists_device[i]->buildflag = 0; - if (requests[i]->occasional) lists_device[i]->buildflag = 0; - - lists_device[i]->growflag = 1; - if (requests[i]->copy) lists_device[i]->growflag = 0; - - lists_device[i]->stencilflag = 1; - if (style == NSQ) lists_device[i]->stencilflag = 0; - if (stencil_create[i] == NULL) lists_device[i]->stencilflag = 0; - - lists_device[i]->ghostflag = 0; - if (requests[i]->ghost) lists_device[i]->ghostflag = 1; - if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1; - } -} - -/* ---------------------------------------------------------------------- */ - -void NeighborKokkos::init_list_flags2_kokkos(int i) -{ - if (lists_host[i]) { - if (lists_host[i]->buildflag) blist[nblist++] = i; - if (lists_host[i]->growflag && requests[i]->occasional == 0) - glist[nglist++] = i; - if (lists_host[i]->stencilflag && requests[i]->occasional == 0) - slist[nslist++] = i; - } - - if (lists_device[i]) { - if (lists_device[i]->buildflag) blist[nblist++] = i; - if (lists_device[i]->growflag && requests[i]->occasional == 0) - glist[nglist++] = i; - if (lists_device[i]->stencilflag && requests[i]->occasional == 0) - slist[nslist++] = i; - } -} - -/* ---------------------------------------------------------------------- */ - -void NeighborKokkos::init_list_grow_kokkos(int i) -{ - if (lists_host[i]!=NULL && lists_host[i]->growflag) - lists_host[i]->grow(maxatom); - if (lists_device[i]!=NULL && lists_device[i]->growflag) - lists_device[i]->grow(maxatom); + if (requests[i]->kokkos_device) { + lists[i] = new NeighListKokkos(lmp); + device_flag = 1; + } else if (requests[i]->kokkos_host) + lists[i] = new NeighListKokkos(lmp); } /* ---------------------------------------------------------------------- */ @@ -281,49 +125,6 @@ void NeighborKokkos::init_ex_mol_bit_kokkos() k_ex_mol_bit.modify(); } -/* ---------------------------------------------------------------------- */ - -void NeighborKokkos::choose_build(int index, NeighRequest *rq) -{ - if (rq->kokkos_host != 0) { - PairPtrHost pb = NULL; - if (rq->ghost) { - if (rq->full) { - if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos; - else pb = &NeighborKokkos::full_bin_kokkos; - } - else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos; - } else { - if (rq->full) { - if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos; - else pb = &NeighborKokkos::full_bin_kokkos; - } - else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos; - } - pair_build_host[index] = pb; - } - if (rq->kokkos_device != 0) { - PairPtrDevice pb = NULL; - if (rq->ghost) { - if (rq->full) { - if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos; - else pb = &NeighborKokkos::full_bin_kokkos; - } - else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos; - } else { - if (rq->full) { - if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos; - else pb = &NeighborKokkos::full_bin_kokkos; - } - else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos; - } - pair_build_device[index] = pb; - return; - } - - Neighbor::choose_build(index,rq); -} - /* ---------------------------------------------------------------------- if any atom moved trigger distance (half of neighbor skin) return 1 shrink trigger distance if box size has changed @@ -337,7 +138,7 @@ void NeighborKokkos::choose_build(int index, NeighRequest *rq) int NeighborKokkos::check_distance() { - if (nlist_device) + if (device_flag) check_distance_kokkos(); else check_distance_kokkos(); @@ -417,7 +218,7 @@ void NeighborKokkos::operator()(TagNeighborCheckDistance, const int void NeighborKokkos::build(int topoflag) { - if (nlist_device) + if (device_flag) build_kokkos(topoflag); else build_kokkos(topoflag); @@ -426,20 +227,30 @@ void NeighborKokkos::build(int topoflag) template void NeighborKokkos::build_kokkos(int topoflag) { + if (style != BIN) + error->all(FLERR,"KOKKOS package only supports 'bin' neighbor lists"); + typedef DeviceType device_type; - int i; + int i,m; ago = 0; ncalls++; lastcall = update->ntimestep; + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + + // check that using special bond flags will not overflow neigh lists + + if (nall > NEIGHMASK) + error->one(FLERR,"Too many local+ghost atoms for neighbor list"); + // store current atom positions and box size if needed if (dist_check) { atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); x = atomKK->k_x; - int nlocal = atom->nlocal; if (includegroup) nlocal = atom->nfirst; int maxhold_kokkos = xhold.view().dimension_0(); if (atom->nmax > maxhold || maxhold_kokkos < maxhold) { @@ -471,54 +282,33 @@ void NeighborKokkos::build_kokkos(int topoflag) } } - // if any lists store neighbors of ghosts: - // invoke grow() if nlocal+nghost exceeds previous list size - // else only invoke grow() if nlocal exceeds previous list size - // only for lists with growflag set and which are perpetual (glist) + // bin atoms for all NBin instances + // not just NBin associated with perpetual lists + // b/c cannot wait to bin occasional lists in build_one() call + // if bin then, atoms may have moved outside of proc domain & bin extent, + // leading to errors or even a crash - if (anyghostlist && atom->nmax > maxatom) { - maxatom = atom->nmax; - for (i = 0; i < nglist; i++) - if (lists[glist[i]]) lists[glist[i]]->grow(maxatom); - else init_list_grow_kokkos(glist[i]); - } else if (atom->nmax > maxatom) { - maxatom = atom->nmax; - for (i = 0; i < nglist; i++) - if (lists[glist[i]]) lists[glist[i]]->grow(maxatom); - else init_list_grow_kokkos(glist[i]); - } - - // extend atom bin list if necessary - - if (style != NSQ && atom->nmax > maxbin) { - maxbin = atom->nmax; - memory->destroy(bins); - memory->create(bins,maxbin,"bins"); - } - - // check that using special bond flags will not overflow neigh lists - - if (atom->nlocal+atom->nghost > NEIGHMASK) - error->one(FLERR,"Too many local+ghost atoms for neighbor list"); - - // invoke building of pair and molecular topology neighbor lists - // only for pairwise lists with buildflag set - // blist is for standard neigh lists, otherwise is a Kokkos list - - for (i = 0; i < nblist; i++) { - if (lists[blist[i]]) { - atomKK->sync(Host,ALL_MASK); - (this->*pair_build[blist[i]])(lists[blist[i]]); - } else { - if (lists_host[blist[i]]) - (this->*pair_build_host[blist[i]])(lists_host[blist[i]]); - else if (lists_device[blist[i]]) - (this->*pair_build_device[blist[i]])(lists_device[blist[i]]); + if (style != NSQ) { + for (int i = 0; i < nbin; i++) { + neigh_bin[i]->bin_atoms_setup(nall); + neigh_bin[i]->bin_atoms(); } } - if (atom->molecular && topoflag) - build_topology_kokkos(); + // build pairwise lists for all perpetual NPair/NeighList + // grow() with nlocal/nall args so that only realloc if have to + + atomKK->sync(Host,ALL_MASK); + for (i = 0; i < npair_perpetual; i++) { + m = plist[i]; + lists[m]->grow(nlocal,nall); + neigh_pair[m]->build_setup(); + neigh_pair[m]->build(lists[m]); + } + + // build topology lists for bonds/angles/etc + + if (atom->molecular && topoflag) build_topology(); } template @@ -532,26 +322,6 @@ void NeighborKokkos::operator()(TagNeighborXhold, const int &i) cons /* ---------------------------------------------------------------------- */ -void NeighborKokkos::setup_bins_kokkos(int i) -{ - if (lists_host[slist[i]]) { - lists_host[slist[i]]->stencil_allocate(smax,style); - (this->*stencil_create[slist[i]])(lists_host[slist[i]],sx,sy,sz); - } else if (lists_device[slist[i]]) { - lists_device[slist[i]]->stencil_allocate(smax,style); - (this->*stencil_create[slist[i]])(lists_device[slist[i]],sx,sy,sz); - } - - //if (i < nslist-1) return; // this won't work if a non-kokkos neighbor list is last - - if (maxhead > k_bins.d_view.dimension_0()) { - k_bins = DAT::tdual_int_2d("Neighbor::d_bins",maxhead,atoms_per_bin); - k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",maxhead); - } -} - -/* ---------------------------------------------------------------------- */ - void NeighborKokkos::modify_ex_type_grow_kokkos(){ memory->grow_kokkos(k_ex1_type,ex1_type,maxex_type,"neigh:ex1_type"); k_ex1_type.modify(); @@ -575,8 +345,8 @@ void NeighborKokkos::modify_mol_group_grow_kokkos(){ /* ---------------------------------------------------------------------- */ -void NeighborKokkos::init_topology_kokkos() { - if (nlist_device) { +void NeighborKokkos::init_topology() { + if (device_flag) { neighbond_device.init_topology_kk(); } else { neighbond_host.init_topology_kk(); @@ -588,8 +358,8 @@ void NeighborKokkos::init_topology_kokkos() { normally built with pair lists, but USER-CUDA separates them ------------------------------------------------------------------------- */ -void NeighborKokkos::build_topology_kokkos() { - if (nlist_device) { +void NeighborKokkos::build_topology() { + if (device_flag) { neighbond_device.build_topology_kk(); k_bondlist = neighbond_device.k_bondlist; @@ -606,38 +376,22 @@ void NeighborKokkos::build_topology_kokkos() { k_anglelist.modify(); k_dihedrallist.modify(); k_improperlist.modify(); - - // Transfer topology neighbor lists to Host for non-Kokkos styles - - if (force->bond && force->bond->execution_space == Host) - k_bondlist.sync(); - if (force->angle && force->angle->execution_space == Host) - k_anglelist.sync(); - if (force->dihedral && force->dihedral->execution_space == Host) - k_dihedrallist.sync(); - if (force->improper && force->improper->execution_space == Host) - k_improperlist.sync(); - - } else { + } else { neighbond_host.build_topology_kk(); - + k_bondlist = neighbond_host.k_bondlist; k_anglelist = neighbond_host.k_anglelist; k_dihedrallist = neighbond_host.k_dihedrallist; k_improperlist = neighbond_host.k_improperlist; - + k_bondlist.sync(); k_anglelist.sync(); k_dihedrallist.sync(); k_improperlist.sync(); - + k_bondlist.modify(); k_anglelist.modify(); k_dihedrallist.modify(); k_improperlist.modify(); } } - -// include to trigger instantiation of templated functions - -#include "neigh_full_kokkos.h" diff --git a/src/KOKKOS/neighbor_kokkos.h b/src/KOKKOS/neighbor_kokkos.h index 8c097139a7..244de19dce 100644 --- a/src/KOKKOS/neighbor_kokkos.h +++ b/src/KOKKOS/neighbor_kokkos.h @@ -22,316 +22,6 @@ namespace LAMMPS_NS { -template -class NeighborKokkosExecute -{ - typedef ArrayTypes AT; - - public: - NeighListKokkos neigh_list; - const typename AT::t_xfloat_2d_randomread cutneighsq; - const typename AT::t_int_1d bincount; - const typename AT::t_int_1d_const c_bincount; - typename AT::t_int_2d bins; - typename AT::t_int_2d_const c_bins; - const typename AT::t_x_array_randomread x; - const typename AT::t_int_1d_const type,mask,molecule; - - const typename AT::t_tagint_1d_const tag; - const typename AT::t_tagint_2d_const special; - const typename AT::t_int_2d_const nspecial; - const int molecular; - int moltemplate; - - int special_flag[4]; - - const int nbinx,nbiny,nbinz; - const int mbinx,mbiny,mbinz; - const int mbinxlo,mbinylo,mbinzlo; - const X_FLOAT bininvx,bininvy,bininvz; - X_FLOAT bboxhi[3],bboxlo[3]; - - const int nlocal; - - const int exclude; - - const int nex_type; - const int maxex_type; - const typename AT::t_int_1d_const ex1_type,ex2_type; - const typename AT::t_int_2d_const ex_type; - - const int nex_group; - const int maxex_group; - const typename AT::t_int_1d_const ex1_group,ex2_group; - const typename AT::t_int_1d_const ex1_bit,ex2_bit; - - const int nex_mol; - const int maxex_mol; - const typename AT::t_int_1d_const ex_mol_group; - const typename AT::t_int_1d_const ex_mol_bit; - - typename AT::t_int_scalar resize; - typename AT::t_int_scalar new_maxneighs; - typename ArrayTypes::t_int_scalar h_resize; - typename ArrayTypes::t_int_scalar h_new_maxneighs; - - const int xperiodic, yperiodic, zperiodic; - const int xprd_half, yprd_half, zprd_half; - - NeighborKokkosExecute( - const NeighListKokkos &_neigh_list, - const typename AT::t_xfloat_2d_randomread &_cutneighsq, - const typename AT::t_int_1d &_bincount, - const typename AT::t_int_2d &_bins, - const int _nlocal, - const typename AT::t_x_array_randomread &_x, - const typename AT::t_int_1d_const &_type, - const typename AT::t_int_1d_const &_mask, - const typename AT::t_int_1d_const &_molecule, - const typename AT::t_tagint_1d_const &_tag, - const typename AT::t_tagint_2d_const &_special, - const typename AT::t_int_2d_const &_nspecial, - const int &_molecular, - const int & _nbinx,const int & _nbiny,const int & _nbinz, - const int & _mbinx,const int & _mbiny,const int & _mbinz, - const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo, - const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz, - const int & _exclude,const int & _nex_type,const int & _maxex_type, - const typename AT::t_int_1d_const & _ex1_type, - const typename AT::t_int_1d_const & _ex2_type, - const typename AT::t_int_2d_const & _ex_type, - const int & _nex_group,const int & _maxex_group, - const typename AT::t_int_1d_const & _ex1_group, - const typename AT::t_int_1d_const & _ex2_group, - const typename AT::t_int_1d_const & _ex1_bit, - const typename AT::t_int_1d_const & _ex2_bit, - const int & _nex_mol,const int & _maxex_mol, - const typename AT::t_int_1d_const & _ex_mol_group, - const typename AT::t_int_1d_const & _ex_mol_bit, - const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo, - const int & _xperiodic, const int & _yperiodic, const int & _zperiodic, - const int & _xprd_half, const int & _yprd_half, const int & _zprd_half): - neigh_list(_neigh_list), cutneighsq(_cutneighsq), - bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins), - nlocal(_nlocal), - x(_x),type(_type),mask(_mask),molecule(_molecule), - tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular), - nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz), - mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz), - mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo), - bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz), - exclude(_exclude),nex_type(_nex_type),maxex_type(_maxex_type), - ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type), - nex_group(_nex_group),maxex_group(_maxex_group), - ex1_group(_ex1_group),ex2_group(_ex2_group), - ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),maxex_mol(_maxex_mol), - ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit), - xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic), - xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half){ - - if (molecular == 2) moltemplate = 1; - else moltemplate = 0; - - bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2]; - bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2]; - - resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize"); -#ifndef KOKKOS_USE_CUDA_UVM - h_resize = Kokkos::create_mirror_view(resize); -#else - h_resize = resize; -#endif - h_resize() = 1; - new_maxneighs = typename AT:: - t_int_scalar("NeighborKokkosFunctor::new_maxneighs"); -#ifndef KOKKOS_USE_CUDA_UVM - h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs); -#else - h_new_maxneighs = new_maxneighs; -#endif - h_new_maxneighs() = neigh_list.maxneighs; - }; - - ~NeighborKokkosExecute() {neigh_list.clean_copy();}; - - template - KOKKOS_FUNCTION - void build_Item(const int &i) const; - - template - KOKKOS_FUNCTION - void build_Item_Ghost(const int &i) const; - - template - KOKKOS_FUNCTION - void build_cluster_Item(const int &i) const; - -#ifdef KOKKOS_HAVE_CUDA - template - __device__ inline - void build_ItemCuda(typename Kokkos::TeamPolicy::member_type dev) const; -#endif - - KOKKOS_INLINE_FUNCTION - void binatomsItem(const int &i) const; - - KOKKOS_INLINE_FUNCTION - int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const - { - int ix,iy,iz; - - if (x >= bboxhi[0]) - ix = static_cast ((x-bboxhi[0])*bininvx) + nbinx; - else if (x >= bboxlo[0]) { - ix = static_cast ((x-bboxlo[0])*bininvx); - ix = MIN(ix,nbinx-1); - } else - ix = static_cast ((x-bboxlo[0])*bininvx) - 1; - - if (y >= bboxhi[1]) - iy = static_cast ((y-bboxhi[1])*bininvy) + nbiny; - else if (y >= bboxlo[1]) { - iy = static_cast ((y-bboxlo[1])*bininvy); - iy = MIN(iy,nbiny-1); - } else - iy = static_cast ((y-bboxlo[1])*bininvy) - 1; - - if (z >= bboxhi[2]) - iz = static_cast ((z-bboxhi[2])*bininvz) + nbinz; - else if (z >= bboxlo[2]) { - iz = static_cast ((z-bboxlo[2])*bininvz); - iz = MIN(iz,nbinz-1); - } else - iz = static_cast ((z-bboxlo[2])*bininvz) - 1; - - return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); - } - - KOKKOS_INLINE_FUNCTION - int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const - { - int ix,iy,iz; - - if (x >= bboxhi[0]) - ix = static_cast ((x-bboxhi[0])*bininvx) + nbinx; - else if (x >= bboxlo[0]) { - ix = static_cast ((x-bboxlo[0])*bininvx); - ix = MIN(ix,nbinx-1); - } else - ix = static_cast ((x-bboxlo[0])*bininvx) - 1; - - if (y >= bboxhi[1]) - iy = static_cast ((y-bboxhi[1])*bininvy) + nbiny; - else if (y >= bboxlo[1]) { - iy = static_cast ((y-bboxlo[1])*bininvy); - iy = MIN(iy,nbiny-1); - } else - iy = static_cast ((y-bboxlo[1])*bininvy) - 1; - - if (z >= bboxhi[2]) - iz = static_cast ((z-bboxhi[2])*bininvz) + nbinz; - else if (z >= bboxlo[2]) { - iz = static_cast ((z-bboxlo[2])*bininvz); - iz = MIN(iz,nbinz-1); - } else - iz = static_cast ((z-bboxlo[2])*bininvz) - 1; - - i[0] = ix - mbinxlo; - i[1] = iy - mbinylo; - i[2] = iz - mbinzlo; - - return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); - } - - KOKKOS_INLINE_FUNCTION - int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const; - - KOKKOS_INLINE_FUNCTION - int find_special(const int &i, const int &j) const; - - KOKKOS_INLINE_FUNCTION - int minimum_image_check(double dx, double dy, double dz) const { - if (xperiodic && fabs(dx) > xprd_half) return 1; - if (yperiodic && fabs(dy) > yprd_half) return 1; - if (zperiodic && fabs(dz) > zprd_half) return 1; - return 0; - } - -}; - -template -struct NeighborKokkosBinAtomsFunctor { - typedef Device device_type; - - const NeighborKokkosExecute c; - - NeighborKokkosBinAtomsFunctor(const NeighborKokkosExecute &_c): - c(_c) {}; - ~NeighborKokkosBinAtomsFunctor() {} - KOKKOS_INLINE_FUNCTION - void operator() (const int & i) const { - c.binatomsItem(i); - } -}; - -template -struct NeighborKokkosBuildFunctor { - typedef Device device_type; - - const NeighborKokkosExecute c; - const size_t sharedsize; - - NeighborKokkosBuildFunctor(const NeighborKokkosExecute &_c, - const size_t _sharedsize):c(_c), - sharedsize(_sharedsize) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int & i) const { - c.template build_Item(i); - } -#ifdef KOKKOS_HAVE_CUDA - KOKKOS_INLINE_FUNCTION - void operator() (typename Kokkos::TeamPolicy::member_type dev) const { - c.template build_ItemCuda(dev); - } - size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; } -#endif -}; - -template -struct NeighborKokkosBuildFunctorGhost { - typedef Device device_type; - - const NeighborKokkosExecute c; - const size_t sharedsize; - - NeighborKokkosBuildFunctorGhost(const NeighborKokkosExecute &_c, - const size_t _sharedsize):c(_c), - sharedsize(_sharedsize) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int & i) const { - c.template build_Item_Ghost(i); - } -}; - -template -struct NeighborClusterKokkosBuildFunctor { - typedef Device device_type; - - const NeighborKokkosExecute c; - const size_t sharedsize; - - NeighborClusterKokkosBuildFunctor(const NeighborKokkosExecute &_c, - const size_t _sharedsize):c(_c), - sharedsize(_sharedsize) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int & i) const { - c.template build_cluster_Item(i); - } -}; - template struct TagNeighborCheckDistance{}; @@ -342,24 +32,11 @@ class NeighborKokkos : public Neighbor { public: typedef int value_type; - - - int nlist_host; // pairwise neighbor lists on Host - NeighListKokkos **lists_host; - int nlist_device; // pairwise neighbor lists on Device - NeighListKokkos **lists_device; - - NeighBondKokkos neighbond_host; - NeighBondKokkos neighbond_device; - - DAT::tdual_int_2d k_bondlist; - DAT::tdual_int_2d k_anglelist; - DAT::tdual_int_2d k_dihedrallist; - DAT::tdual_int_2d k_improperlist; - NeighborKokkos(class LAMMPS *); ~NeighborKokkos(); void init(); + void init_topology(); + void build_topology(); template KOKKOS_INLINE_FUNCTION @@ -369,11 +46,7 @@ class NeighborKokkos : public Neighbor { KOKKOS_INLINE_FUNCTION void operator()(TagNeighborXhold, const int&) const; - private: - int atoms_per_bin; DAT::tdual_xfloat_2d k_cutneighsq; - DAT::tdual_int_1d k_bincount; - DAT::tdual_int_2d k_bins; DAT::tdual_int_1d k_ex1_type,k_ex2_type; DAT::tdual_int_2d k_ex_type; @@ -382,6 +55,16 @@ class NeighborKokkos : public Neighbor { DAT::tdual_int_1d k_ex_mol_group; DAT::tdual_int_1d k_ex_mol_bit; + NeighBondKokkos neighbond_host; + NeighBondKokkos neighbond_device; + + DAT::tdual_int_2d k_bondlist; + DAT::tdual_int_2d k_anglelist; + DAT::tdual_int_2d k_dihedrallist; + DAT::tdual_int_2d k_improperlist; + + private: + DAT::tdual_x_array x; DAT::tdual_x_array xhold; @@ -389,14 +72,10 @@ class NeighborKokkos : public Neighbor { int device_flag; void init_cutneighsq_kokkos(int); - int init_lists_kokkos(); - void init_list_flags1_kokkos(int); - void init_list_flags2_kokkos(int); - void init_list_grow_kokkos(int); + void create_kokkos_list(int); void init_ex_type_kokkos(int); void init_ex_bit_kokkos(); void init_ex_mol_bit_kokkos(); - void choose_build(int, NeighRequest *); virtual int check_distance(); template int check_distance_kokkos(); virtual void build(int); @@ -405,27 +84,6 @@ class NeighborKokkos : public Neighbor { void modify_ex_type_grow_kokkos(); void modify_ex_group_grow_kokkos(); void modify_mol_group_grow_kokkos(); - void init_topology_kokkos(); - void build_topology_kokkos(); - - typedef void (NeighborKokkos::*PairPtrHost) - (class NeighListKokkos *); - PairPtrHost *pair_build_host; - typedef void (NeighborKokkos::*PairPtrDevice) - (class NeighListKokkos *); - PairPtrDevice *pair_build_device; - - template - void full_bin_kokkos(NeighListKokkos *list); - template - void full_bin_cluster_kokkos(NeighListKokkos *list); - - typedef void (NeighborKokkos::*StencilPtrHost) - (class NeighListKokkos *, int, int, int); - StencilPtrHost *stencil_create_host; - typedef void (NeighborKokkos::*StencilPtrDevice) - (class NeighListKokkos *, int, int, int); - StencilPtrDevice *stencil_create_device; }; } diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp new file mode 100644 index 0000000000..f987304452 --- /dev/null +++ b/src/KOKKOS/npair_kokkos.cpp @@ -0,0 +1,746 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "npair_kokkos.h" +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "domain_kokkos.h" +#include "neighbor_kokkos.h" +#include "nbin_kokkos.h" +#include "nstencil.h" +#include "force.h" + +namespace LAMMPS_NS { + +/* ---------------------------------------------------------------------- */ + +template +NPairKokkos::NPairKokkos(LAMMPS *lmp) : NPair(lmp) { + +} + +/* ---------------------------------------------------------------------- + copy needed info from Neighbor class to this build class + ------------------------------------------------------------------------- */ + +template +void NPairKokkos::copy_neighbor_info() +{ + NPair::copy_neighbor_info(); + + NeighborKokkos* neighborKK = (NeighborKokkos*) neighbor; + + // general params + + newton_pair = force->newton_pair; + k_cutneighsq = neighborKK->k_cutneighsq; + + // exclusion info + + k_ex1_type = neighborKK->k_ex1_type; + k_ex2_type = neighborKK->k_ex2_type; + k_ex_type = neighborKK->k_ex_type; + k_ex1_group = neighborKK->k_ex1_group; + k_ex2_group = neighborKK->k_ex1_group; + k_ex1_bit = neighborKK->k_ex1_group; + k_ex2_bit = neighborKK->k_ex1_group; + k_ex_mol_group = neighborKK->k_ex_mol_group; + k_ex_mol_bit = neighborKK->k_ex_mol_bit; +} + +/* ---------------------------------------------------------------------- + copy per-atom and per-bin vectors from NBin class to this build class + ------------------------------------------------------------------------- */ + +template +void NPairKokkos::copy_bin_info() +{ + NPair::copy_bin_info(); + + NBinKokkos* nbKK = (NBinKokkos*) nb; + + atoms_per_bin = nbKK->atoms_per_bin; + k_bincount = nbKK->k_bincount; + k_bins = nbKK->k_bins; +} + +/* ---------------------------------------------------------------------- + copy needed info from NStencil class to this build class + ------------------------------------------------------------------------- */ + +template +void NPairKokkos::copy_stencil_info() +{ + NPair::copy_stencil_info(); + + nstencil = ns->nstencil; + + int maxstencil = ns->get_maxstencil(); + + k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil); + for (int k = 0; k < maxstencil; k++) + k_stencil.h_view(k) = ns->stencil[k]; + k_stencil.modify(); + k_stencil.sync(); + if (GHOST) { + k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil); + for (int k = 0; k < maxstencil; k++) { + k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0]; + k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1]; + k_stencilxyz.h_view(k,2) = ns->stencilxyz[k][2]; + } + k_stencilxyz.modify(); + k_stencilxyz.sync(); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void NPairKokkos::build(NeighList *list_) +{ + NeighListKokkos* list = (NeighListKokkos*) list_; + const int nlocal = includegroup?atom->nfirst:atom->nlocal; + int nall = nlocal; + if (GHOST) + nall += atom->nghost; + list->grow(nall); + + NeighborKokkosExecute + data(*list, + k_cutneighsq.view(), + k_bincount.view(), + k_bins.view(), + nstencil, + k_stencil.view(), + k_stencilxyz.view(), + nlocal, + atomKK->k_x.view(), + atomKK->k_type.view(), + atomKK->k_mask.view(), + atomKK->k_molecule.view(), + atomKK->k_tag.view(), + atomKK->k_special.view(), + atomKK->k_nspecial.view(), + atomKK->molecular, + nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo, + bininvx,bininvy,bininvz, + exclude, nex_type,maxex_type, + k_ex1_type.view(), + k_ex2_type.view(), + k_ex_type.view(), + nex_group,maxex_group, + k_ex1_group.view(), + k_ex2_group.view(), + k_ex1_bit.view(), + k_ex2_bit.view(), + nex_mol, maxex_mol, + k_ex_mol_group.view(), + k_ex_mol_bit.view(), + bboxhi,bboxlo, + domain->xperiodic,domain->yperiodic,domain->zperiodic, + domain->xprd_half,domain->yprd_half,domain->zprd_half); + + k_cutneighsq.sync(); + k_ex1_type.sync(); + k_ex2_type.sync(); + k_ex_type.sync(); + k_ex1_group.sync(); + k_ex2_group.sync(); + k_ex1_bit.sync(); + k_ex2_bit.sync(); + k_ex_mol_group.sync(); + k_ex_mol_bit.sync(); + atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK); + + data.special_flag[0] = special_flag[0]; + data.special_flag[1] = special_flag[1]; + data.special_flag[2] = special_flag[2]; + data.special_flag[3] = special_flag[3]; + + if(list->d_neighbors.dimension_0()d_neighbors = typename ArrayTypes::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs); + list->d_numneigh = typename ArrayTypes::t_int_1d("numneigh", nall*1.1); + data.neigh_list.d_neighbors = list->d_neighbors; + data.neigh_list.d_numneigh = list->d_numneigh; + } + data.h_resize()=1; + while(data.h_resize()) { + data.h_new_maxneighs() = list->maxneighs; + data.h_resize() = 0; + + Kokkos::deep_copy(data.resize, data.h_resize); + Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs); +#ifdef KOKKOS_HAVE_CUDA + #define BINS_PER_BLOCK 2 + const int factor = atoms_per_bin<64?2:1; + Kokkos::TeamPolicy config((mbins+factor-1)/factor,atoms_per_bin*factor); +#else + const int factor = 1; +#endif + +if (GHOST) { + NPairKokkosBuildFunctorGhost f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); + Kokkos::parallel_for(nall, f); +} else { + if (newton_pair) { + NPairKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); +#ifdef KOKKOS_HAVE_CUDA + Kokkos::parallel_for(config, f); +#else + Kokkos::parallel_for(nall, f); +#endif + } else { + NPairKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); +#ifdef KOKKOS_HAVE_CUDA + Kokkos::parallel_for(config, f); +#else + Kokkos::parallel_for(nall, f); +#endif + } +} + DeviceType::fence(); + deep_copy(data.h_resize, data.resize); + + if(data.h_resize()) { + deep_copy(data.h_new_maxneighs, data.new_maxneighs); + list->maxneighs = data.h_new_maxneighs() * 1.2; + list->d_neighbors = typename ArrayTypes::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs); + data.neigh_list.d_neighbors = list->d_neighbors; + data.neigh_list.maxneighs = list->maxneighs; + } + } + + if (GHOST) { + list->inum = atom->nlocal; + list->gnum = nall - atom->nlocal; + } else { + list->inum = nall; + list->gnum = 0; + } + + list->k_ilist.template modify(); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int NeighborKokkosExecute::find_special(const int &i, const int &j) const +{ + const int n1 = nspecial(i,0); + const int n2 = nspecial(i,1); + const int n3 = nspecial(i,2); + + for (int k = 0; k < n3; k++) { + if (special(i,k) == tag(j)) { + if (k < n1) { + if (special_flag[1] == 0) return -1; + else if (special_flag[1] == 1) return 0; + else return 1; + } else if (k < n2) { + if (special_flag[2] == 0) return -1; + else if (special_flag[2] == 1) return 0; + else return 2; + } else { + if (special_flag[3] == 0) return -1; + else if (special_flag[3] == 1) return 0; + else return 3; + } + } + } + return 0; +}; + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int NeighborKokkosExecute::exclusion(const int &i,const int &j, + const int &itype,const int &jtype) const +{ + int m; + + if (nex_type && ex_type(itype,jtype)) return 1; + + if (nex_group) { + for (m = 0; m < nex_group; m++) { + if (mask(i) & ex1_bit(m) && mask(j) & ex2_bit(m)) return 1; + if (mask(i) & ex2_bit(m) && mask(j) & ex1_bit(m)) return 1; + } + } + + if (nex_mol) { + for (m = 0; m < nex_mol; m++) + if (mask(i) & ex_mol_bit(m) && mask(j) & ex_mol_bit(m) && + molecule(i) == molecule(j)) return 1; + } + + return 0; +} + +/* ---------------------------------------------------------------------- */ + +template template +void NeighborKokkosExecute:: + build_Item(const int &i) const +{ + /* if necessary, goto next page and add pages */ + int n = 0; + int which = 0; + int moltemplate; + if (molecular == 2) moltemplate = 1; + else moltemplate = 0; + // get subview of neighbors of i + + const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i); + const X_FLOAT xtmp = x(i, 0); + const X_FLOAT ytmp = x(i, 1); + const X_FLOAT ztmp = x(i, 2); + const int itype = type(i); + + const int ibin = coord2bin(xtmp, ytmp, ztmp); + + const typename ArrayTypes::t_int_1d_const_um stencil + = d_stencil; + + // loop over all bins in neighborhood (includes ibin) + if(HalfNeigh) + for(int m = 0; m < c_bincount(ibin); m++) { + const int j = c_bins(ibin,m); + const int jtype = type(j); + + //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists + if((j == i) || (HalfNeigh && !Newton && (j < i)) || + (HalfNeigh && Newton && ((j < i) || ((j >= nlocal) && + ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) || + (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp))))) + ) continue; + if(exclude && exclusion(i,j,itype,jtype)) continue; + + const X_FLOAT delx = xtmp - x(j, 0); + const X_FLOAT dely = ytmp - x(j, 1); + const X_FLOAT delz = ztmp - x(j, 2); + const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + if(rsq <= cutneighsq(itype,jtype)) { + if (molecular) { + if (!moltemplate) + which = find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0){ + if(n 0) { + if(n::t_int_1d_const_um =Kokkos::subview(bins,jbin,ALL); + for(int m = 0; m < c_bincount(jbin); m++) { + + const int j = c_bins(jbin,m); + const int jtype = type(j); + + if(HalfNeigh && !Newton && (j < i)) continue; + if(!HalfNeigh && j==i) continue; + if(exclude && exclusion(i,j,itype,jtype)) continue; + + const X_FLOAT delx = xtmp - x(j, 0); + const X_FLOAT dely = ytmp - x(j, 1); + const X_FLOAT delz = ztmp - x(j, 2); + const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + + if(rsq <= cutneighsq(itype,jtype)) { + if (molecular) { + if (!moltemplate) + which = NeighborKokkosExecute::find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0){ + if(n 0) { + if(n= neigh_list.maxneighs) { + resize() = 1; + + if(n >= new_maxneighs()) new_maxneighs() = n; + } + neigh_list.d_ilist(i) = i; +} + +/* ---------------------------------------------------------------------- */ + +#ifdef KOKKOS_HAVE_CUDA +extern __shared__ X_FLOAT sharedmem[]; + +/* ---------------------------------------------------------------------- */ + +template template +__device__ inline +void NeighborKokkosExecute::build_ItemCuda(typename Kokkos::TeamPolicy::member_type dev) const +{ + /* loop over atoms in i's bin, + */ + const int atoms_per_bin = c_bins.dimension_1(); + const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin<1?1:dev.team_size()/atoms_per_bin; + const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size(); + const int MY_BIN = dev.team_rank()/atoms_per_bin; + + const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN; + + if(ibin >=c_bincount.dimension_0()) return; + X_FLOAT* other_x = sharedmem; + other_x = other_x + 5*atoms_per_bin*MY_BIN; + + int* other_id = (int*) &other_x[4 * atoms_per_bin]; + + int bincount_current = c_bincount[ibin]; + + for(int kk = 0; kk < TEAMS_PER_BIN; kk++) { + const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size(); + const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1; + /* if necessary, goto next page and add pages */ + + int n = 0; + + X_FLOAT xtmp; + X_FLOAT ytmp; + X_FLOAT ztmp; + int itype; + const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i= 0) { + xtmp = x(i, 0); + ytmp = x(i, 1); + ztmp = x(i, 2); + itype = type(i); + other_x[MY_II] = xtmp; + other_x[MY_II + atoms_per_bin] = ytmp; + other_x[MY_II + 2 * atoms_per_bin] = ztmp; + other_x[MY_II + 3 * atoms_per_bin] = itype; + } + other_id[MY_II] = i; + int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0); + + if(test) return; + + if(i >= 0 && i < nlocal) { + #pragma unroll 4 + for(int m = 0; m < bincount_current; m++) { + int j = other_id[m]; + const int jtype = other_x[m + 3 * atoms_per_bin]; + + //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists + if((j == i) || + (HalfNeigh && !Newton && (j < i)) || + (HalfNeigh && Newton && + ((j < i) || + ((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) || + (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp))))) + ) continue; + if(exclude && exclusion(i,j,itype,jtype)) continue; + const X_FLOAT delx = xtmp - other_x[m]; + const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; + const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; + const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + + if(rsq <= cutneighsq(itype,jtype)) { + if (molecular) { + int which = 0; + if (!moltemplate) + which = NeighborKokkosExecute::find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0){ + if(n 0) { + if(n::t_int_1d_const_um stencil + = d_stencil; + for(int k = 0; k < nstencil; k++) { + const int jbin = ibin + stencil[k]; + + if(ibin == jbin) continue; + + bincount_current = c_bincount[jbin]; + int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1; + + if(j >= 0) { + other_x[MY_II] = x(j, 0); + other_x[MY_II + atoms_per_bin] = x(j, 1); + other_x[MY_II + 2 * atoms_per_bin] = x(j, 2); + other_x[MY_II + 3 * atoms_per_bin] = type(j); + } + + other_id[MY_II] = j; + + __syncthreads(); + + if(i >= 0 && i < nlocal) { + #pragma unroll 8 + for(int m = 0; m < bincount_current; m++) { + const int j = other_id[m]; + const int jtype = other_x[m + 3 * atoms_per_bin]; + + //if(HalfNeigh && (j < i)) continue; + if(HalfNeigh && !Newton && (j < i)) continue; + if(!HalfNeigh && j==i) continue; + if(exclude && exclusion(i,j,itype,jtype)) continue; + + const X_FLOAT delx = xtmp - other_x[m]; + const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; + const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; + const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + + if(rsq <= cutneighsq(itype,jtype)) { + if (molecular) { + int which = 0; + if (!moltemplate) + which = NeighborKokkosExecute::find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0){ + if(n 0) { + if(n= 0 && i < nlocal) { + neigh_list.d_numneigh(i) = n; + neigh_list.d_ilist(i) = i; + } + + if(n >= neigh_list.maxneighs) { + resize() = 1; + + if(n >= new_maxneighs()) new_maxneighs() = n; + } + } +} +#endif + +/* ---------------------------------------------------------------------- */ + +template template +void NeighborKokkosExecute:: + build_Item_Ghost(const int &i) const +{ + /* if necessary, goto next page and add pages */ + int n = 0; + int which = 0; + int moltemplate; + if (molecular == 2) moltemplate = 1; + else moltemplate = 0; + // get subview of neighbors of i + + const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i); + const X_FLOAT xtmp = x(i, 0); + const X_FLOAT ytmp = x(i, 1); + const X_FLOAT ztmp = x(i, 2); + const int itype = type(i); + + const typename ArrayTypes::t_int_1d_const_um stencil + = d_stencil; + const typename ArrayTypes::t_int_1d_3_const_um stencilxyz + = d_stencilxyz; + + // loop over all atoms in surrounding bins in stencil including self + // when i is a ghost atom, must check if stencil bin is out of bounds + // skip i = j + // no molecular test when i = ghost atom + + if (i < nlocal) { + const int ibin = coord2bin(xtmp, ytmp, ztmp); + for (int k = 0; k < nstencil; k++) { + const int jbin = ibin + stencil[k]; + for(int m = 0; m < c_bincount(jbin); m++) { + const int j = c_bins(jbin,m); + + if (HalfNeigh && j <= i) continue; + else if (j == i) continue; + + const int jtype = type[j]; + if(exclude && exclusion(i,j,itype,jtype)) continue; + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq(itype,jtype)) { + if (molecular) { + if (!moltemplate) + which = find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0){ + if(n 0) { + if(n= mbinx || + ybin2 < 0 || ybin2 >= mbiny || + zbin2 < 0 || zbin2 >= mbinz) continue; + const int jbin = ibin + stencil[k]; + for(int m = 0; m < c_bincount(jbin); m++) { + const int j = c_bins(jbin,m); + + if (HalfNeigh && j <= i) continue; + else if (j == i) continue; + + const int jtype = type[j]; + if(exclude && exclusion(i,j,itype,jtype)) continue; + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq(itype,jtype)) { + if(n= neigh_list.maxneighs) { + resize() = 1; + + if(n >= new_maxneighs()) new_maxneighs() = n; + } + neigh_list.d_ilist(i) = i; +} + +} + +namespace LAMMPS_NS { +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +#endif +} diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h new file mode 100644 index 0000000000..666508a22d --- /dev/null +++ b/src/KOKKOS/npair_kokkos.h @@ -0,0 +1,435 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef NPAIR_CLASS + +typedef NPairKokkos NPairKokkosFullBinHost; +NPairStyle(full/bin/kk/host, + NPairKokkosFullBinHost, + NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) + +typedef NPairKokkos NPairKokkosFullBinDevice; +NPairStyle(full/bin/kk/device, + NPairKokkosFullBinDevice, + NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) + +typedef NPairKokkos NPairKokkosFullBinGhostHost; +NPairStyle(full/bin/ghost/kk/host, + NPairKokkosFullBinGhostHost, + NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI) + +typedef NPairKokkos NPairKokkosFullBinGhostDevice; +NPairStyle(full/bin/ghost/kk/device, + NPairKokkosFullBinGhostDevice, + NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI) + +typedef NPairKokkos NPairKokkosHalfBinHost; +NPairStyle(half/bin/kk/host, + NPairKokkosHalfBinHost, + NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) + +typedef NPairKokkos NPairKokkosHalfBinDevice; +NPairStyle(half/bin/kk/device, + NPairKokkosHalfBinDevice, + NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) + +typedef NPairKokkos NPairKokkosHalfBinGhostHost; +NPairStyle(half/bin/ghost/kk/host, + NPairKokkosHalfBinGhostHost, + NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI) + +typedef NPairKokkos NPairKokkosHalfBinGhostDevice; +NPairStyle(half/bin/ghost/kk/device, + NPairKokkosHalfBinGhostDevice, + NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI) + +#else + +#ifndef LMP_NPAIR_KOKKOS_H +#define LMP_NPAIR_KOKKOS_H + +#include "npair.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template +class NPairKokkos : public NPair { + public: + NPairKokkos(class LAMMPS *); + ~NPairKokkos() {} + void copy_neighbor_info(); + void copy_bin_info(); + void copy_stencil_info(); + void build(class NeighList *); + + private: + int newton_pair; + int nex_type; + int maxex_type; + + int nex_group; + int maxex_group; + + int nex_mol; + int maxex_mol; + + // data from Neighbor class + + DAT::tdual_xfloat_2d k_cutneighsq; + + // exclusion data from Neighbor class + + DAT::tdual_int_1d k_ex1_type,k_ex2_type; + DAT::tdual_int_2d k_ex_type; + DAT::tdual_int_1d k_ex1_group,k_ex2_group; + DAT::tdual_int_1d k_ex1_bit,k_ex2_bit; + DAT::tdual_int_1d k_ex_mol_group; + DAT::tdual_int_1d k_ex_mol_bit; + + // data from NBin class + + int atoms_per_bin; + DAT::tdual_int_1d k_bincount; + DAT::tdual_int_2d k_bins; + + // data from NStencil class + + int nstencil; + DAT::tdual_int_1d k_stencil; // # of J neighs for each I + DAT::tdual_int_1d_3 k_stencilxyz; +}; + +template +class NeighborKokkosExecute +{ + typedef ArrayTypes AT; + + public: + NeighListKokkos neigh_list; + + // data from Neighbor class + + const typename AT::t_xfloat_2d_randomread cutneighsq; + + // exclusion data from Neighbor class + + const int exclude; + + const int nex_type; + const int maxex_type; + const typename AT::t_int_1d_const ex1_type,ex2_type; + const typename AT::t_int_2d_const ex_type; + + const int nex_group; + const int maxex_group; + const typename AT::t_int_1d_const ex1_group,ex2_group; + const typename AT::t_int_1d_const ex1_bit,ex2_bit; + + const int nex_mol; + const int maxex_mol; + const typename AT::t_int_1d_const ex_mol_group; + const typename AT::t_int_1d_const ex_mol_bit; + + // data from NBin class + + const typename AT::t_int_1d bincount; + const typename AT::t_int_1d_const c_bincount; + typename AT::t_int_2d bins; + typename AT::t_int_2d_const c_bins; + + + // data from NStencil class + + int nstencil; + typename AT::t_int_1d d_stencil; // # of J neighs for each I + typename AT::t_int_1d_3 d_stencilxyz; + + // data from Atom class + + const typename AT::t_x_array_randomread x; + const typename AT::t_int_1d_const type,mask,molecule; + const typename AT::t_tagint_1d_const tag; + const typename AT::t_tagint_2d_const special; + const typename AT::t_int_2d_const nspecial; + const int molecular; + int moltemplate; + + int special_flag[4]; + + const int nbinx,nbiny,nbinz; + const int mbinx,mbiny,mbinz; + const int mbinxlo,mbinylo,mbinzlo; + const X_FLOAT bininvx,bininvy,bininvz; + X_FLOAT bboxhi[3],bboxlo[3]; + + const int nlocal; + + typename AT::t_int_scalar resize; + typename AT::t_int_scalar new_maxneighs; + typename ArrayTypes::t_int_scalar h_resize; + typename ArrayTypes::t_int_scalar h_new_maxneighs; + + const int xperiodic, yperiodic, zperiodic; + const int xprd_half, yprd_half, zprd_half; + + NeighborKokkosExecute( + const NeighListKokkos &_neigh_list, + const typename AT::t_xfloat_2d_randomread &_cutneighsq, + const typename AT::t_int_1d &_bincount, + const typename AT::t_int_2d &_bins, + const int _nstencil, + const typename AT::t_int_1d &_d_stencil, + const typename AT::t_int_1d_3 &_d_stencilxyz, + const int _nlocal, + const typename AT::t_x_array_randomread &_x, + const typename AT::t_int_1d_const &_type, + const typename AT::t_int_1d_const &_mask, + const typename AT::t_int_1d_const &_molecule, + const typename AT::t_tagint_1d_const &_tag, + const typename AT::t_tagint_2d_const &_special, + const typename AT::t_int_2d_const &_nspecial, + const int &_molecular, + const int & _nbinx,const int & _nbiny,const int & _nbinz, + const int & _mbinx,const int & _mbiny,const int & _mbinz, + const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo, + const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz, + const int & _exclude,const int & _nex_type,const int & _maxex_type, + const typename AT::t_int_1d_const & _ex1_type, + const typename AT::t_int_1d_const & _ex2_type, + const typename AT::t_int_2d_const & _ex_type, + const int & _nex_group,const int & _maxex_group, + const typename AT::t_int_1d_const & _ex1_group, + const typename AT::t_int_1d_const & _ex2_group, + const typename AT::t_int_1d_const & _ex1_bit, + const typename AT::t_int_1d_const & _ex2_bit, + const int & _nex_mol,const int & _maxex_mol, + const typename AT::t_int_1d_const & _ex_mol_group, + const typename AT::t_int_1d_const & _ex_mol_bit, + const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo, + const int & _xperiodic, const int & _yperiodic, const int & _zperiodic, + const int & _xprd_half, const int & _yprd_half, const int & _zprd_half): + neigh_list(_neigh_list), cutneighsq(_cutneighsq), + bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins), + nstencil(_nstencil),d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz), + nlocal(_nlocal), + x(_x),type(_type),mask(_mask),molecule(_molecule), + tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular), + nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz), + mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz), + mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo), + bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz), + exclude(_exclude),nex_type(_nex_type),maxex_type(_maxex_type), + ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type), + nex_group(_nex_group),maxex_group(_maxex_group), + ex1_group(_ex1_group),ex2_group(_ex2_group), + ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),maxex_mol(_maxex_mol), + ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit), + xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic), + xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) { + + if (molecular == 2) moltemplate = 1; + else moltemplate = 0; + + bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2]; + bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2]; + + resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize"); +#ifndef KOKKOS_USE_CUDA_UVM + h_resize = Kokkos::create_mirror_view(resize); +#else + h_resize = resize; +#endif + h_resize() = 1; + new_maxneighs = typename AT:: + t_int_scalar("NeighborKokkosFunctor::new_maxneighs"); +#ifndef KOKKOS_USE_CUDA_UVM + h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs); +#else + h_new_maxneighs = new_maxneighs; +#endif + h_new_maxneighs() = neigh_list.maxneighs; + }; + + ~NeighborKokkosExecute() {neigh_list.clean_copy();}; + + template + KOKKOS_FUNCTION + void build_Item(const int &i) const; + + template + KOKKOS_FUNCTION + void build_Item_Ghost(const int &i) const; + +#ifdef KOKKOS_HAVE_CUDA + template + __device__ inline + void build_ItemCuda(typename Kokkos::TeamPolicy::member_type dev) const; +#endif + + KOKKOS_INLINE_FUNCTION + void binatomsItem(const int &i) const; + + KOKKOS_INLINE_FUNCTION + int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const + { + int ix,iy,iz; + + if (x >= bboxhi[0]) + ix = static_cast ((x-bboxhi[0])*bininvx) + nbinx; + else if (x >= bboxlo[0]) { + ix = static_cast ((x-bboxlo[0])*bininvx); + ix = MIN(ix,nbinx-1); + } else + ix = static_cast ((x-bboxlo[0])*bininvx) - 1; + + if (y >= bboxhi[1]) + iy = static_cast ((y-bboxhi[1])*bininvy) + nbiny; + else if (y >= bboxlo[1]) { + iy = static_cast ((y-bboxlo[1])*bininvy); + iy = MIN(iy,nbiny-1); + } else + iy = static_cast ((y-bboxlo[1])*bininvy) - 1; + + if (z >= bboxhi[2]) + iz = static_cast ((z-bboxhi[2])*bininvz) + nbinz; + else if (z >= bboxlo[2]) { + iz = static_cast ((z-bboxlo[2])*bininvz); + iz = MIN(iz,nbinz-1); + } else + iz = static_cast ((z-bboxlo[2])*bininvz) - 1; + + return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); + } + + KOKKOS_INLINE_FUNCTION + int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const + { + int ix,iy,iz; + + if (x >= bboxhi[0]) + ix = static_cast ((x-bboxhi[0])*bininvx) + nbinx; + else if (x >= bboxlo[0]) { + ix = static_cast ((x-bboxlo[0])*bininvx); + ix = MIN(ix,nbinx-1); + } else + ix = static_cast ((x-bboxlo[0])*bininvx) - 1; + + if (y >= bboxhi[1]) + iy = static_cast ((y-bboxhi[1])*bininvy) + nbiny; + else if (y >= bboxlo[1]) { + iy = static_cast ((y-bboxlo[1])*bininvy); + iy = MIN(iy,nbiny-1); + } else + iy = static_cast ((y-bboxlo[1])*bininvy) - 1; + + if (z >= bboxhi[2]) + iz = static_cast ((z-bboxhi[2])*bininvz) + nbinz; + else if (z >= bboxlo[2]) { + iz = static_cast ((z-bboxlo[2])*bininvz); + iz = MIN(iz,nbinz-1); + } else + iz = static_cast ((z-bboxlo[2])*bininvz) - 1; + + i[0] = ix - mbinxlo; + i[1] = iy - mbinylo; + i[2] = iz - mbinzlo; + + return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); + } + + KOKKOS_INLINE_FUNCTION + int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const; + + KOKKOS_INLINE_FUNCTION + int find_special(const int &i, const int &j) const; + + KOKKOS_INLINE_FUNCTION + int minimum_image_check(double dx, double dy, double dz) const { + if (xperiodic && fabs(dx) > xprd_half) return 1; + if (yperiodic && fabs(dy) > yprd_half) return 1; + if (zperiodic && fabs(dz) > zprd_half) return 1; + return 0; + } + +}; + +template +struct NPairKokkosBuildFunctor { + typedef DeviceType device_type; + + const NeighborKokkosExecute c; + const size_t sharedsize; + + NPairKokkosBuildFunctor(const NeighborKokkosExecute &_c, + const size_t _sharedsize):c(_c), + sharedsize(_sharedsize) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int & i) const { + c.template build_Item(i); + } +#ifdef KOKKOS_HAVE_CUDA + __device__ inline + + void operator() (typename Kokkos::TeamPolicy::member_type dev) const { + c.template build_ItemCuda(dev); + } + size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; } +#endif +}; + +template +struct NPairKokkosBuildFunctor { + typedef LMPHostType device_type; + + const NeighborKokkosExecute c; + const size_t sharedsize; + + NPairKokkosBuildFunctor(const NeighborKokkosExecute &_c, + const size_t _sharedsize):c(_c), + sharedsize(_sharedsize) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int & i) const { + c.template build_Item(i); + } + + void operator() (typename Kokkos::TeamPolicy::member_type dev) const {} +}; + +template +struct NPairKokkosBuildFunctorGhost { + typedef DeviceType device_type; + + const NeighborKokkosExecute c; + const size_t sharedsize; + + NPairKokkosBuildFunctorGhost(const NeighborKokkosExecute &_c, + const size_t _sharedsize):c(_c), + sharedsize(_sharedsize) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int & i) const { + c.template build_Item_Ghost(i); + } +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp b/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp index 4c431bb427..a176ca2be4 100644 --- a/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp +++ b/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp @@ -90,7 +90,7 @@ void PairBuckCoulCutKokkos::compute(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -309,19 +309,12 @@ void PairBuckCoulCutKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == N2) { - neighbor->requests[irequest]->full_cluster = 0; neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 0; - } else if (neighflag == FULLCLUSTER) { - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 1; } else { error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/cut/kk"); } diff --git a/src/KOKKOS/pair_buck_coul_long_kokkos.cpp b/src/KOKKOS/pair_buck_coul_long_kokkos.cpp index a7e6deb43f..413f38370d 100644 --- a/src/KOKKOS/pair_buck_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_buck_coul_long_kokkos.cpp @@ -109,7 +109,7 @@ void PairBuckCoulLongKokkos::compute(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -458,11 +458,9 @@ void PairBuckCoulLongKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/long/kk"); } diff --git a/src/KOKKOS/pair_buck_kokkos.cpp b/src/KOKKOS/pair_buck_kokkos.cpp index 50d65b4b6d..02f767fa03 100644 --- a/src/KOKKOS/pair_buck_kokkos.cpp +++ b/src/KOKKOS/pair_buck_kokkos.cpp @@ -79,7 +79,7 @@ void PairBuckKokkos::compute(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -233,19 +233,12 @@ void PairBuckKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == N2) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; - } else if (neighflag == FULLCLUSTER) { - neighbor->requests[irequest]->full_cluster = 1; - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with buck/kk"); } diff --git a/src/KOKKOS/pair_buck_kokkos.h b/src/KOKKOS/pair_buck_kokkos.h index 23ba049f9d..e95fa903fe 100644 --- a/src/KOKKOS/pair_buck_kokkos.h +++ b/src/KOKKOS/pair_buck_kokkos.h @@ -31,7 +31,7 @@ namespace LAMMPS_NS { template class PairBuckKokkos : public PairBuck { public: - enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER}; + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2}; enum {COUL_FLAG=0}; typedef DeviceType device_type; PairBuckKokkos(class LAMMPS *); @@ -96,17 +96,14 @@ class PairBuckKokkos : public PairBuck { friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; - friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; - friend class PairComputeFunctor; friend EV_FLOAT pair_compute_neighlist(PairBuckKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute_neighlist(PairBuckKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute_neighlist(PairBuckKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute_neighlist(PairBuckKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_fullcluster(PairBuckKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute(PairBuckKokkos*,NeighListKokkos*); friend void pair_virial_fdotr_compute(PairBuckKokkos*); }; diff --git a/src/KOKKOS/pair_coul_cut_kokkos.cpp b/src/KOKKOS/pair_coul_cut_kokkos.cpp index 7b0fbad7e5..19d4306317 100644 --- a/src/KOKKOS/pair_coul_cut_kokkos.cpp +++ b/src/KOKKOS/pair_coul_cut_kokkos.cpp @@ -78,7 +78,7 @@ void PairCoulCutKokkos::compute(int eflag_in, int vflag_in) vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -215,11 +215,9 @@ void PairCoulCutKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with coul/cut/kk"); } diff --git a/src/KOKKOS/pair_coul_debye_kokkos.cpp b/src/KOKKOS/pair_coul_debye_kokkos.cpp index c4b78b8910..9a6e1b8020 100644 --- a/src/KOKKOS/pair_coul_debye_kokkos.cpp +++ b/src/KOKKOS/pair_coul_debye_kokkos.cpp @@ -85,7 +85,7 @@ void PairCoulDebyeKokkos::compute(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -257,19 +257,12 @@ void PairCoulDebyeKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == N2) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; - } else if (neighflag == FULLCLUSTER) { - neighbor->requests[irequest]->full_cluster = 1; - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with coul/debye/kk"); } diff --git a/src/KOKKOS/pair_coul_dsf_kokkos.cpp b/src/KOKKOS/pair_coul_dsf_kokkos.cpp index 503cdc280d..e689754d0a 100644 --- a/src/KOKKOS/pair_coul_dsf_kokkos.cpp +++ b/src/KOKKOS/pair_coul_dsf_kokkos.cpp @@ -221,11 +221,9 @@ void PairCoulDSFKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with coul/dsf/kk"); } diff --git a/src/KOKKOS/pair_coul_long_kokkos.cpp b/src/KOKKOS/pair_coul_long_kokkos.cpp index 95b6734e94..7536549bf4 100644 --- a/src/KOKKOS/pair_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_coul_long_kokkos.cpp @@ -102,7 +102,7 @@ void PairCoulLongKokkos::compute(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -408,11 +408,9 @@ void PairCoulLongKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/long/kk"); } diff --git a/src/KOKKOS/pair_coul_wolf_kokkos.cpp b/src/KOKKOS/pair_coul_wolf_kokkos.cpp index 774580c929..1785ba2731 100644 --- a/src/KOKKOS/pair_coul_wolf_kokkos.cpp +++ b/src/KOKKOS/pair_coul_wolf_kokkos.cpp @@ -222,11 +222,9 @@ void PairCoulWolfKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with coul/wolf/kk"); } diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp index 151d89d2b0..f3b7c36106 100644 --- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp +++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp @@ -286,11 +286,9 @@ void PairEAMAlloyKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/alloy"); } diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp index b503d1e83a..ba450b0872 100644 --- a/src/KOKKOS/pair_eam_fs_kokkos.cpp +++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp @@ -291,11 +291,9 @@ void PairEAMFSKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/fs"); } diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index d91da280ac..3d8223ed66 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -281,11 +281,9 @@ void PairEAMKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk"); } diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index 3710c460c0..1e01b3df15 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -333,145 +333,6 @@ struct PairComputeFunctor { } }; -template -struct PairComputeFunctor { - typedef typename PairStyle::device_type device_type ; - typedef EV_FLOAT value_type; - - PairStyle c; - NeighListKokkos list; - - PairComputeFunctor(PairStyle* c_ptr, - NeighListKokkos* list_ptr): - c(*c_ptr),list(*list_ptr) {}; - ~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();}; - - KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const { - return j >> SBBITS & 3; - } - - template - KOKKOS_FUNCTION - EV_FLOAT compute_item(const typename Kokkos::TeamPolicy::member_type& dev, - const NeighListKokkos &list, const NoCoulTag& ) const { - EV_FLOAT ev; - int i = dev.league_rank()*dev.team_size() + dev.team_rank(); - - const X_FLOAT xtmp = c.c_x(i,0); - const X_FLOAT ytmp = c.c_x(i,1); - const X_FLOAT ztmp = c.c_x(i,2); - int itype = c.type(i); - - const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); - const int jnum = list.d_numneigh[i]; - - F_FLOAT3 ftmp; - - for (int jj = 0; jj < jnum; jj++) { - int jjj = neighbors_i(jj); - - Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(dev,NeighClusterSize),[&] (const int& k, F_FLOAT3& fftmp) { - const F_FLOAT factor_lj = c.special_lj[sbmask(jjj+k)]; - const int j = (jjj + k)&NEIGHMASK; - if((j==i)||(j>=c.nall)) return; - const X_FLOAT delx = xtmp - c.c_x(j,0); - const X_FLOAT dely = ytmp - c.c_x(j,1); - const X_FLOAT delz = ztmp - c.c_x(j,2); - const int jtype = c.type(j); - const F_FLOAT rsq = (delx*delx + dely*dely + delz*delz); - - if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) { - - const F_FLOAT fpair = factor_lj*c.template compute_fpair(rsq,i,j,itype,jtype); - fftmp.x += delx*fpair; - fftmp.y += dely*fpair; - fftmp.z += delz*fpair; - - if (EVFLAG) { - F_FLOAT evdwl = 0.0; - if (c.eflag) { - evdwl = 0.5* - factor_lj * c.template compute_evdwl(rsq,i,j,itype,jtype); - ev.evdwl += evdwl; - } - - if (c.vflag_either || c.eflag_atom) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); - } - } - },ftmp); - } - - Kokkos::single(Kokkos::PerThread(dev), [&]() { - c.f(i,0) += ftmp.x; - c.f(i,1) += ftmp.y; - c.f(i,2) += ftmp.z; - }); - - return ev; - } - - KOKKOS_INLINE_FUNCTION - void ev_tally(EV_FLOAT &ev, const int &i, const int &j, - const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, - const F_FLOAT &dely, const F_FLOAT &delz) const - { - const int EFLAG = c.eflag; - const int NEWTON_PAIR = c.newton_pair; - const int VFLAG = c.vflag_either; - - if (EFLAG) { - if (c.eflag_atom) { - const E_FLOAT epairhalf = 0.5 * epair; - if (NEWTON_PAIR || i < c.nlocal) c.d_eatom[i] += epairhalf; - if (NEWTON_PAIR || j < c.nlocal) c.d_eatom[j] += epairhalf; - } - } - - if (VFLAG) { - const E_FLOAT v0 = delx*delx*fpair; - const E_FLOAT v1 = dely*dely*fpair; - const E_FLOAT v2 = delz*delz*fpair; - const E_FLOAT v3 = delx*dely*fpair; - const E_FLOAT v4 = delx*delz*fpair; - const E_FLOAT v5 = dely*delz*fpair; - - if (c.vflag_global) { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; - } - - if (c.vflag_atom) { - if (i < c.nlocal) { - c.d_vatom(i,0) += 0.5*v0; - c.d_vatom(i,1) += 0.5*v1; - c.d_vatom(i,2) += 0.5*v2; - c.d_vatom(i,3) += 0.5*v3; - c.d_vatom(i,4) += 0.5*v4; - c.d_vatom(i,5) += 0.5*v5; - } - } - } - } - - KOKKOS_INLINE_FUNCTION - void operator()(const typename Kokkos::TeamPolicy::member_type& dev) const { - if (c.newton_pair) compute_item<0,1>(dev,list,typename DoCoul::type()); - else compute_item<0,0>(dev,list,typename DoCoul::type()); - } - - KOKKOS_INLINE_FUNCTION - void operator()(const typename Kokkos::TeamPolicy::member_type& dev, value_type &energy_virial) const { - if (c.newton_pair) - energy_virial += compute_item<1,1>(dev,list,typename DoCoul::type()); - else - energy_virial += compute_item<1,0>(dev,list,typename DoCoul::type()); - } -}; - template struct PairComputeFunctor { typedef typename PairStyle::device_type device_type ; @@ -607,8 +468,8 @@ struct PairComputeFunctor { // The enable_if clause will invalidate the last parameter of the function, so that // a match is only achieved, if PairStyle supports the specific neighborlist variant. // This uses the fact that failure to match template parameters is not an error. -// By having the enable_if with a ! and without it, exactly one of the two versions of the functions -// pair_compute_neighlist and pair_compute_fullcluster will match - either the dummy version +// By having the enable_if with a ! and without it, exactly one of the functions +// pair_compute_neighlist will match - either the dummy version // or the real one further below. template EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if*>::type list) { @@ -619,15 +480,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable return ev; } -template -EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if*>::type list) { - EV_FLOAT ev; - (void) fpair; - (void) list; - printf("ERROR: calling pair_compute with invalid neighbor list style: requested %i available %i \n",FULLCLUSTER,PairStyle::EnabledNeighFlags); - return ev; -} - // Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL,N2 template EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos*>::type list) { @@ -644,41 +496,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable return ev; } -// Submit ParallelFor for NEIGHFLAG=FULLCLUSTER -template -EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<(FULLCLUSTER&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos*>::type list) { - EV_FLOAT ev; - if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) { - typedef PairComputeFunctor - f_type; - f_type ff(fpair, list); - #ifdef KOKKOS_HAVE_CUDA - const int teamsize = Kokkos::Impl::is_same::value ? 32 : 1; - #else - const int teamsize = 1; - #endif - const int nteams = (list->inum*+teamsize-1)/teamsize; - Kokkos::TeamPolicy config(nteams,teamsize,NeighClusterSize); - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev); - else Kokkos::parallel_for(config,ff); - } else { - typedef PairComputeFunctor - f_type; - f_type ff(fpair, list); - #ifdef KOKKOS_HAVE_CUDA - const int teamsize = Kokkos::Impl::is_same::value ? 32 : 1; - #else - const int teamsize = 1; - #endif - const int nteams = (list->inum*+teamsize-1)/teamsize; - Kokkos::TeamPolicy config(nteams,teamsize,NeighClusterSize); - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev); - else Kokkos::parallel_for(config,ff); - } - return ev; -} - - template EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos* list) { EV_FLOAT ev; @@ -690,8 +507,6 @@ EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos (fpair,list); } else if (fpair->neighflag == N2) { ev = pair_compute_neighlist (fpair,list); - } else if (fpair->neighflag == FULLCLUSTER) { - ev = pair_compute_fullcluster (fpair,list); } return ev; } diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp index d438e64e7d..914711a8e5 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp @@ -110,7 +110,7 @@ void PairLJCharmmCoulCharmmImplicitKokkos::compute(int eflag_in, int eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -455,11 +455,9 @@ void PairLJCharmmCoulCharmmImplicitKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/charmm/implicit/kk"); } diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp index 4e125235f4..4af6a896d0 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp @@ -110,7 +110,7 @@ void PairLJCharmmCoulCharmmKokkos::compute(int eflag_in, int vflag_i eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -456,11 +456,9 @@ void PairLJCharmmCoulCharmmKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/charmm/kk"); } diff --git a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp index 3b2b13f40b..5efba2742d 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp @@ -110,7 +110,7 @@ void PairLJCharmmCoulLongKokkos::compute(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -486,11 +486,9 @@ void PairLJCharmmCoulLongKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/long/kk"); } diff --git a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp index 87cd1cb7e1..96507a599e 100644 --- a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp +++ b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp @@ -87,7 +87,7 @@ void PairLJClass2CoulCutKokkos::compute(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -289,19 +289,12 @@ void PairLJClass2CoulCutKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == N2) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; - } else if (neighflag == FULLCLUSTER) { - neighbor->requests[irequest]->full_cluster = 1; - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/coul/cut/kk"); } diff --git a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp index 297a764dda..2d1abc9cd3 100644 --- a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp @@ -95,7 +95,7 @@ void PairLJClass2CoulLongKokkos::compute(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -445,11 +445,9 @@ void PairLJClass2CoulLongKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/coul/long/kk"); } diff --git a/src/KOKKOS/pair_lj_class2_kokkos.cpp b/src/KOKKOS/pair_lj_class2_kokkos.cpp index a263e81e0e..b5c4c19b8e 100644 --- a/src/KOKKOS/pair_lj_class2_kokkos.cpp +++ b/src/KOKKOS/pair_lj_class2_kokkos.cpp @@ -87,7 +87,7 @@ void PairLJClass2Kokkos::compute(int eflag_in, int vflag_in) vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -227,19 +227,12 @@ void PairLJClass2Kokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == N2) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; - } else if (neighflag == FULLCLUSTER) { - neighbor->requests[irequest]->full_cluster = 1; - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/kk"); } diff --git a/src/KOKKOS/pair_lj_class2_kokkos.h b/src/KOKKOS/pair_lj_class2_kokkos.h index 8dcabe5b0c..e8ac07da80 100644 --- a/src/KOKKOS/pair_lj_class2_kokkos.h +++ b/src/KOKKOS/pair_lj_class2_kokkos.h @@ -31,7 +31,7 @@ namespace LAMMPS_NS { template class PairLJClass2Kokkos : public PairLJClass2 { public: - enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER}; + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2}; enum {COUL_FLAG=0}; typedef DeviceType device_type; PairLJClass2Kokkos(class LAMMPS *); @@ -99,17 +99,14 @@ class PairLJClass2Kokkos : public PairLJClass2 { friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; - friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; - friend class PairComputeFunctor; friend EV_FLOAT pair_compute_neighlist(PairLJClass2Kokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute_neighlist(PairLJClass2Kokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute_neighlist(PairLJClass2Kokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute_neighlist(PairLJClass2Kokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_fullcluster(PairLJClass2Kokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute(PairLJClass2Kokkos*,NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJClass2Kokkos*); }; diff --git a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp index b6071880cf..e68ec5579c 100644 --- a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp @@ -87,7 +87,7 @@ void PairLJCutCoulCutKokkos::compute(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -280,19 +280,12 @@ void PairLJCutCoulCutKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == N2) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; - } else if (neighflag == FULLCLUSTER) { - neighbor->requests[irequest]->full_cluster = 1; - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk"); } diff --git a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp index 1da18f0afe..f4011b6f5c 100644 --- a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp @@ -91,7 +91,7 @@ void PairLJCutCoulDebyeKokkos::compute(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -310,19 +310,12 @@ void PairLJCutCoulDebyeKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == N2) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; - } else if (neighflag == FULLCLUSTER) { - neighbor->requests[irequest]->full_cluster = 1; - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/debye/kk"); } diff --git a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp index 46cb0a96dc..13c930a15b 100644 --- a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp @@ -99,7 +99,7 @@ void PairLJCutCoulDSFKokkos::compute(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -301,19 +301,12 @@ void PairLJCutCoulDSFKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == N2) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; - } else if (neighflag == FULLCLUSTER) { - neighbor->requests[irequest]->full_cluster = 1; - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk"); } diff --git a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp index 2a1a124460..42319cfa99 100644 --- a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp @@ -99,7 +99,7 @@ void PairLJCutCoulLongKokkos::compute(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -464,11 +464,9 @@ void PairLJCutCoulLongKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/long/kk"); } diff --git a/src/KOKKOS/pair_lj_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_kokkos.cpp index 2ad7f2d014..5f2805622a 100644 --- a/src/KOKKOS/pair_lj_cut_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_kokkos.cpp @@ -87,7 +87,7 @@ void PairLJCutKokkos::compute(int eflag_in, int vflag_in) vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -245,19 +245,12 @@ void PairLJCutKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == N2) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; - } else if (neighflag == FULLCLUSTER) { - neighbor->requests[irequest]->full_cluster = 1; - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk"); } diff --git a/src/KOKKOS/pair_lj_cut_kokkos.h b/src/KOKKOS/pair_lj_cut_kokkos.h index 16efd3d2ef..b779874fe8 100644 --- a/src/KOKKOS/pair_lj_cut_kokkos.h +++ b/src/KOKKOS/pair_lj_cut_kokkos.h @@ -31,7 +31,7 @@ namespace LAMMPS_NS { template class PairLJCutKokkos : public PairLJCut { public: - enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER}; + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2}; enum {COUL_FLAG=0}; typedef DeviceType device_type; PairLJCutKokkos(class LAMMPS *); @@ -99,17 +99,14 @@ class PairLJCutKokkos : public PairLJCut { friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; - friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; - friend class PairComputeFunctor; friend EV_FLOAT pair_compute_neighlist(PairLJCutKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute_neighlist(PairLJCutKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute_neighlist(PairLJCutKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute_neighlist(PairLJCutKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_fullcluster(PairLJCutKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute(PairLJCutKokkos*,NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJCutKokkos*); }; diff --git a/src/KOKKOS/pair_lj_expand_kokkos.cpp b/src/KOKKOS/pair_lj_expand_kokkos.cpp index 3e1d185d2f..3ed03f0d0b 100644 --- a/src/KOKKOS/pair_lj_expand_kokkos.cpp +++ b/src/KOKKOS/pair_lj_expand_kokkos.cpp @@ -86,7 +86,7 @@ void PairLJExpandKokkos::compute(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -230,19 +230,12 @@ void PairLJExpandKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == N2) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; - } else if (neighflag == FULLCLUSTER) { - neighbor->requests[irequest]->full_cluster = 1; - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with lj/expand/kk"); } diff --git a/src/KOKKOS/pair_lj_expand_kokkos.h b/src/KOKKOS/pair_lj_expand_kokkos.h index 172ccaae73..339950a6b2 100644 --- a/src/KOKKOS/pair_lj_expand_kokkos.h +++ b/src/KOKKOS/pair_lj_expand_kokkos.h @@ -31,7 +31,7 @@ namespace LAMMPS_NS { template class PairLJExpandKokkos : public PairLJExpand { public: - enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER}; + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2}; enum {COUL_FLAG=0}; typedef DeviceType device_type; PairLJExpandKokkos(class LAMMPS *); @@ -100,17 +100,14 @@ class PairLJExpandKokkos : public PairLJExpand { friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; - friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; - friend class PairComputeFunctor; friend EV_FLOAT pair_compute_neighlist(PairLJExpandKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute_neighlist(PairLJExpandKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute_neighlist(PairLJExpandKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute_neighlist(PairLJExpandKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_fullcluster(PairLJExpandKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute(PairLJExpandKokkos*,NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJExpandKokkos*); }; diff --git a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp index c764af303f..943cf988c9 100644 --- a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp +++ b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp @@ -101,7 +101,7 @@ void PairLJGromacsCoulGromacsKokkos::compute(int eflag_in, int vflag eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -439,11 +439,9 @@ void PairLJGromacsCoulGromacsKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with lj/gromacs/coul/gromacs/kk"); } diff --git a/src/KOKKOS/pair_lj_gromacs_kokkos.cpp b/src/KOKKOS/pair_lj_gromacs_kokkos.cpp index 2f144599ac..bb4dcb39bf 100644 --- a/src/KOKKOS/pair_lj_gromacs_kokkos.cpp +++ b/src/KOKKOS/pair_lj_gromacs_kokkos.cpp @@ -98,7 +98,7 @@ void PairLJGromacsKokkos::compute(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -277,11 +277,9 @@ void PairLJGromacsKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with lj/gromacs/kk"); } diff --git a/src/KOKKOS/pair_lj_sdk_kokkos.cpp b/src/KOKKOS/pair_lj_sdk_kokkos.cpp index 74183dff0b..46715e6fa3 100644 --- a/src/KOKKOS/pair_lj_sdk_kokkos.cpp +++ b/src/KOKKOS/pair_lj_sdk_kokkos.cpp @@ -86,7 +86,7 @@ void PairLJSDKKokkos::compute(int eflag_in, int vflag_in) vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -258,19 +258,12 @@ void PairLJSDKKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == N2) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; - } else if (neighflag == FULLCLUSTER) { - neighbor->requests[irequest]->full_cluster = 1; - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with lj/sdk/kk"); } diff --git a/src/KOKKOS/pair_lj_sdk_kokkos.h b/src/KOKKOS/pair_lj_sdk_kokkos.h index 090b9aa562..03ca361c1b 100644 --- a/src/KOKKOS/pair_lj_sdk_kokkos.h +++ b/src/KOKKOS/pair_lj_sdk_kokkos.h @@ -31,7 +31,7 @@ namespace LAMMPS_NS { template class PairLJSDKKokkos : public PairLJSDK { public: - enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER}; + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2}; enum {COUL_FLAG=0}; typedef DeviceType device_type; PairLJSDKKokkos(class LAMMPS *); @@ -97,17 +97,14 @@ class PairLJSDKKokkos : public PairLJSDK { friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; - friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; friend class PairComputeFunctor; - friend class PairComputeFunctor; friend EV_FLOAT pair_compute_neighlist(PairLJSDKKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute_neighlist(PairLJSDKKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute_neighlist(PairLJSDKKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute_neighlist(PairLJSDKKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_fullcluster(PairLJSDKKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute(PairLJSDKKokkos*,NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJSDKKokkos*); }; diff --git a/src/KOKKOS/pair_reax_c_kokkos.cpp b/src/KOKKOS/pair_reax_c_kokkos.cpp index 894c3ab53c..0fbf579a92 100644 --- a/src/KOKKOS/pair_reax_c_kokkos.cpp +++ b/src/KOKKOS/pair_reax_c_kokkos.cpp @@ -146,12 +146,10 @@ void PairReaxCKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; neighbor->requests[irequest]->ghost = 1; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; neighbor->requests[irequest]->ghost = 1; } else { error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk"); diff --git a/src/KOKKOS/pair_sw_kokkos.cpp b/src/KOKKOS/pair_sw_kokkos.cpp index d2cda316be..8d0f2fcfc3 100644 --- a/src/KOKKOS/pair_sw_kokkos.cpp +++ b/src/KOKKOS/pair_sw_kokkos.cpp @@ -601,7 +601,6 @@ void PairSWKokkos::init_style() if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; if (neighflag == FULL) neighbor->requests[irequest]->ghost = 1; else diff --git a/src/KOKKOS/pair_table_kokkos.cpp b/src/KOKKOS/pair_table_kokkos.cpp index 278c5b0a2f..5230d1a91f 100644 --- a/src/KOKKOS/pair_table_kokkos.cpp +++ b/src/KOKKOS/pair_table_kokkos.cpp @@ -96,7 +96,7 @@ void PairTableKokkos::compute_style(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -142,19 +142,6 @@ void PairTableKokkos::compute_style(int eflag_in, int vflag_in) f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev); else Kokkos::parallel_for(nlocal,f); - } else if (neighflag == FULLCLUSTER) { - typedef PairComputeFunctor,FULLCLUSTER,false,S_TableCompute > - f_type; - f_type f(this,(NeighListKokkos*) list); - #ifdef KOKKOS_HAVE_CUDA - const int teamsize = Kokkos::Impl::is_same::value ? 32 : 1; - #else - const int teamsize = 1; - #endif - const int nteams = (list->inum*+teamsize-1)/teamsize; - Kokkos::TeamPolicy config(nteams,teamsize,NeighClusterSize); - if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev); - else Kokkos::parallel_for(config,f); } } else { if (neighflag == FULL) { @@ -177,19 +164,6 @@ void PairTableKokkos::compute_style(int eflag_in, int vflag_in) f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev); else Kokkos::parallel_for(nlocal,f); - } else if (neighflag == FULLCLUSTER) { - typedef PairComputeFunctor,FULLCLUSTER,true,S_TableCompute > - f_type; - f_type f(this,(NeighListKokkos*) list); - #ifdef KOKKOS_HAVE_CUDA - const int teamsize = Kokkos::Impl::is_same::value ? 32 : 1; - #else - const int teamsize = 1; - #endif - const int nteams = (list->inum*+teamsize-1)/teamsize; - Kokkos::TeamPolicy config(nteams,teamsize,NeighClusterSize); - if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev); - else Kokkos::parallel_for(config,f); } } @@ -1261,19 +1235,12 @@ void PairTableKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == N2) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; - } else if (neighflag == FULLCLUSTER) { - neighbor->requests[irequest]->full_cluster = 1; - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk"); } diff --git a/src/KOKKOS/pair_table_kokkos.h b/src/KOKKOS/pair_table_kokkos.h index 09e64804b4..4d3a9ec106 100644 --- a/src/KOKKOS/pair_table_kokkos.h +++ b/src/KOKKOS/pair_table_kokkos.h @@ -41,7 +41,7 @@ template class PairTableKokkos : public Pair { public: - enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER}; + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2}; enum {COUL_FLAG=0}; typedef DeviceType device_type; @@ -170,45 +170,37 @@ class PairTableKokkos : public Pair { friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; - friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; - friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; - friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; - friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; - friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; - friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; - friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; - friend class PairComputeFunctor >; friend void pair_virial_fdotr_compute(PairTableKokkos*); }; diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp index 2908622e87..342aa8faec 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -103,7 +103,6 @@ void PairTersoffKokkos::init_style() //if (neighflag == FULL || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; if (neighflag == FULL) neighbor->requests[irequest]->ghost = 1; else @@ -126,26 +125,26 @@ void PairTersoffKokkos::setup_params() for (i = 1; i <= n; i++) for (j = 1; j <= n; j++) for (k = 1; k <= n; k++) { - m = elem2param[map[i]][map[j]][map[k]]; - k_params.h_view(i,j,k).powerm = params[m].powerm; - k_params.h_view(i,j,k).gamma = params[m].gamma; - k_params.h_view(i,j,k).lam3 = params[m].lam3; - k_params.h_view(i,j,k).c = params[m].c; - k_params.h_view(i,j,k).d = params[m].d; - k_params.h_view(i,j,k).h = params[m].h; - k_params.h_view(i,j,k).powern = params[m].powern; - k_params.h_view(i,j,k).beta = params[m].beta; - k_params.h_view(i,j,k).lam2 = params[m].lam2; - k_params.h_view(i,j,k).bigb = params[m].bigb; - k_params.h_view(i,j,k).bigr = params[m].bigr; - k_params.h_view(i,j,k).bigd = params[m].bigd; - k_params.h_view(i,j,k).lam1 = params[m].lam1; - k_params.h_view(i,j,k).biga = params[m].biga; - k_params.h_view(i,j,k).cutsq = params[m].cutsq; - k_params.h_view(i,j,k).c1 = params[m].c1; - k_params.h_view(i,j,k).c2 = params[m].c2; - k_params.h_view(i,j,k).c3 = params[m].c3; - k_params.h_view(i,j,k).c4 = params[m].c4; + m = elem2param[i-1][j-1][k-1]; + k_params.h_view(i,j,k).powerm = params[m].powerm; + k_params.h_view(i,j,k).gamma = params[m].gamma; + k_params.h_view(i,j,k).lam3 = params[m].lam3; + k_params.h_view(i,j,k).c = params[m].c; + k_params.h_view(i,j,k).d = params[m].d; + k_params.h_view(i,j,k).h = params[m].h; + k_params.h_view(i,j,k).powern = params[m].powern; + k_params.h_view(i,j,k).beta = params[m].beta; + k_params.h_view(i,j,k).lam2 = params[m].lam2; + k_params.h_view(i,j,k).bigb = params[m].bigb; + k_params.h_view(i,j,k).bigr = params[m].bigr; + k_params.h_view(i,j,k).bigd = params[m].bigd; + k_params.h_view(i,j,k).lam1 = params[m].lam1; + k_params.h_view(i,j,k).biga = params[m].biga; + k_params.h_view(i,j,k).cutsq = params[m].cutsq; + k_params.h_view(i,j,k).c1 = params[m].c1; + k_params.h_view(i,j,k).c2 = params[m].c2; + k_params.h_view(i,j,k).c3 = params[m].c3; + k_params.h_view(i,j,k).c4 = params[m].c4; } k_params.template modify(); diff --git a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp index 3406c607f3..95da030b56 100644 --- a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp @@ -102,7 +102,6 @@ void PairTersoffMODKokkos::init_style() if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; if (neighflag == FULL) neighbor->requests[irequest]->ghost = 1; else @@ -125,27 +124,27 @@ void PairTersoffMODKokkos::setup_params() for (i = 1; i <= n; i++) for (j = 1; j <= n; j++) for (k = 1; k <= n; k++) { - m = elem2param[map[i]][map[j]][map[k]]; - k_params.h_view(i,j,k).powerm = params[m].powerm; - k_params.h_view(i,j,k).lam3 = params[m].lam3; - k_params.h_view(i,j,k).h = params[m].h; - k_params.h_view(i,j,k).powern = params[m].powern; - k_params.h_view(i,j,k).beta = params[m].beta; - k_params.h_view(i,j,k).lam2 = params[m].lam2; - k_params.h_view(i,j,k).bigb = params[m].bigb; - k_params.h_view(i,j,k).bigr = params[m].bigr; - k_params.h_view(i,j,k).bigd = params[m].bigd; - k_params.h_view(i,j,k).lam1 = params[m].lam1; - k_params.h_view(i,j,k).biga = params[m].biga; - k_params.h_view(i,j,k).cutsq = params[m].cutsq; - k_params.h_view(i,j,k).c1 = params[m].c1; - k_params.h_view(i,j,k).c2 = params[m].c2; - k_params.h_view(i,j,k).c3 = params[m].c3; - k_params.h_view(i,j,k).c4 = params[m].c4; - k_params.h_view(i,j,k).c5 = params[m].c5; - k_params.h_view(i,j,k).ca1 = params[m].ca1; - k_params.h_view(i,j,k).ca4 = params[m].ca4; - k_params.h_view(i,j,k).powern_del = params[m].powern_del; + m = elem2param[i-1][j-1][k-1]; + k_params.h_view(i,j,k).powerm = params[m].powerm; + k_params.h_view(i,j,k).lam3 = params[m].lam3; + k_params.h_view(i,j,k).h = params[m].h; + k_params.h_view(i,j,k).powern = params[m].powern; + k_params.h_view(i,j,k).beta = params[m].beta; + k_params.h_view(i,j,k).lam2 = params[m].lam2; + k_params.h_view(i,j,k).bigb = params[m].bigb; + k_params.h_view(i,j,k).bigr = params[m].bigr; + k_params.h_view(i,j,k).bigd = params[m].bigd; + k_params.h_view(i,j,k).lam1 = params[m].lam1; + k_params.h_view(i,j,k).biga = params[m].biga; + k_params.h_view(i,j,k).cutsq = params[m].cutsq; + k_params.h_view(i,j,k).c1 = params[m].c1; + k_params.h_view(i,j,k).c2 = params[m].c2; + k_params.h_view(i,j,k).c3 = params[m].c3; + k_params.h_view(i,j,k).c4 = params[m].c4; + k_params.h_view(i,j,k).c5 = params[m].c5; + k_params.h_view(i,j,k).ca1 = params[m].ca1; + k_params.h_view(i,j,k).ca4 = params[m].ca4; + k_params.h_view(i,j,k).powern_del = params[m].powern_del; } k_params.template modify(); diff --git a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp index 07341911bd..a9cc1d1730 100644 --- a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp @@ -113,7 +113,6 @@ void PairTersoffZBLKokkos::init_style() if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; if (neighflag == FULL) neighbor->requests[irequest]->ghost = 1; else @@ -136,30 +135,30 @@ void PairTersoffZBLKokkos::setup_params() for (i = 1; i <= n; i++) for (j = 1; j <= n; j++) for (k = 1; k <= n; k++) { - m = elem2param[map[i]][map[j]][map[k]]; - k_params.h_view(i,j,k).powerm = params[m].powerm; - k_params.h_view(i,j,k).gamma = params[m].gamma; - k_params.h_view(i,j,k).lam3 = params[m].lam3; - k_params.h_view(i,j,k).c = params[m].c; - k_params.h_view(i,j,k).d = params[m].d; - k_params.h_view(i,j,k).h = params[m].h; - k_params.h_view(i,j,k).powern = params[m].powern; - k_params.h_view(i,j,k).beta = params[m].beta; - k_params.h_view(i,j,k).lam2 = params[m].lam2; - k_params.h_view(i,j,k).bigb = params[m].bigb; - k_params.h_view(i,j,k).bigr = params[m].bigr; - k_params.h_view(i,j,k).bigd = params[m].bigd; - k_params.h_view(i,j,k).lam1 = params[m].lam1; - k_params.h_view(i,j,k).biga = params[m].biga; - k_params.h_view(i,j,k).cutsq = params[m].cutsq; - k_params.h_view(i,j,k).c1 = params[m].c1; - k_params.h_view(i,j,k).c2 = params[m].c2; - k_params.h_view(i,j,k).c3 = params[m].c3; - k_params.h_view(i,j,k).c4 = params[m].c4; - k_params.h_view(i,j,k).Z_i = params[m].Z_i; - k_params.h_view(i,j,k).Z_j = params[m].Z_j; - k_params.h_view(i,j,k).ZBLcut = params[m].ZBLcut; - k_params.h_view(i,j,k).ZBLexpscale = params[m].ZBLexpscale; + m = elem2param[i-1][j-1][k-1]; + k_params.h_view(i,j,k).powerm = params[m].powerm; + k_params.h_view(i,j,k).gamma = params[m].gamma; + k_params.h_view(i,j,k).lam3 = params[m].lam3; + k_params.h_view(i,j,k).c = params[m].c; + k_params.h_view(i,j,k).d = params[m].d; + k_params.h_view(i,j,k).h = params[m].h; + k_params.h_view(i,j,k).powern = params[m].powern; + k_params.h_view(i,j,k).beta = params[m].beta; + k_params.h_view(i,j,k).lam2 = params[m].lam2; + k_params.h_view(i,j,k).bigb = params[m].bigb; + k_params.h_view(i,j,k).bigr = params[m].bigr; + k_params.h_view(i,j,k).bigd = params[m].bigd; + k_params.h_view(i,j,k).lam1 = params[m].lam1; + k_params.h_view(i,j,k).biga = params[m].biga; + k_params.h_view(i,j,k).cutsq = params[m].cutsq; + k_params.h_view(i,j,k).c1 = params[m].c1; + k_params.h_view(i,j,k).c2 = params[m].c2; + k_params.h_view(i,j,k).c3 = params[m].c3; + k_params.h_view(i,j,k).c4 = params[m].c4; + k_params.h_view(i,j,k).Z_i = params[m].Z_i; + k_params.h_view(i,j,k).Z_j = params[m].Z_j; + k_params.h_view(i,j,k).ZBLcut = params[m].ZBLcut; + k_params.h_view(i,j,k).ZBLexpscale = params[m].ZBLexpscale; } k_params.template modify(); diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index 19b3204973..a8c9520298 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -33,10 +33,10 @@ template class RegBlockKokkos : public RegBlock { friend class FixPour; - public: typedef DeviceType device_type; typedef ArrayTypes AT; + public: RegBlockKokkos(class LAMMPS *, int, char **); ~RegBlockKokkos(); void match_all_kokkos(int, DAT::t_int_1d); diff --git a/src/finish.cpp b/src/finish.cpp index 0d767b42cd..f305d04346 100644 --- a/src/finish.cpp +++ b/src/finish.cpp @@ -630,22 +630,17 @@ void Finish::end(int flag) // count neighbors in that list for stats purposes // allow it to be Kokkos neigh list as well - for (m = 0; m < neighbor->old_nrequest; m++) { + for (m = 0; m < neighbor->old_nrequest; m++) if ((neighbor->old_requests[m]->half || neighbor->old_requests[m]->gran || neighbor->old_requests[m]->respaouter || neighbor->old_requests[m]->half_from_full) && neighbor->old_requests[m]->skip == 0 && - neighbor->lists[m] && neighbor->lists[m]->numneigh) { - if (!neighbor->lists[m] && lmp->kokkos && - lmp->kokkos->neigh_list_kokkos(m)) break; - else break; - } - } + neighbor->lists[m] && neighbor->lists[m]->numneigh) break; nneigh = 0; if (m < neighbor->old_nrequest) { - if (neighbor->lists[m]) { + if (!neighbor->lists[m]->kokkos) { int inum = neighbor->lists[m]->inum; int *ilist = neighbor->lists[m]->ilist; int *numneigh = neighbor->lists[m]->numneigh; @@ -675,23 +670,19 @@ void Finish::end(int flag) // count neighbors in that list for stats purposes // allow it to be Kokkos neigh list as well - for (m = 0; m < neighbor->old_nrequest; m++) { + for (m = 0; m < neighbor->old_nrequest; m++) if (neighbor->old_requests[m]->full && - neighbor->old_requests[m]->skip == 0) { - if (lmp->kokkos && lmp->kokkos->neigh_list_kokkos(m)) break; - else break; - } - } + neighbor->old_requests[m]->skip == 0) break; nneighfull = 0; if (m < neighbor->old_nrequest) { - if (neighbor->lists[m] && neighbor->lists[m]->numneigh) { + if (!neighbor->lists[m]->kokkos && neighbor->lists[m]->numneigh) { int inum = neighbor->lists[m]->inum; int *ilist = neighbor->lists[m]->ilist; int *numneigh = neighbor->lists[m]->numneigh; for (i = 0; i < inum; i++) nneighfull += numneigh[ilist[i]]; - } else if (!neighbor->lists[m] && lmp->kokkos) + } else if (lmp->kokkos) nneighfull = lmp->kokkos->neigh_count(m); tmp = nneighfull; @@ -865,7 +856,7 @@ void mpi_timings(const char *label, Timer *t, enum Timer::ttype tt, time_cpu = tmp/nprocs*100.0; // % variance from the average as measure of load imbalance - if ((time_sq/time - time) > 1.0e-10) + if (time > 1.0e-10) time_sq = sqrt(time_sq/time - time)*100.0; else time_sq = 0.0; @@ -917,7 +908,7 @@ void omp_times(FixOMP *fix, const char *label, enum Timer::ttype which, time_std /= nthreads; time_total /= nthreads; - if ((time_std/time_avg -time_avg) > 1.0e-10) + if (time_avg > 1.0e-10) time_std = sqrt(time_std/time_avg - time_avg)*100.0; else time_std = 0.0; diff --git a/src/neigh_list.cpp b/src/neigh_list.cpp index dfab9b023a..f8d496fc6b 100644 --- a/src/neigh_list.cpp +++ b/src/neigh_list.cpp @@ -67,6 +67,11 @@ NeighList::NeighList(LAMMPS *lmp) : Pointers(lmp) ipage = NULL; dpage = NULL; + // Kokkos package + + kokkos = 0; + execution_space = Host; + // USER-DPD package ndxAIR_ssa = NULL; diff --git a/src/neigh_list.h b/src/neigh_list.h index d3bde212c2..3b6a4d6760 100644 --- a/src/neigh_list.h +++ b/src/neigh_list.h @@ -69,6 +69,11 @@ class NeighList : protected Pointers { NeighList *listcopy; // me = copy list, point to list I copy from NeighList *listskip; // me = skip list, point to list I skip from + // Kokkos package + + int kokkos; // 1 if list stores Kokkos data + ExecutionSpace execution_space; + // USER-DPD package and Shardlow Splitting Algorithm (SSA) support uint16_t (*ndxAIR_ssa)[8]; // for each atom, last neighbor index of each AIR @@ -80,7 +85,6 @@ class NeighList : protected Pointers { void post_constructor(class NeighRequest *); void setup_pages(int, int); // setup page data structures void grow(int,int); // grow all data structs - void stencil_allocate(int, int); // allocate stencil arrays void print_attributes(); // debug routine int get_maxlocal() {return maxatom;} bigint memory_usage(); diff --git a/src/neighbor.cpp b/src/neighbor.cpp index e58fc7126e..f27376cb2a 100644 --- a/src/neighbor.cpp +++ b/src/neighbor.cpp @@ -621,8 +621,7 @@ void Neighbor::init_pair() delete [] neigh_stencil; delete [] neigh_pair; - if (lmp->kokkos) nlist = init_lists_kokkos(); - else nlist = nrequest; + nlist = nrequest; lists = new NeighList*[nrequest]; neigh_bin = new NBin*[nrequest]; @@ -634,11 +633,10 @@ void Neighbor::init_pair() // wait to allocate initial pages until copy lists are detected for (i = 0; i < nrequest; i++) { - if (requests[i]->kokkos_host || requests[i]->kokkos_device) { - lists[i] = NULL; - continue; - } - lists[i] = new NeighList(lmp); + if (requests[i]->kokkos_host || requests[i]->kokkos_device) + create_kokkos_list(i); + else + lists[i] = new NeighList(lmp); lists[i]->index = i; if (requests[i]->pair) { @@ -680,10 +678,8 @@ void Neighbor::init_pair() // would be useful when reax/c used in hybrid mode, e.g. with airebo for (i = 0; i < nrequest; i++) { - if (lists[i] == NULL) continue; // Kokkos if (requests[i]->pair && requests[i]->half && requests[i]->newton != 2) { for (j = 0; j < nrequest; j++) { - if (lists[j] == NULL) continue; // Kokkos if (requests[j]->full && requests[j]->occasional == 0 && !requests[j]->skip && !requests[j]->copy) break; } @@ -708,10 +704,8 @@ void Neighbor::init_pair() // for 1st or 2nd check, parent can be copy list or pair or fix for (i = 0; i < nrequest; i++) { - if (lists[i] == NULL) continue; // Kokkos if (!requests[i]->fix && !requests[i]->compute) continue; for (j = 0; j < nrequest; j++) { - if (lists[j] == NULL) continue; // Kokkos if (requests[i]->half && requests[j]->pair && !requests[j]->skip && requests[j]->half && !requests[j]->copy) break; @@ -733,7 +727,6 @@ void Neighbor::init_pair() continue; } for (j = 0; j < nrequest; j++) { - if (lists[j] == NULL) continue; // Kokkos if (requests[i]->half && requests[j]->pair && !requests[j]->skip && requests[j]->full && !requests[j]->copy) break; @@ -844,7 +837,6 @@ void Neighbor::init_pair() int dnummax = 0; for (i = 0; i < nlist; i++) { - if (lists[i] == NULL) continue; // Kokkos if (lists[i]->copy) continue; lists[i]->setup_pages(pgsize,oneatom); dnummax = MAX(dnummax,lists[i]->dnum); @@ -864,14 +856,8 @@ void Neighbor::init_pair() // also Kokkos list initialization int maxatom = atom->nmax; - for (i = 0; i < nlist; i++) { - if (lists[i]) { - if (neigh_pair[i] && !lists[i]->copy) lists[i]->grow(maxatom,maxatom); - } else { - init_list_flags1_kokkos(i); - init_list_grow_kokkos(i); - } - } + for (i = 0; i < nlist; i++) + if (neigh_pair[i] && !lists[i]->copy) lists[i]->grow(maxatom,maxatom); // plist = indices of perpetual NPair classes // perpetual = non-occasional, re-built at every reneighboring @@ -885,10 +871,8 @@ void Neighbor::init_pair() plist = new int[nlist]; for (i = 0; i < nlist; i++) { - if (lists[i]) { - if (lists[i]->occasional == 0 && lists[i]->pair_method) - plist[npair_perpetual++] = i; - } else init_list_flags2_kokkos(i); + if (lists[i]->occasional == 0 && lists[i]->pair_method) + plist[npair_perpetual++] = i; } for (i = 0; i < nstencil; i++) { @@ -910,7 +894,6 @@ void Neighbor::init_pair() while (!done) { done = 1; for (i = 0; i < npair_perpetual; i++) { - if (!lists[plist[i]]) continue; // Kokkos check ptr = NULL; if (lists[plist[i]]->listcopy) ptr = lists[plist[i]]->listcopy; if (lists[plist[i]]->listskip) ptr = lists[plist[i]]->listskip; @@ -1154,15 +1137,14 @@ void Neighbor::print_pairwise_info() else if (requests[i]->respamiddle) kind = "respa/middle"; else if (requests[i]->respaouter) kind = "respa/outer"; else if (requests[i]->half_from_full) kind = "half/from/full"; - else if (requests[i]->full_cluster) kind = "full/cluster"; // Kokkos - fprintf(out," kind: %s",kind); - if (requests[i]->occasional) fprintf(out,", occasional"); else fprintf(out,", perpetual"); if (requests[i]->ghost) fprintf(out,", ghost"); if (requests[i]->ssa) fprintf(out,", ssa"); if (requests[i]->omp) fprintf(out,", omp"); if (requests[i]->intel) fprintf(out,", intel"); + if (requests[i]->kokkos_device) fprintf(out,", kokkos_device"); + if (requests[i]->kokkos_host) fprintf(out,", kokkos_host"); if (requests[i]->copy) fprintf(out,", copy from (%d)",requests[i]->otherlist+1); if (requests[i]->skip) @@ -1237,13 +1219,17 @@ int Neighbor::choose_bin(NeighRequest *rq) // flags for settings the request + system requires of NBin class // ssaflag = no/yes ssa request // intelflag = no/yes intel request + // kokkos_device_flag = no/yes kokkos device request + // kokkos_host_flag = no/yes kokkos host request - int ssaflag,intelflag; + int ssaflag,intelflag,kokkos_device_flag,kokkos_host_flag; - ssaflag = intelflag = 0; + ssaflag = intelflag = kokkos_device_flag = kokkos_host_flag = 0; if (rq->ssa) ssaflag = NB_SSA; if (rq->intel) intelflag = NB_INTEL; + if (rq->kokkos_device) kokkos_device_flag = NB_KOKKOS_DEVICE; + if (rq->kokkos_host) kokkos_host_flag = NB_KOKKOS_HOST; // use flags to match exactly one of NBin class masks, bit by bit @@ -1254,6 +1240,8 @@ int Neighbor::choose_bin(NeighRequest *rq) if (ssaflag != (mask & NB_SSA)) continue; if (intelflag != (mask & NB_INTEL)) continue; + if (kokkos_device_flag != (mask & NB_KOKKOS_DEVICE)) continue; + if (kokkos_host_flag != (mask & NB_KOKKOS_HOST)) continue; return i+1; } @@ -1308,6 +1296,7 @@ int Neighbor::choose_stencil(NeighRequest *rq) else if (rq->newton == 1) newtflag = 1; else if (rq->newton == 2) newtflag = 0; + // use flags to match exactly one of NStencil class masks, bit by bit // exactly one of halfflag,fullflag is set and thus must match @@ -1381,16 +1370,18 @@ int Neighbor::choose_pair(NeighRequest *rq) // ssaflag = no/yes request // ompflag = no/yes omp request // intelflag = no/yes intel request + // kokkos_device_flag = no/yes Kokkos device request + // kokkos_host_flag = no/yes Kokkos host request // newtflag = newton off/on request // style = NSQ/BIN/MULTI neighbor style // triclinic = orthgonal/triclinic box int copyflag,skipflag,halfflag,fullflag,halffullflag,sizeflag,respaflag, - ghostflag,off2onflag,onesideflag,ssaflag,ompflag,intelflag; + ghostflag,off2onflag,onesideflag,ssaflag,ompflag,intelflag,kokkos_device_flag,kokkos_host_flag; copyflag = skipflag = halfflag = fullflag = halffullflag = sizeflag = ghostflag = respaflag = off2onflag = onesideflag = ssaflag = - ompflag = intelflag = 0; + ompflag = intelflag = kokkos_device_flag = kokkos_host_flag = 0; if (rq->copy) copyflag = NP_COPY; if (rq->skip) skipflag = NP_SKIP; @@ -1420,6 +1411,8 @@ int Neighbor::choose_pair(NeighRequest *rq) if (rq->ssa) ssaflag = NP_SSA; if (rq->omp) ompflag = NP_OMP; if (rq->intel) intelflag = NP_INTEL; + if (rq->kokkos_device) kokkos_device_flag = NP_KOKKOS_DEVICE; + if (rq->kokkos_host) kokkos_host_flag = NP_KOKKOS_HOST; int newtflag; if (rq->newton == 0 && newton_pair) newtflag = 1; @@ -1460,6 +1453,8 @@ int Neighbor::choose_pair(NeighRequest *rq) if (ssaflag != (mask & NP_SSA)) continue; if (ompflag != (mask & NP_OMP)) continue; if (intelflag != (mask & NP_INTEL)) continue; + if (kokkos_device_flag != (mask & NP_KOKKOS_DEVICE)) continue; + if (kokkos_host_flag != (mask & NP_KOKKOS_HOST)) continue; if (style == NSQ && !(mask & NP_NSQ)) continue; if (style == BIN && !(mask & NP_BIN)) continue; @@ -1802,6 +1797,7 @@ void Neighbor::build_one(class NeighList *mylist, int preflag) ns->create(); } + // build the list np->build_setup(); diff --git a/src/neighbor.h b/src/neighbor.h index 9655cca545..eb603ad84f 100644 --- a/src/neighbor.h +++ b/src/neighbor.h @@ -201,7 +201,7 @@ class Neighbor : protected Pointers { void init_styles(); void init_pair(); - void init_topology(); + virtual void init_topology(); void print_pairwise_info(); void requests_new2old(); @@ -220,18 +220,17 @@ class Neighbor : protected Pointers { int copymode; virtual void init_cutneighsq_kokkos(int) {} - virtual int init_lists_kokkos() {return 0;} - virtual void init_list_flags1_kokkos(int) {} - virtual void init_list_flags2_kokkos(int) {} + virtual void create_kokkos_list(int) {} virtual void init_ex_type_kokkos(int) {} virtual void init_ex_bit_kokkos() {} virtual void init_ex_mol_bit_kokkos() {} - virtual void init_list_grow_kokkos(int) {} }; namespace NeighConst { static const int NB_SSA = 1<<0; static const int NB_INTEL = 1<<1; + static const int NB_KOKKOS_DEVICE = 1<<2; + static const int NB_KOKKOS_HOST = 1<<3; static const int NS_HALF = 1<<0; static const int NS_FULL = 1<<1; @@ -266,6 +265,8 @@ namespace NeighConst { static const int NP_NEWTOFF = 1<<17; static const int NP_ORTHO = 1<<18; static const int NP_TRI = 1<<19; + static const int NP_KOKKOS_DEVICE = 1<<20; + static const int NP_KOKKOS_HOST = 1<<21; } } diff --git a/src/npair.h b/src/npair.h index 70fcc5c452..a6440faddf 100644 --- a/src/npair.h +++ b/src/npair.h @@ -31,7 +31,7 @@ class NPair : protected Pointers { NPair(class LAMMPS *); virtual ~NPair() {} - void copy_neighbor_info(); + virtual void copy_neighbor_info(); void build_setup(); virtual void build(class NeighList *) = 0; @@ -94,8 +94,8 @@ class NPair : protected Pointers { // methods for all NPair variants void copy_bin_setup_info(); - void copy_bin_info(); - void copy_stencil_info(); + virtual void copy_bin_info(); + virtual void copy_stencil_info(); int exclusion(int, int, int, int, int *, tagint *) const; // test for pair exclusion diff --git a/src/nstencil.h b/src/nstencil.h index b9c6dd58fb..8672584a19 100644 --- a/src/nstencil.h +++ b/src/nstencil.h @@ -37,7 +37,7 @@ class NStencil : protected Pointers { NStencil(class LAMMPS *); virtual ~NStencil(); void copy_neighbor_info(); - void create_setup(); + virtual void create_setup(); bigint memory_usage(); virtual void create() = 0; From 9b48c49f8348560037c98fb38b86b346838d6936 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 8 Dec 2016 09:18:55 -0700 Subject: [PATCH 2/4] Removing used Kokkos file --- src/KOKKOS/neigh_full_kokkos.h | 875 --------------------------------- 1 file changed, 875 deletions(-) delete mode 100644 src/KOKKOS/neigh_full_kokkos.h diff --git a/src/KOKKOS/neigh_full_kokkos.h b/src/KOKKOS/neigh_full_kokkos.h deleted file mode 100644 index 9125b5fbe2..0000000000 --- a/src/KOKKOS/neigh_full_kokkos.h +++ /dev/null @@ -1,875 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#include "atom_kokkos.h" -#include "atom_masks.h" -#include "domain_kokkos.h" - -namespace LAMMPS_NS { - -/* ---------------------------------------------------------------------- */ - -template -void NeighborKokkos::full_bin_kokkos(NeighListKokkos *list) -{ - const int nlocal = includegroup?atom->nfirst:atom->nlocal; - int nall = nlocal; - if (GHOST) - nall += atom->nghost; - list->grow(nall); - - NeighborKokkosExecute - data(*list, - k_cutneighsq.view(), - k_bincount.view(), - k_bins.view(),nlocal, - atomKK->k_x.view(), - atomKK->k_type.view(), - atomKK->k_mask.view(), - atomKK->k_molecule.view(), - atomKK->k_tag.view(), - atomKK->k_special.view(), - atomKK->k_nspecial.view(), - atomKK->molecular, - nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo, - bininvx,bininvy,bininvz, - exclude, nex_type,maxex_type, - k_ex1_type.view(), - k_ex2_type.view(), - k_ex_type.view(), - nex_group,maxex_group, - k_ex1_group.view(), - k_ex2_group.view(), - k_ex1_bit.view(), - k_ex2_bit.view(), - nex_mol, maxex_mol, - k_ex_mol_group.view(), - k_ex_mol_bit.view(), - bboxhi,bboxlo, - domain->xperiodic,domain->yperiodic,domain->zperiodic, - domain->xprd_half,domain->yprd_half,domain->zprd_half); - - k_cutneighsq.sync(); - k_ex1_type.sync(); - k_ex2_type.sync(); - k_ex_type.sync(); - k_ex1_group.sync(); - k_ex2_group.sync(); - k_ex1_bit.sync(); - k_ex2_bit.sync(); - k_ex_mol_group.sync(); - k_ex_mol_bit.sync(); - atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK); - Kokkos::deep_copy(list->d_stencil,list->h_stencil); - if (GHOST) - Kokkos::deep_copy(list->d_stencilxyz,list->h_stencilxyz); - - data.special_flag[0] = special_flag[0]; - data.special_flag[1] = special_flag[1]; - data.special_flag[2] = special_flag[2]; - data.special_flag[3] = special_flag[3]; - - while(data.h_resize() > 0) { - data.h_resize() = 0; - deep_copy(data.resize, data.h_resize); - - MemsetZeroFunctor f_zero; - f_zero.ptr = (void*) k_bincount.view().ptr_on_device(); - Kokkos::parallel_for(mbins, f_zero); - DeviceType::fence(); - - NeighborKokkosBinAtomsFunctor f(data); - - Kokkos::parallel_for(atom->nlocal+atom->nghost, f); - DeviceType::fence(); - - deep_copy(data.h_resize, data.resize); - if(data.h_resize()) { - - atoms_per_bin += 16; - k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin); - data.bins = k_bins.view(); - data.c_bins = data.bins; - } - } - - if(list->d_neighbors.dimension_0()d_neighbors = typename ArrayTypes::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs); - list->d_numneigh = typename ArrayTypes::t_int_1d("numneigh", nall*1.1); - data.neigh_list.d_neighbors = list->d_neighbors; - data.neigh_list.d_numneigh = list->d_numneigh; - } - data.h_resize()=1; - while(data.h_resize()) { - data.h_new_maxneighs() = list->maxneighs; - data.h_resize() = 0; - - Kokkos::deep_copy(data.resize, data.h_resize); - Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs); -#ifdef KOKKOS_HAVE_CUDA - #define BINS_PER_BLOCK 2 - const int factor = atoms_per_bin<64?2:1; - Kokkos::TeamPolicy config((mbins+factor-1)/factor,atoms_per_bin*factor); -#else - const int factor = 1; -#endif - -if (GHOST) { - NeighborKokkosBuildFunctorGhost f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); - Kokkos::parallel_for(nall, f); -} else { - if(newton_pair) { - NeighborKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); -#ifdef KOKKOS_HAVE_CUDA - Kokkos::parallel_for(config, f); -#else - Kokkos::parallel_for(nall, f); -#endif - } else { - NeighborKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); -#ifdef KOKKOS_HAVE_CUDA - Kokkos::parallel_for(config, f); -#else - Kokkos::parallel_for(nall, f); -#endif - } -} - DeviceType::fence(); - deep_copy(data.h_resize, data.resize); - - if(data.h_resize()) { - deep_copy(data.h_new_maxneighs, data.new_maxneighs); - list->maxneighs = data.h_new_maxneighs() * 1.2; - list->d_neighbors = typename ArrayTypes::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs); - data.neigh_list.d_neighbors = list->d_neighbors; - data.neigh_list.maxneighs = list->maxneighs; - } - } - - if (GHOST) { - list->inum = atom->nlocal; - list->gnum = nall - atom->nlocal; - } else { - list->inum = nall; - list->gnum = 0; - } - - list->k_ilist.template modify(); -} - -/* ---------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void NeighborKokkosExecute::binatomsItem(const int &i) const -{ - const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2)); - - const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1); - if(ac < bins.dimension_1()) { - bins(ibin, ac) = i; - } else { - resize() = 1; - } -} - -/* ---------------------------------------------------------------------- */ -template -KOKKOS_INLINE_FUNCTION -int NeighborKokkosExecute::find_special(const int &i, const int &j) const -{ - const int n1 = nspecial(i,0); - const int n2 = nspecial(i,1); - const int n3 = nspecial(i,2); - - for (int k = 0; k < n3; k++) { - if (special(i,k) == tag(j)) { - if (k < n1) { - if (special_flag[1] == 0) return -1; - else if (special_flag[1] == 1) return 0; - else return 1; - } else if (k < n2) { - if (special_flag[2] == 0) return -1; - else if (special_flag[2] == 1) return 0; - else return 2; - } else { - if (special_flag[3] == 0) return -1; - else if (special_flag[3] == 1) return 0; - else return 3; - } - } - } - return 0; -}; - -/* ---------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -int NeighborKokkosExecute::exclusion(const int &i,const int &j, - const int &itype,const int &jtype) const -{ - int m; - - if (nex_type && ex_type(itype,jtype)) return 1; - - if (nex_group) { - for (m = 0; m < nex_group; m++) { - if (mask(i) & ex1_bit(m) && mask(j) & ex2_bit(m)) return 1; - if (mask(i) & ex2_bit(m) && mask(j) & ex1_bit(m)) return 1; - } - } - - if (nex_mol) { - for (m = 0; m < nex_mol; m++) - if (mask(i) & ex_mol_bit(m) && mask(j) & ex_mol_bit(m) && - molecule(i) == molecule(j)) return 1; - } - - return 0; -} - -/* ---------------------------------------------------------------------- */ - -template template -void NeighborKokkosExecute:: - build_Item(const int &i) const -{ - /* if necessary, goto next page and add pages */ - int n = 0; - int which = 0; - int moltemplate; - if (molecular == 2) moltemplate = 1; - else moltemplate = 0; - // get subview of neighbors of i - - const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i); - const X_FLOAT xtmp = x(i, 0); - const X_FLOAT ytmp = x(i, 1); - const X_FLOAT ztmp = x(i, 2); - const int itype = type(i); - - const int ibin = coord2bin(xtmp, ytmp, ztmp); - - const int nstencil = neigh_list.nstencil; - const typename ArrayTypes::t_int_1d_const_um stencil - = neigh_list.d_stencil; - - // loop over all bins in neighborhood (includes ibin) - if(HalfNeigh) - for(int m = 0; m < c_bincount(ibin); m++) { - const int j = c_bins(ibin,m); - const int jtype = type(j); - - //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists - if((j == i) || (HalfNeigh && !GhostNewton && (j < i)) || - (HalfNeigh && GhostNewton && ((j < i) || ((j >= nlocal) && - ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) || - (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp))))) - ) continue; - if(exclude && exclusion(i,j,itype,jtype)) continue; - - const X_FLOAT delx = xtmp - x(j, 0); - const X_FLOAT dely = ytmp - x(j, 1); - const X_FLOAT delz = ztmp - x(j, 2); - const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; - if(rsq <= cutneighsq(itype,jtype)) { - if (molecular) { - if (!moltemplate) - which = find_special(i,j); - /* else if (imol >= 0) */ - /* which = find_special(onemols[imol]->special[iatom], */ - /* onemols[imol]->nspecial[iatom], */ - /* tag[j]-tagprev); */ - /* else which = 0; */ - if (which == 0){ - if(n 0) { - if(n::t_int_1d_const_um =Kokkos::subview(bins,jbin,ALL); - for(int m = 0; m < c_bincount(jbin); m++) { - const int j = c_bins(jbin,m); - const int jtype = type(j); - - if(HalfNeigh && !GhostNewton && (j < i)) continue; - if(!HalfNeigh && j==i) continue; - if(exclude && exclusion(i,j,itype,jtype)) continue; - - const X_FLOAT delx = xtmp - x(j, 0); - const X_FLOAT dely = ytmp - x(j, 1); - const X_FLOAT delz = ztmp - x(j, 2); - const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; - - if(rsq <= cutneighsq(itype,jtype)) { - if (molecular) { - if (!moltemplate) - which = find_special(i,j); - /* else if (imol >= 0) */ - /* which = find_special(onemols[imol]->special[iatom], */ - /* onemols[imol]->nspecial[iatom], */ - /* tag[j]-tagprev); */ - /* else which = 0; */ - if (which == 0){ - if(n 0) { - if(n= neigh_list.maxneighs) { - resize() = 1; - - if(n >= new_maxneighs()) new_maxneighs() = n; - } - neigh_list.d_ilist(i) = i; -} - -#ifdef KOKKOS_HAVE_CUDA -extern __shared__ X_FLOAT sharedmem[]; - -/* ---------------------------------------------------------------------- */ - -template template -__device__ inline -void NeighborKokkosExecute::build_ItemCuda(typename Kokkos::TeamPolicy::member_type dev) const -{ - /* loop over atoms in i's bin, - */ - const int atoms_per_bin = c_bins.dimension_1(); - const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin<1?1:dev.team_size()/atoms_per_bin; - const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size(); - const int MY_BIN = dev.team_rank()/atoms_per_bin; - - const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN; - - if(ibin >=c_bincount.dimension_0()) return; - X_FLOAT* other_x = sharedmem; - other_x = other_x + 5*atoms_per_bin*MY_BIN; - - int* other_id = (int*) &other_x[4 * atoms_per_bin]; - - int bincount_current = c_bincount[ibin]; - - for(int kk = 0; kk < TEAMS_PER_BIN; kk++) { - const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size(); - const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1; - /* if necessary, goto next page and add pages */ - - int n = 0; - - X_FLOAT xtmp; - X_FLOAT ytmp; - X_FLOAT ztmp; - int itype; - const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i= 0) { - xtmp = x(i, 0); - ytmp = x(i, 1); - ztmp = x(i, 2); - itype = type(i); - other_x[MY_II] = xtmp; - other_x[MY_II + atoms_per_bin] = ytmp; - other_x[MY_II + 2 * atoms_per_bin] = ztmp; - other_x[MY_II + 3 * atoms_per_bin] = itype; - } - other_id[MY_II] = i; - int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0); - - if(test) return; - - if(i >= 0 && i < nlocal) { - #pragma unroll 4 - for(int m = 0; m < bincount_current; m++) { - int j = other_id[m]; - const int jtype = other_x[m + 3 * atoms_per_bin]; - - //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists - if((j == i) || - (HalfNeigh && !GhostNewton && (j < i)) || - (HalfNeigh && GhostNewton && - ((j < i) || - ((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) || - (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp))))) - ) continue; - if(exclude && exclusion(i,j,itype,jtype)) continue; - const X_FLOAT delx = xtmp - other_x[m]; - const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; - const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; - const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; - - if(rsq <= cutneighsq(itype,jtype)) { - if (molecular) { - int which = 0; - if (!moltemplate) - which = find_special(i,j); - /* else if (imol >= 0) */ - /* which = find_special(onemols[imol]->special[iatom], */ - /* onemols[imol]->nspecial[iatom], */ - /* tag[j]-tagprev); */ - /* else which = 0; */ - if (which == 0){ - if(n 0) { - if(n::t_int_1d_const_um stencil - = neigh_list.d_stencil; - for(int k = 0; k < nstencil; k++) { - const int jbin = ibin + stencil[k]; - - if(ibin == jbin) continue; - - bincount_current = c_bincount[jbin]; - int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1; - - if(j >= 0) { - other_x[MY_II] = x(j, 0); - other_x[MY_II + atoms_per_bin] = x(j, 1); - other_x[MY_II + 2 * atoms_per_bin] = x(j, 2); - other_x[MY_II + 3 * atoms_per_bin] = type(j); - } - - other_id[MY_II] = j; - - __syncthreads(); - - if(i >= 0 && i < nlocal) { - #pragma unroll 8 - for(int m = 0; m < bincount_current; m++) { - const int j = other_id[m]; - const int jtype = other_x[m + 3 * atoms_per_bin]; - - //if(HalfNeigh && (j < i)) continue; - if(HalfNeigh && !GhostNewton && (j < i)) continue; - if(!HalfNeigh && j==i) continue; - if(exclude && exclusion(i,j,itype,jtype)) continue; - - const X_FLOAT delx = xtmp - other_x[m]; - const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; - const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; - const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; - - if(rsq <= cutneighsq(itype,jtype)) { - if (molecular) { - int which = 0; - if (!moltemplate) - which = find_special(i,j); - /* else if (imol >= 0) */ - /* which = find_special(onemols[imol]->special[iatom], */ - /* onemols[imol]->nspecial[iatom], */ - /* tag[j]-tagprev); */ - /* else which = 0; */ - if (which == 0){ - if(n 0) { - if(n= 0 && i < nlocal) { - neigh_list.d_numneigh(i) = n; - neigh_list.d_ilist(i) = i; - } - - if(n >= neigh_list.maxneighs) { - resize() = 1; - - if(n >= new_maxneighs()) new_maxneighs() = n; - } - } -} -#endif - -/* ---------------------------------------------------------------------- */ - -template template -void NeighborKokkosExecute:: - build_Item_Ghost(const int &i) const -{ - /* if necessary, goto next page and add pages */ - int n = 0; - int which = 0; - int moltemplate; - if (molecular == 2) moltemplate = 1; - else moltemplate = 0; - // get subview of neighbors of i - - const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i); - const X_FLOAT xtmp = x(i, 0); - const X_FLOAT ytmp = x(i, 1); - const X_FLOAT ztmp = x(i, 2); - const int itype = type(i); - - const int nstencil = neigh_list.nstencil; - const typename ArrayTypes::t_int_1d_const_um stencil - = neigh_list.d_stencil; - const typename ArrayTypes::t_int_1d_3_const_um stencilxyz - = neigh_list.d_stencilxyz; - - // loop over all atoms in surrounding bins in stencil including self - // when i is a ghost atom, must check if stencil bin is out of bounds - // skip i = j - // no molecular test when i = ghost atom - - if (i < nlocal) { - const int ibin = coord2bin(xtmp, ytmp, ztmp); - for (int k = 0; k < nstencil; k++) { - const int jbin = ibin + stencil[k]; - for(int m = 0; m < c_bincount(jbin); m++) { - const int j = c_bins(jbin,m); - - if (HalfNeigh && j <= i) continue; - else if (j == i) continue; - - const int jtype = type[j]; - if(exclude && exclusion(i,j,itype,jtype)) continue; - - const X_FLOAT delx = xtmp - x(j,0); - const X_FLOAT dely = ytmp - x(j,1); - const X_FLOAT delz = ztmp - x(j,2); - const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; - - if (rsq <= cutneighsq(itype,jtype)) { - if (molecular) { - if (!moltemplate) - which = find_special(i,j); - /* else if (imol >= 0) */ - /* which = find_special(onemols[imol]->special[iatom], */ - /* onemols[imol]->nspecial[iatom], */ - /* tag[j]-tagprev); */ - /* else which = 0; */ - if (which == 0){ - if(n 0) { - if(n= mbinx || - ybin2 < 0 || ybin2 >= mbiny || - zbin2 < 0 || zbin2 >= mbinz) continue; - const int jbin = ibin + stencil[k]; - for(int m = 0; m < c_bincount(jbin); m++) { - const int j = c_bins(jbin,m); - - if (HalfNeigh && j <= i) continue; - else if (j == i) continue; - - const int jtype = type[j]; - if(exclude && exclusion(i,j,itype,jtype)) continue; - - const X_FLOAT delx = xtmp - x(j,0); - const X_FLOAT dely = ytmp - x(j,1); - const X_FLOAT delz = ztmp - x(j,2); - const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; - - if (rsq <= cutneighsq(itype,jtype)) { - if(n= neigh_list.maxneighs) { - resize() = 1; - - if(n >= new_maxneighs()) new_maxneighs() = n; - } - neigh_list.d_ilist(i) = i; -} - -template -void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos *list) -{ - const int nall = includegroup?atom->nfirst:atom->nlocal; - list->grow(nall); - - NeighborKokkosExecute - data(*list, - k_cutneighsq.view(), - k_bincount.view(), - k_bins.view(),nall, - atomKK->k_x.view(), - atomKK->k_type.view(), - atomKK->k_mask.view(), - atomKK->k_molecule.view(), - atomKK->k_tag.view(), - atomKK->k_special.view(), - atomKK->k_nspecial.view(), - atomKK->molecular, - nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo, - bininvx,bininvy,bininvz, - exclude, nex_type,maxex_type, - k_ex1_type.view(), - k_ex2_type.view(), - k_ex_type.view(), - nex_group,maxex_group, - k_ex1_group.view(), - k_ex2_group.view(), - k_ex1_bit.view(), - k_ex2_bit.view(), - nex_mol, maxex_mol, - k_ex_mol_group.view(), - k_ex_mol_bit.view(), - bboxhi,bboxlo, - domain->xperiodic,domain->yperiodic,domain->zperiodic, - domain->xprd_half,domain->yprd_half,domain->zprd_half); - - k_cutneighsq.sync(); - k_ex1_type.sync(); - k_ex2_type.sync(); - k_ex_type.sync(); - k_ex1_group.sync(); - k_ex2_group.sync(); - k_ex1_bit.sync(); - k_ex2_bit.sync(); - k_ex_mol_group.sync(); - k_ex_mol_bit.sync(); - - data.special_flag[0] = special_flag[0]; - data.special_flag[1] = special_flag[1]; - data.special_flag[2] = special_flag[2]; - data.special_flag[3] = special_flag[3]; - - atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK); - Kokkos::deep_copy(list->d_stencil,list->h_stencil); - DeviceType::fence(); - - while(data.h_resize() > 0) { - data.h_resize() = 0; - deep_copy(data.resize, data.h_resize); - - MemsetZeroFunctor f_zero; - f_zero.ptr = (void*) k_bincount.view().ptr_on_device(); - Kokkos::parallel_for(mbins, f_zero); - DeviceType::fence(); - - NeighborKokkosBinAtomsFunctor f(data); - - Kokkos::parallel_for(atom->nlocal+atom->nghost, f); - DeviceType::fence(); - - deep_copy(data.h_resize, data.resize); - if(data.h_resize()) { - - atoms_per_bin += 16; - k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin); - data.bins = k_bins.view(); - data.c_bins = data.bins; - } - } - - if(list->d_neighbors.dimension_0()d_neighbors = typename ArrayTypes::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs); - list->d_numneigh = typename ArrayTypes::t_int_1d("numneigh", nall*1.1); - data.neigh_list.d_neighbors = list->d_neighbors; - data.neigh_list.d_numneigh = list->d_numneigh; - } - data.h_resize()=1; - while(data.h_resize()) { - data.h_new_maxneighs() = list->maxneighs; - data.h_resize() = 0; - - Kokkos::deep_copy(data.resize, data.h_resize); - Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs); -#ifdef KOKKOS_HAVE_CUDA - #define BINS_PER_BLOCK 2 - const int factor = atoms_per_bin<64?2:1; - Kokkos::TeamPolicy config((mbins+factor-1)/factor,atoms_per_bin*factor); -#else - const int factor = 1; -#endif - -if(newton_pair) { - NeighborClusterKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); -//#ifdef KOKKOS_HAVE_CUDA -// Kokkos::parallel_for(config, f); -//#else - Kokkos::parallel_for(nall, f); -//#endif -} else { - NeighborClusterKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); -//#ifdef KOKKOS_HAVE_CUDA -// Kokkos::parallel_for(config, f); -//#else - Kokkos::parallel_for(nall, f); -//#endif -} - DeviceType::fence(); - deep_copy(data.h_resize, data.resize); - - if(data.h_resize()) { - deep_copy(data.h_new_maxneighs, data.new_maxneighs); - list->maxneighs = data.h_new_maxneighs() * 1.2; - list->d_neighbors = typename ArrayTypes::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs); - data.neigh_list.d_neighbors = list->d_neighbors; - data.neigh_list.maxneighs = list->maxneighs; - } - } - - list->inum = nall; - list->gnum = 0; - -} - -/* ---------------------------------------------------------------------- */ - -template template -void NeighborKokkosExecute:: - build_cluster_Item(const int &i) const -{ - /* if necessary, goto next page and add pages */ - int n = 0; - - // get subview of neighbors of i - - const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i); - const X_FLOAT xtmp = x(i, 0); - const X_FLOAT ytmp = x(i, 1); - const X_FLOAT ztmp = x(i, 2); - const int itype = type(i); - - const int ibin = coord2bin(xtmp, ytmp, ztmp); - - const int nstencil = neigh_list.nstencil; - const typename ArrayTypes::t_int_1d_const_um stencil - = neigh_list.d_stencil; - - for(int k = 0; k < nstencil; k++) { - const int jbin = ibin + stencil[k]; - for(int m = 0; m < c_bincount(jbin); m++) { - const int j = c_bins(jbin,m); - bool skip = i == j; - for(int k = 0; k< (n= neigh_list.maxneighs) { - resize() = 1; - - if(n >= new_maxneighs()) new_maxneighs() = n; - } - neigh_list.d_ilist(i) = i; -} - -} From 435421301b0b851f1b73c04885f63e2f09c20475 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 9 Dec 2016 08:37:01 -0700 Subject: [PATCH 3/4] Small tweaks to Kokkos neighbor --- src/KOKKOS/neigh_list_kokkos.h | 2 +- src/KOKKOS/neighbor_kokkos.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/neigh_list_kokkos.h b/src/KOKKOS/neigh_list_kokkos.h index 393fa478a1..45e768927c 100644 --- a/src/KOKKOS/neigh_list_kokkos.h +++ b/src/KOKKOS/neigh_list_kokkos.h @@ -76,7 +76,7 @@ public: typename ArrayTypes::t_int_1d d_numneigh; // # of J neighs for each I NeighListKokkos(class LAMMPS *lmp): - NeighList(lmp) {_stride = 1; maxneighs = 16; kokkos = 1; + NeighList(lmp) {_stride = 1; maxneighs = 16; kokkos = 1; maxatoms = 0; execution_space = ExecutionSpaceFromDevice::space; }; ~NeighListKokkos() {numneigh = NULL; ilist = NULL;}; diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp index ff154c9919..ae8ae82c13 100644 --- a/src/KOKKOS/neighbor_kokkos.cpp +++ b/src/KOKKOS/neighbor_kokkos.cpp @@ -92,6 +92,9 @@ void NeighborKokkos::init_cutneighsq_kokkos(int n) void NeighborKokkos::create_kokkos_list(int i) { + if (style != BIN) + error->all(FLERR,"KOKKOS package only supports 'bin' neighbor lists"); + if (requests[i]->kokkos_device) { lists[i] = new NeighListKokkos(lmp); device_flag = 1; @@ -227,9 +230,6 @@ void NeighborKokkos::build(int topoflag) template void NeighborKokkos::build_kokkos(int topoflag) { - if (style != BIN) - error->all(FLERR,"KOKKOS package only supports 'bin' neighbor lists"); - typedef DeviceType device_type; int i,m; From 43c459ba56948da331446e3e7357691b3b95e177 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 9 Dec 2016 15:56:55 -0700 Subject: [PATCH 4/4] More changes for Kokkos neighbor --- src/KOKKOS/Install.sh | 2 ++ src/KOKKOS/nbin_kokkos.cpp | 1 + src/KOKKOS/npair_copy_kokkos.cpp | 62 ++++++++++++++++++++++++++++++++ src/KOKKOS/npair_copy_kokkos.h | 48 +++++++++++++++++++++++++ src/KOKKOS/npair_kokkos.cpp | 15 ++++---- src/KOKKOS/npair_kokkos.h | 23 ++++-------- src/neighbor.cpp | 22 +++++++++--- 7 files changed, 146 insertions(+), 27 deletions(-) create mode 100644 src/KOKKOS/npair_copy_kokkos.cpp create mode 100644 src/KOKKOS/npair_copy_kokkos.h diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index ebafb87466..7a725a021c 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -109,6 +109,8 @@ action neigh_list_kokkos.cpp action neigh_list_kokkos.h action neighbor_kokkos.cpp action neighbor_kokkos.h +action npair_copy_kokkos.cpp +action npair_copy_kokkos.h action npair_kokkos.cpp action npair_kokkos.h action nbin_kokkos.cpp diff --git a/src/KOKKOS/nbin_kokkos.cpp b/src/KOKKOS/nbin_kokkos.cpp index feec72f45a..9a73e23b42 100644 --- a/src/KOKKOS/nbin_kokkos.cpp +++ b/src/KOKKOS/nbin_kokkos.cpp @@ -116,6 +116,7 @@ void NBinKokkos::bin_atoms() k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin); bins = k_bins.view(); c_bins = bins; + last_bin_memory = update->ntimestep; } } } diff --git a/src/KOKKOS/npair_copy_kokkos.cpp b/src/KOKKOS/npair_copy_kokkos.cpp new file mode 100644 index 0000000000..6835d8c1b5 --- /dev/null +++ b/src/KOKKOS/npair_copy_kokkos.cpp @@ -0,0 +1,62 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "npair_copy_kokkos.h" +#include "neighbor.h" +#include "neigh_list_kokkos.h" +#include "atom.h" +#include "atom_vec.h" +#include "molecule.h" +#include "domain.h" +#include "my_page.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +NPairCopyKokkos::NPairCopyKokkos(LAMMPS *lmp) : NPair(lmp) {} + +/* ---------------------------------------------------------------------- + create list which is simply a copy of parent list +------------------------------------------------------------------------- */ + +template +void NPairCopyKokkos::build(NeighList *list) +{ + NeighList *listcopy = list->listcopy; + + list->inum = listcopy->inum; + list->gnum = listcopy->gnum; + list->ilist = listcopy->ilist; + list->numneigh = listcopy->numneigh; + list->firstneigh = listcopy->firstneigh; + list->firstdouble = listcopy->firstdouble; + list->ipage = listcopy->ipage; + list->dpage = listcopy->dpage; + + NeighListKokkos* list_kk = (NeighListKokkos*) list; + NeighListKokkos* listcopy_kk = (NeighListKokkos*) list->listcopy; + + list_kk->d_ilist = listcopy_kk->d_ilist; + list_kk->d_numneigh = listcopy_kk->d_numneigh; + list_kk->d_neighbors = listcopy_kk->d_neighbors; +} + +namespace LAMMPS_NS { +template class NPairCopyKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class NPairCopyKokkos; +#endif +} diff --git a/src/KOKKOS/npair_copy_kokkos.h b/src/KOKKOS/npair_copy_kokkos.h new file mode 100644 index 0000000000..84eb10b204 --- /dev/null +++ b/src/KOKKOS/npair_copy_kokkos.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef NPAIR_CLASS + +NPairStyle(copy/kk/device, + NPairCopyKokkos, + NP_COPY | NP_KOKKOS_DEVICE) + +NPairStyle(copy/kk/host, + NPairCopyKokkos, + NP_COPY | NP_KOKKOS_HOST) + +#else + +#ifndef LMP_NPAIR_COPY_KOKKOS_H +#define LMP_NPAIR_COPY_KOKKOS_H + +#include "npair.h" + +namespace LAMMPS_NS { + +template +class NPairCopyKokkos : public NPair { + public: + NPairCopyKokkos(class LAMMPS *); + ~NPairCopyKokkos() {} + void build(class NeighList *); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index f987304452..fd32cd463e 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -51,9 +51,9 @@ void NPairKokkos::copy_neighbor_info() k_ex2_type = neighborKK->k_ex2_type; k_ex_type = neighborKK->k_ex_type; k_ex1_group = neighborKK->k_ex1_group; - k_ex2_group = neighborKK->k_ex1_group; - k_ex1_bit = neighborKK->k_ex1_group; - k_ex2_bit = neighborKK->k_ex1_group; + k_ex2_group = neighborKK->k_ex2_group; + k_ex1_bit = neighborKK->k_ex1_bit; + k_ex2_bit = neighborKK->k_ex2_bit; k_ex_mol_group = neighborKK->k_ex_mol_group; k_ex_mol_bit = neighborKK->k_ex_mol_bit; } @@ -135,16 +135,16 @@ void NPairKokkos::build(NeighList *list_) atomKK->molecular, nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo, bininvx,bininvy,bininvz, - exclude, nex_type,maxex_type, + exclude, nex_type, k_ex1_type.view(), k_ex2_type.view(), k_ex_type.view(), - nex_group,maxex_group, + nex_group, k_ex1_group.view(), k_ex2_group.view(), k_ex1_bit.view(), k_ex2_bit.view(), - nex_mol, maxex_mol, + nex_mol, k_ex_mol_group.view(), k_ex_mol_bit.view(), bboxhi,bboxlo, @@ -161,6 +161,8 @@ void NPairKokkos::build(NeighList *list_) k_ex2_bit.sync(); k_ex_mol_group.sync(); k_ex_mol_bit.sync(); + k_bincount.sync(), + k_bins.sync(), atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK); data.special_flag[0] = special_flag[0]; @@ -415,6 +417,7 @@ void NeighborKokkosExecute:: if(n >= new_maxneighs()) new_maxneighs() = n; } + neigh_list.d_ilist(i) = i; } diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index 666508a22d..4b77175191 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -75,14 +75,6 @@ class NPairKokkos : public NPair { private: int newton_pair; - int nex_type; - int maxex_type; - - int nex_group; - int maxex_group; - - int nex_mol; - int maxex_mol; // data from Neighbor class @@ -127,17 +119,14 @@ class NeighborKokkosExecute const int exclude; const int nex_type; - const int maxex_type; const typename AT::t_int_1d_const ex1_type,ex2_type; const typename AT::t_int_2d_const ex_type; const int nex_group; - const int maxex_group; const typename AT::t_int_1d_const ex1_group,ex2_group; const typename AT::t_int_1d_const ex1_bit,ex2_bit; const int nex_mol; - const int maxex_mol; const typename AT::t_int_1d_const ex_mol_group; const typename AT::t_int_1d_const ex_mol_bit; @@ -204,16 +193,16 @@ class NeighborKokkosExecute const int & _mbinx,const int & _mbiny,const int & _mbinz, const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo, const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz, - const int & _exclude,const int & _nex_type,const int & _maxex_type, + const int & _exclude,const int & _nex_type, const typename AT::t_int_1d_const & _ex1_type, const typename AT::t_int_1d_const & _ex2_type, const typename AT::t_int_2d_const & _ex_type, - const int & _nex_group,const int & _maxex_group, + const int & _nex_group, const typename AT::t_int_1d_const & _ex1_group, const typename AT::t_int_1d_const & _ex2_group, const typename AT::t_int_1d_const & _ex1_bit, const typename AT::t_int_1d_const & _ex2_bit, - const int & _nex_mol,const int & _maxex_mol, + const int & _nex_mol, const typename AT::t_int_1d_const & _ex_mol_group, const typename AT::t_int_1d_const & _ex_mol_bit, const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo, @@ -229,11 +218,11 @@ class NeighborKokkosExecute mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz), mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo), bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz), - exclude(_exclude),nex_type(_nex_type),maxex_type(_maxex_type), + exclude(_exclude),nex_type(_nex_type), ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type), - nex_group(_nex_group),maxex_group(_maxex_group), + nex_group(_nex_group), ex1_group(_ex1_group),ex2_group(_ex2_group), - ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),maxex_mol(_maxex_mol), + ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol), ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit), xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic), xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) { diff --git a/src/neighbor.cpp b/src/neighbor.cpp index f27376cb2a..81f33e2479 100644 --- a/src/neighbor.cpp +++ b/src/neighbor.cpp @@ -660,10 +660,8 @@ void Neighbor::init_pair() // processes copy,skip,half_from_full,granhistory,respaouter lists // error checks and resets internal ptrs to other lists that now exist - for (i = 0; i < nrequest; i++) { - if (!lists[i]) continue; + for (i = 0; i < nrequest; i++) lists[i]->post_constructor(requests[i]); - } // (B) rule: // if request = pair, half, newton != 2 @@ -680,6 +678,10 @@ void Neighbor::init_pair() for (i = 0; i < nrequest; i++) { if (requests[i]->pair && requests[i]->half && requests[i]->newton != 2) { for (j = 0; j < nrequest; j++) { + // Kokkos doesn't yet support half from full + if (requests[i]->kokkos_device || requests[j]->kokkos_device) continue; + if (requests[i]->kokkos_host || requests[j]->kokkos_host) continue; + if (requests[j]->full && requests[j]->occasional == 0 && !requests[j]->skip && !requests[j]->copy) break; } @@ -706,6 +708,10 @@ void Neighbor::init_pair() for (i = 0; i < nrequest; i++) { if (!requests[i]->fix && !requests[i]->compute) continue; for (j = 0; j < nrequest; j++) { + // Kokkos flags must match + if (requests[i]->kokkos_device != requests[j]->kokkos_device) continue; + if (requests[i]->kokkos_host != requests[j]->kokkos_host) continue; + if (requests[i]->half && requests[j]->pair && !requests[j]->skip && requests[j]->half && !requests[j]->copy) break; @@ -727,6 +733,10 @@ void Neighbor::init_pair() continue; } for (j = 0; j < nrequest; j++) { + // Kokkos doesn't yet support half from full + if (requests[i]->kokkos_device || requests[j]->kokkos_device) continue; + if (requests[i]->kokkos_host || requests[j]->kokkos_host) continue; + if (requests[i]->half && requests[j]->pair && !requests[j]->skip && requests[j]->full && !requests[j]->copy) break; @@ -1434,7 +1444,11 @@ int Neighbor::choose_pair(NeighRequest *rq) for (int i = 0; i < npclass; i++) { mask = pairmasks[i]; - if (copyflag && (mask & NP_COPY)) return i+1; + if (copyflag && (mask & NP_COPY)) { + if (kokkos_device_flag != (mask & NP_KOKKOS_DEVICE)) continue; + if (kokkos_host_flag != (mask & NP_KOKKOS_HOST)) continue; + return i+1; + } if (skipflag != (mask & NP_SKIP)) continue; if (halfflag) {