From 1d939231a49dde46e9e1d18084f23fb189e8e055 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Mon, 21 Nov 2016 12:21:01 -0500 Subject: [PATCH 001/267] USER-DPD: initial Kokkos port, first steps from Aug 24th ARL Kokkos hackathon atom_vec_dpd_kokkos pair_dpd_fdt_energy_kokkos without the Oct 7th VV support from e27ed6c --- src/KOKKOS/atom_kokkos.h | 6 + src/USER-DPD/atom_vec_dpd_kokkos.cpp | 1872 +++++++++++++++++++ src/USER-DPD/atom_vec_dpd_kokkos.h | 135 ++ src/USER-DPD/pair_dpd_fdt_energy_kokkos.cpp | 373 ++++ src/USER-DPD/pair_dpd_fdt_energy_kokkos.h | 119 ++ 5 files changed, 2505 insertions(+) create mode 100644 src/USER-DPD/atom_vec_dpd_kokkos.cpp create mode 100644 src/USER-DPD/atom_vec_dpd_kokkos.h create mode 100644 src/USER-DPD/pair_dpd_fdt_energy_kokkos.cpp create mode 100644 src/USER-DPD/pair_dpd_fdt_energy_kokkos.h diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index 05aae712d9..f31c26e01f 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -51,6 +51,12 @@ class AtomKokkos : public Atom { DAT::tdual_int_2d k_improper_type; DAT::tdual_tagint_2d k_improper_atom1, k_improper_atom2, k_improper_atom3, k_improper_atom4; + +// USER-DPD package + DAT::tdual_efloat_1d k_uCond, k_uMech, k_uChem, k_uCG, k_uCGnew, + k_rho,k_dpdTheta,k_duChem; + + AtomKokkos(class LAMMPS *); ~AtomKokkos(); diff --git a/src/USER-DPD/atom_vec_dpd_kokkos.cpp b/src/USER-DPD/atom_vec_dpd_kokkos.cpp new file mode 100644 index 0000000000..c58b592e53 --- /dev/null +++ b/src/USER-DPD/atom_vec_dpd_kokkos.cpp @@ -0,0 +1,1872 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale AtomicKokkos/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. 
This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include +#include "atom_vec_dpd_kokkos.h" +#include "atom_kokkos.h" +#include "comm_kokkos.h" +#include "domain.h" +#include "modify.h" +#include "fix.h" +#include "atom_masks.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define DELTA 10000 + +/* ---------------------------------------------------------------------- */ + +AtomVecDPDKokkos::AtomVecDPDKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) +{ + molecular = 0; + mass_type = 1; + + comm_x_only = comm_f_only = 0; + size_forward = 7; + size_reverse = 3; + size_border = 12; + size_velocity = 3; + size_data_atom = 6; + size_data_vel = 4; + xcol_data = 4; + + atom->rho_flag = 1; + atom->dpd_flag = 1; + + k_count = DAT::tdual_int_1d("atom::k_count",1); + atomKK = (AtomKokkos *) atom; + commKK = (CommKokkos *) comm; +} + +/* ---------------------------------------------------------------------- + grow atom arrays + n = 0 grows arrays by DELTA + n > 0 allocates arrays to size n +------------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::grow(int n) +{ + if (n == 0) nmax += DELTA; + else nmax = n; + atomKK->nmax = nmax; + if (nmax < 0 || nmax > MAXSMALLINT) + error->one(FLERR,"Per-processor system is too big"); + + sync(Device,ALL_MASK); + modified(Device,ALL_MASK); + + memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); + memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); + memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); + memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); + + memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); + memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); + memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + + + 
memory->grow_kokkos(atomKK->k_rho,atomKK->rho,nmax,"atom:rho"); + memory->grow_kokkos(atomKK->k_dpdTheta,atomKK->dpdTheta,nmax,"atom:dpdTheta"); + memory->grow_kokkos(atomKK->k_uCond,atomKK->uCond,nmax,"atom:uCond"); + memory->grow_kokkos(atomKK->k_uMech,atomKK->uMech,nmax,"atom:uMech"); + memory->grow_kokkos(atomKK->k_uChem,atomKK->uChem,nmax,"atom:uChem"); + memory->grow_kokkos(atomKK->k_uCG,atomKK->uCG,nmax,"atom:uCG"); + memory->grow_kokkos(atomKK->k_uCGnew,atomKK->uCGnew,nmax,"atom:uCGnew"); + memory->grow_kokkos(atomKK->k_duChem,atomKK->duChem,nmax,"atom:duChem"); + + grow_reset(); + sync(Host,ALL_MASK); + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); +} + +/* ---------------------------------------------------------------------- + reset local array ptrs +------------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::grow_reset() +{ + tag = atomKK->tag; + d_tag = atomKK->k_tag.d_view; + h_tag = atomKK->k_tag.h_view; + + type = atomKK->type; + d_type = atomKK->k_type.d_view; + h_type = atomKK->k_type.h_view; + mask = atomKK->mask; + d_mask = atomKK->k_mask.d_view; + h_mask = atomKK->k_mask.h_view; + image = atomKK->image; + d_image = atomKK->k_image.d_view; + h_image = atomKK->k_image.h_view; + + x = atomKK->x; + d_x = atomKK->k_x.d_view; + h_x = atomKK->k_x.h_view; + v = atomKK->v; + d_v = atomKK->k_v.d_view; + h_v = atomKK->k_v.h_view; + f = atomKK->f; + d_f = atomKK->k_f.d_view; + h_f = atomKK->k_f.h_view; + + rho = atomKK->rho; + d_rho = atomKK->k_rho.d_view; + h_rho = atomKK->k_rho.h_view; + dpdTheta = atomKK->dpdTheta; + d_dpdTheta = atomKK->k_dpdTheta.d_view; + h_dpdTheta = atomKK->k_dpdTheta.h_view; + uCond = atomKK->uCond; + d_uCond = atomKK->k_uCond.d_view;; + h_uCond = atomKK->k_uCond.h_view; + uMech = atomKK->uMech; + d_uMech = atomKK->k_uMech.d_view;; + h_uMech = atomKK->k_uMech.h_view; + uChem = 
atomKK->uChem; + d_uChem = atomKK->k_uChem.d_view;; + h_uChem = atomKK->k_uChem.h_view; + uCG = atomKK->uCG; + d_uCG = atomKK->k_uCG.d_view;; + h_uCG = atomKK->k_uCG.h_view; + uCGnew = atomKK->uCGnew; + d_uCGnew = atomKK->k_uCGnew.d_view;; + h_uCGnew = atomKK->k_uCGnew.h_view; + duChem = atomKK->duChem; + d_duChem = atomKK->k_duChem.d_view;; + h_duChem = atomKK->k_duChem.h_view; +} + +/* ---------------------------------------------------------------------- + copy atom I info to atom J +------------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::copy(int i, int j, int delflag) +{ + h_tag[j] = h_tag[i]; + h_type[j] = h_type[i]; + mask[j] = mask[i]; + h_image[j] = h_image[i]; + h_x(j,0) = h_x(i,0); + h_x(j,1) = h_x(i,1); + h_x(j,2) = h_x(i,2); + h_v(j,0) = h_v(i,0); + h_v(j,1) = h_v(i,1); + h_v(j,2) = h_v(i,2); + h_dpdTheta[j] = h_dpdTheta[i]; + h_uCond[j] = h_uCond[i]; + h_uMech[j] = h_uMech[i]; + h_uChem[j] = h_uChem[i]; + h_uCG[j] = h_uCG[i]; + h_uCGnew[j] = h_uCGnew[i]; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag); +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDPDKokkos_PackComm { + typedef DeviceType device_type; + + typename ArrayTypes::t_x_array_randomread _x; + typename ArrayTypes::t_efloat_1d _dpdTheta,_uCond,_uMech,_uChem; + typename ArrayTypes::t_xfloat_2d_um _buf; + typename ArrayTypes::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecDPDKokkos_PackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_efloat_1d &dpdTheta, + const typename DAT::tdual_efloat_1d &uCond, + const typename DAT::tdual_efloat_1d &uMech, + const typename DAT::tdual_efloat_1d &uChem, + const typename DAT::tdual_xfloat_2d &buf, + const typename DAT::tdual_int_2d &list, + const int & 
iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view()), + _dpdTheta(dpdTheta.view()), + _uCond(uCond.view()), + _uMech(uMech.view()), + _uChem(uChem.view()), + _list(list.view()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + const size_t maxsend = (buf.view().dimension_0()*buf.view().dimension_1())/7; /* rows available when each row holds 7 values */ + const size_t elements = 7; /* operator() writes columns 0..6: x,y,z,dpdTheta,uCond,uMech,uChem */ + buffer_view(_buf,buf,maxsend,elements); + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { /* pack atom _list(_iswap,i) into row i of the send buffer */ + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + _buf(i,3) = _dpdTheta(j); + _buf(i,4) = _uCond(j); + _buf(i,5) = _uMech(j); + _buf(i,6) = _uChem(j); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_comm_kokkos(const int &n, + const DAT::tdual_int_2d &list, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, + const int* const pbc) +{ + // Check whether to always run forward communication on the host + // Choose correct forward PackComm kernel + + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); +
Kokkos::parallel_for(n,f); + } else { + struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPHostType::fence(); + } else { + sync(Device,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + 
Kokkos::parallel_for(n,f); + } + } + LMPDeviceType::fence(); + } + + return n*size_forward; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDPDKokkos_PackCommSelf { + typedef DeviceType device_type; + + typename ArrayTypes::t_x_array_randomread _x; + typename ArrayTypes::t_x_array _xw; + typename ArrayTypes::t_efloat_1d _dpdTheta,_uCond,_uMech,_uChem; + int _nfirst; + typename ArrayTypes::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecDPDKokkos_PackCommSelf( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_efloat_1d &dpdTheta, + const typename DAT::tdual_efloat_1d &uCond, + const typename DAT::tdual_efloat_1d &uMech, + const typename DAT::tdual_efloat_1d &uChem, + const int &nfirst, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view()),_xw(x.view()), + _dpdTheta(dpdTheta.view()), + _uCond(uCond.view()), + _uMech(uMech.view()), + _uChem(uChem.view()), + _nfirst(nfirst),_list(list.view()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _xw(i+_nfirst,0) = _x(j,0); + _xw(i+_nfirst,1) = _x(j,1); + _xw(i+_nfirst,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } 
+ } + _dpdTheta(i+_nfirst) = _dpdTheta(j); + _uCond(i+_nfirst) = _uCond(j); + _uMech(i+_nfirst) = _uMech(j); + _uChem(i+_nfirst) = _uChem(j); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, + const int nfirst, const int &pbc_flag, const int* const pbc) { + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + modified(Host,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPHostType::fence(); + } else { + sync(Device,X_MASK); + modified(Device,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + 
Kokkos::parallel_for(n,f); + } else { + struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPDeviceType::fence(); + } + return n*3; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDPDKokkos_UnpackComm { + typedef DeviceType device_type; + + typename ArrayTypes::t_x_array _x; + typename ArrayTypes::t_efloat_1d _dpdTheta,_uCond,_uMech,_uChem; + typename ArrayTypes::t_xfloat_2d_const _buf; + int _first; + + AtomVecDPDKokkos_UnpackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_efloat_1d &dpdTheta, + const typename DAT::tdual_efloat_1d &uCond, + const typename DAT::tdual_efloat_1d &uMech, + const typename DAT::tdual_efloat_1d &uChem, + const typename DAT::tdual_xfloat_2d &buf, + const int& first):_x(x.view()), + _dpdTheta(dpdTheta.view()), + _uCond(uCond.view()), + _uMech(uMech.view()), + _uChem(uChem.view()), + _buf(buf.view()), + _first(first) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _dpdTheta(i+_first) = _buf(i,3); + _uCond(i+_first) = _buf(i,4); + _uMech(i+_first) = _buf(i,5); + 
_uChem(i+_first) = _buf(i,6); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::unpack_comm_kokkos(const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf ) { + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + modified(Host,X_MASK); + struct AtomVecDPDKokkos_UnpackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,first); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); /* host-side pass: fence the host space, as the other forward_comm_on_host branches do */ + } else { + sync(Device,X_MASK); + modified(Device,X_MASK); + struct AtomVecDPDKokkos_UnpackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,first); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_comm(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; /* m counts the doubles written to buf and is the return value */ + double dx,dy,dz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = h_dpdTheta[j]; /* host views, same accessors as the periodic branch below */ + buf[m++] = h_uCond[j]; + buf[m++] = h_uMech[j]; + buf[m++] = h_uChem[j]; + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = h_dpdTheta[j]; + buf[m++] = h_uCond[j]; + buf[m++] = h_uMech[j]; + buf[m++] = h_uChem[j]; + } + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_comm_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + m = 0; + if
(pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_dpdTheta[j]; + buf[m++] = h_uCond[j]; + buf[m++] = h_uMech[j]; + buf[m++] = h_uChem[j]; + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_dpdTheta[j]; + buf[m++] = h_uCond[j]; + buf[m++] = h_uMech[j]; + buf[m++] = h_uChem[j]; + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + if (mask[i] & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + buf[m++] = h_dpdTheta(j); + buf[m++] = h_uCond(j); + buf[m++] = h_uMech(j); + buf[m++] = h_uChem(j); + } + } + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::unpack_comm(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_dpdTheta[i] = buf[m++]; + h_uCond[i] = buf[m++]; + h_uMech[i] = buf[m++]; + h_uChem[i] = buf[m++]; + } +} + +/* 
---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::unpack_comm_vel(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + h_dpdTheta[i] = buf[m++]; + h_uCond[i] = buf[m++]; + h_uMech[i] = buf[m++]; + h_uChem[i] = buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf) +{ + if(n > 0) + sync(Host,F_MASK); + + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + buf[m++] = h_f(i,0); + buf[m++] = h_f(i,1); + buf[m++] = h_f(i,2); + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::unpack_reverse(int n, int *list, double *buf) +{ + if(n > 0) { + sync(Host,F_MASK); + modified(Host,F_MASK); + } + + int m = 0; + for (int i = 0; i < n; i++) { + const int j = list[i]; + h_f(j,0) += buf[m++]; + h_f(j,1) += buf[m++]; + h_f(j,2) += buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDPDKokkos_PackBorder { + typedef DeviceType device_type; + + typename ArrayTypes::t_xfloat_2d _buf; + const typename ArrayTypes::t_int_2d_const _list; + const int _iswap; + const typename ArrayTypes::t_x_array_randomread _x; + const typename ArrayTypes::t_tagint_1d _tag; + const typename ArrayTypes::t_int_1d _type; + const typename ArrayTypes::t_int_1d _mask; + typename ArrayTypes::t_efloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + X_FLOAT _dx,_dy,_dz; + + AtomVecDPDKokkos_PackBorder( + const typename ArrayTypes::t_xfloat_2d &buf, + const typename ArrayTypes::t_int_2d_const &list, + const int & iswap, + const typename ArrayTypes::t_x_array &x, + const typename 
ArrayTypes::t_tagint_1d &tag, + const typename ArrayTypes::t_int_1d &type, + const typename ArrayTypes::t_int_1d &mask, + const typename ArrayTypes::t_efloat_1d &dpdTheta, + const typename ArrayTypes::t_efloat_1d &uCond, + const typename ArrayTypes::t_efloat_1d &uMech, + const typename ArrayTypes::t_efloat_1d &uChem, + const typename ArrayTypes::t_efloat_1d &uCG, + const typename ArrayTypes::t_efloat_1d &uCGnew, + const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): + _buf(buf),_list(list),_iswap(iswap), + _x(x),_tag(tag),_type(type),_mask(mask), + _dpdTheta(dpdTheta), + _uCond(uCond), + _uMech(uMech), + _uChem(uChem), + _uCG(uCG), /* must bind uCG, not uCGnew, so column 10 carries uCG */ + _uCGnew(uCGnew), + _dx(dx),_dy(dy),_dz(dz) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { /* pack atom _list(_iswap,i) into the 12 columns of row i */ + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + } else { + _buf(i,0) = _x(j,0) + _dx; + _buf(i,1) = _x(j,1) + _dy; + _buf(i,2) = _x(j,2) + _dz; + } + _buf(i,3) = _tag(j); + _buf(i,4) = _type(j); + _buf(i,5) = _mask(j); + _buf(i,6) = _dpdTheta(j); + _buf(i,7) = _uCond(j); + _buf(i,8) = _uMech(j); + _buf(i,9) = _uChem(j); + _buf(i,10) = _uCG(j); + _buf(i,11) = _uCGnew(j); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space) +{ + X_FLOAT dx,dy,dz; + + if (pbc_flag != 0) { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if(space==Host) { + AtomVecDPDKokkos_PackBorder f( + buf.view(), k_sendlist.view(), + iswap,h_x,h_tag,h_type,h_mask, + h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, + dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); + } else { + AtomVecDPDKokkos_PackBorder f( + buf.view(),
k_sendlist.view(), + iswap,d_x,d_tag,d_type,d_mask, + d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, + dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } + + } else { + dx = dy = dz = 0; /* no periodic shift requested */ + if(space==Host) { + AtomVecDPDKokkos_PackBorder f( + buf.view(), k_sendlist.view(), + iswap,h_x,h_tag,h_type,h_mask, + h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, + dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); + } else { + AtomVecDPDKokkos_PackBorder f( + buf.view(), k_sendlist.view(), + iswap,d_x,d_tag,d_type,d_mask, + d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, + dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } + } + return n*size_border; /* each packed atom fills size_border (12) columns, not 6 */ +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_border(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_dpdTheta(j); + buf[m++] = h_uCond(j); + buf[m++] = h_uMech(j); + buf[m++] = h_uChem(j); + buf[m++] = h_uCG(j); + buf[m++] = h_uCGnew(j); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_dpdTheta(j); + buf[m++] = h_uCond(j); + buf[m++] = h_uMech(j); + buf[m++] = h_uChem(j); + buf[m++] = h_uCG(j); + buf[m++] = h_uCGnew(j); + } + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m +=
modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_border_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_dpdTheta(j); + buf[m++] = h_uCond(j); + buf[m++] = h_uMech(j); + buf[m++] = h_uChem(j); + buf[m++] = h_uCG(j); + buf[m++] = h_uCGnew(j); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_dpdTheta(j); + buf[m++] = h_uCond(j); + buf[m++] = h_uMech(j); + buf[m++] = h_uChem(j); + buf[m++] = h_uCG(j); + buf[m++] = h_uCGnew(j); + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + if (mask[i] & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = 
h_v(j,2); + } + buf[m++] = h_dpdTheta(j); + buf[m++] = h_uCond(j); + buf[m++] = h_uMech(j); + buf[m++] = h_uChem(j); + buf[m++] = h_uCG(j); + buf[m++] = h_uCGnew(j); + } + } + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_comm_hybrid(int n, int *list, double *buf) +{ + int i,j,m; /* m counts the doubles written to buf and is the return value */ + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_dpdTheta[j]; + buf[m++] = h_uCond[j]; + buf[m++] = h_uMech[j]; + buf[m++] = h_uChem[j]; + /* uCG and uCGnew are deliberately not packed: unpack_comm_hybrid() */ + /* reads four values per atom, so packing six would misalign the buffer */ + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_border_hybrid(int n, int *list, double *buf) +{ + int i,j,m; + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_dpdTheta[j]; + buf[m++] = h_uCond[j]; + buf[m++] = h_uMech[j]; + buf[m++] = h_uChem[j]; + buf[m++] = h_uCG[j]; + buf[m++] = h_uCGnew[j]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDPDKokkos_UnpackBorder { + typedef DeviceType device_type; + + const typename ArrayTypes::t_xfloat_2d_const _buf; + typename ArrayTypes::t_x_array _x; + typename ArrayTypes::t_tagint_1d _tag; + typename ArrayTypes::t_int_1d _type; + typename ArrayTypes::t_int_1d _mask; + typename ArrayTypes::t_efloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + int _first; + + + AtomVecDPDKokkos_UnpackBorder( + const typename ArrayTypes::t_xfloat_2d_const &buf, + typename ArrayTypes::t_x_array &x, + typename ArrayTypes::t_tagint_1d &tag, + typename ArrayTypes::t_int_1d &type, + typename ArrayTypes::t_int_1d &mask, + const typename ArrayTypes::t_efloat_1d &dpdTheta, + const typename ArrayTypes::t_efloat_1d &uCond, + const typename
ArrayTypes::t_efloat_1d &uMech, + const typename ArrayTypes::t_efloat_1d &uChem, + const typename ArrayTypes::t_efloat_1d &uCG, + const typename ArrayTypes::t_efloat_1d &uCGnew, + const int& first): + _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask), + _dpdTheta(dpdTheta), + _uCond(uCond), + _uMech(uMech), + _uChem(uChem), + _uCG(uCG), /* must bind uCG, not uCGnew, or column 10 is written into uCGnew and then overwritten */ + _uCGnew(uCGnew), + _first(first) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { /* unpack buffer row i into atom slot i+_first */ + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _tag(i+_first) = static_cast (_buf(i,3)); + _type(i+_first) = static_cast (_buf(i,4)); + _mask(i+_first) = static_cast (_buf(i,5)); + _dpdTheta(i+_first) = _buf(i,6); + _uCond(i+_first) = _buf(i,7); + _uMech(i+_first) = _buf(i,8); + _uChem(i+_first) = _buf(i,9); + _uCG(i+_first) = _buf(i,10); + _uCGnew(i+_first) = _buf(i,11); +// printf("%i %i %lf %lf %lf %i BORDER\n",_tag(i+_first),i+_first,_x(i+_first,0),_x(i+_first,1),_x(i+_first,2),_type(i+_first)); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::unpack_border_kokkos(const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) { + modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + while (first+n >= nmax) grow(0); /* grow until slots first..first+n-1 exist */ + modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + if(space==Host) { + struct AtomVecDPDKokkos_UnpackBorder f(buf.view(), + h_x,h_tag,h_type,h_mask, + h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, + first); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); + } else { + struct AtomVecDPDKokkos_UnpackBorder f(buf.view(), + d_x,d_tag,d_type,d_mask, + d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, + first); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::unpack_border(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first +
n; + for (i = first; i < last; i++) { + if (i == nmax) grow(0); + modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag(i) = (tagint) ubuf(buf[m++]).i; + h_type(i) = (int) ubuf(buf[m++]).i; + h_mask(i) = (int) ubuf(buf[m++]).i; + h_dpdTheta(i) = buf[m++]; + h_uCond(i) = buf[m++]; + h_uMech(i) = buf[m++]; + h_uChem(i) = buf[m++]; + h_uCG(i) = buf[m++]; + h_uCGnew(i) = buf[m++]; + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::unpack_border_vel(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + if (i == nmax) grow(0); + modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag(i) = (tagint) ubuf(buf[m++]).i; + h_type(i) = (int) ubuf(buf[m++]).i; + h_mask(i) = (int) ubuf(buf[m++]).i; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + h_dpdTheta(i) = buf[m++]; + h_uCond(i) = buf[m++]; + h_uMech(i) = buf[m++]; + h_uChem(i) = buf[m++]; + h_uCG(i) = buf[m++]; + h_uCGnew(i) = buf[m++]; + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::unpack_comm_hybrid(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_dpdTheta(i) = buf[m++]; + h_uCond(i) = buf[m++]; + h_uMech(i) = buf[m++]; + h_uChem(i) = buf[m++]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + 
+int AtomVecDPDKokkos::unpack_border_hybrid(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_dpdTheta(i) = buf[m++]; + h_uCond(i) = buf[m++]; + h_uMech(i) = buf[m++]; + h_uChem(i) = buf[m++]; + h_uCG(i) = buf[m++]; + h_uCGnew(i) = buf[m++]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDPDKokkos_PackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes AT; + typename AT::t_x_array_randomread _x; + typename AT::t_v_array_randomread _v; + typename AT::t_tagint_1d_randomread _tag; + typename AT::t_int_1d_randomread _type; + typename AT::t_int_1d_randomread _mask; + typename AT::t_imageint_1d_randomread _image; + typename AT::t_efloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + typename AT::t_x_array _xw; + typename AT::t_v_array _vw; + typename AT::t_tagint_1d _tagw; + typename AT::t_int_1d _typew; + typename AT::t_int_1d _maskw; + typename AT::t_imageint_1d _imagew; + typename AT::t_efloat_1d _dpdThetaw,_uCondw,_uMechw,_uChemw,_uCGw,_uCGneww; + + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d_const _sendlist; + typename AT::t_int_1d_const _copylist; + int _nlocal,_dim; + X_FLOAT _lo,_hi; + + AtomVecDPDKokkos_PackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d sendlist, + typename AT::tdual_int_1d copylist,int nlocal, int dim, + X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _dpdTheta(atom->k_dpdTheta.view()), + _uCond(atom->k_uCond.view()), + _uMech(atom->k_uMech.view()), + _uChem(atom->k_uChem.view()), + _uCG(atom->k_uCG.view()), + _uCGnew(atom->k_uCGnew.view()), + _xw(atom->k_x.view()), + _vw(atom->k_v.view()), + _tagw(atom->k_tag.view()), + 
_typew(atom->k_type.view()), + _maskw(atom->k_mask.view()), + _imagew(atom->k_image.view()), + _dpdThetaw(atom->k_dpdTheta.view()), + _uCondw(atom->k_uCond.view()), + _uMechw(atom->k_uMech.view()), + _uChemw(atom->k_uChem.view()), + _uCGw(atom->k_uCG.view()), + _uCGneww(atom->k_uCGnew.view()), + _sendlist(sendlist.template view()), + _copylist(copylist.template view()), + _nlocal(nlocal),_dim(dim), + _lo(lo),_hi(hi){ + const size_t elements = 17; + const int maxsendlist = (buf.template view().dimension_0()*buf.template view().dimension_1())/elements; + + buffer_view(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &mysend) const { + const int i = _sendlist(mysend); + _buf(mysend,0) = 17; + _buf(mysend,1) = _x(i,0); + _buf(mysend,2) = _x(i,1); + _buf(mysend,3) = _x(i,2); + _buf(mysend,4) = _v(i,0); + _buf(mysend,5) = _v(i,1); + _buf(mysend,6) = _v(i,2); + _buf(mysend,7) = _tag[i]; + _buf(mysend,8) = _type[i]; + _buf(mysend,9) = _mask[i]; + _buf(mysend,10) = _image[i]; + _buf(mysend,11) = _dpdTheta[i]; + _buf(mysend,12) = _uCond[i]; + _buf(mysend,13) = _uMech[i]; + _buf(mysend,14) = _uChem[i]; + _buf(mysend,15) = _uCG[i]; + _buf(mysend,16) = _uCGnew[i]; + const int j = _copylist(mysend); + + if(j>-1) { + _xw(i,0) = _x(j,0); + _xw(i,1) = _x(j,1); + _xw(i,2) = _x(j,2); + _vw(i,0) = _v(j,0); + _vw(i,1) = _v(j,1); + _vw(i,2) = _v(j,2); + _tagw[i] = _tag(j); + _typew[i] = _type(j); + _maskw[i] = _mask(j); + _imagew[i] = _image(j); + _dpdThetaw[i] = _dpdTheta(j); + _uCondw[i] = _uCond(j); + _uMechw[i] = _uMech(j); + _uChemw[i] = _uChem(j); + _uCGw[i] = _uCG(j); + _uCGneww[i] = _uCGnew(j); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi ) +{ + if(nsend > (int) 
(k_buf.view().dimension_0()*k_buf.view().dimension_1())/17) { + int newsize = nsend*17/k_buf.view().dimension_1()+1; + k_buf.resize(newsize,k_buf.view().dimension_1()); + } + if(space == Host) { + AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + LMPHostType::fence(); + return nsend*17; + } else { + AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + LMPDeviceType::fence(); + return nsend*17; + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_exchange(int i, double *buf) +{ + int m = 1; + buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + buf[m++] = ubuf(h_tag(i)).d; + buf[m++] = ubuf(h_type(i)).d; + buf[m++] = ubuf(h_mask(i)).d; + buf[m++] = ubuf(h_image(i)).d; + buf[m++] = h_dpdTheta[i]; + buf[m++] = h_uCond[i]; + buf[m++] = h_uMech[i]; + buf[m++] = h_uChem[i]; + buf[m++] = h_uCG[i]; + buf[m++] = h_uCGnew[i]; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDPDKokkos_UnpackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes AT; + typename AT::t_x_array _x; + typename AT::t_v_array _v; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_imageint_1d _image; + typename AT::t_efloat_1d _dpdTheta; + typename AT::t_efloat_1d _uCond; + typename AT::t_efloat_1d _uMech; + typename AT::t_efloat_1d _uChem; + typename AT::t_efloat_1d _uCG; + typename AT::t_efloat_1d _uCGnew; + + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d 
_nlocal; + int _dim; + X_FLOAT _lo,_hi; + + AtomVecDPDKokkos_UnpackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + int dim, X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + /* the DPD views must be bound here as well: operator() writes buffer columns 11-16 into them */ + _dpdTheta(atom->k_dpdTheta.view()), + _uCond(atom->k_uCond.view()), + _uMech(atom->k_uMech.view()), + _uChem(atom->k_uChem.view()), + _uCG(atom->k_uCG.view()), + _uCGnew(atom->k_uCGnew.view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi){ + const size_t elements = 17; + const int maxsendlist = (buf.template view().dimension_0()*buf.template view().dimension_1())/elements; + + buffer_view(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &myrecv) const { + X_FLOAT x = _buf(myrecv,_dim+1); + if (x >= _lo && x < _hi) { + int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + _x(i,0) = _buf(myrecv,1); + _x(i,1) = _buf(myrecv,2); + _x(i,2) = _buf(myrecv,3); + _v(i,0) = _buf(myrecv,4); + _v(i,1) = _buf(myrecv,5); + _v(i,2) = _buf(myrecv,6); + _tag[i] = _buf(myrecv,7); + _type[i] = _buf(myrecv,8); + _mask[i] = _buf(myrecv,9); + _image[i] = _buf(myrecv,10); + _dpdTheta[i] = _buf(myrecv,11); + _uCond[i] = _buf(myrecv,12); + _uMech[i] = _buf(myrecv,13); + _uChem[i] = _buf(myrecv,14); + _uCG[i] = _buf(myrecv,15); + _uCGnew[i] = _buf(myrecv,16); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +/* Unpack received atoms (17 buffer values each) whose coordinate along dim lies in [lo,hi); k_count carries the running nlocal and its final host value is returned. */ +int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { + if(space == Host) { + k_count.h_view(0) = nlocal; + AtomVecDPDKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/17,f); + LMPHostType::fence(); + return k_count.h_view(0); + } else { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecDPDKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/17,f); + LMPDeviceType::fence(); +
k_count.modify(); + k_count.sync(); + + return k_count.h_view(0); + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::unpack_exchange(double *buf) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK); + + int m = 1; + h_x(nlocal,0) = buf[m++]; + h_x(nlocal,1) = buf[m++]; + h_x(nlocal,2) = buf[m++]; + h_v(nlocal,0) = buf[m++]; + h_v(nlocal,1) = buf[m++]; + h_v(nlocal,2) = buf[m++]; + h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; + h_type(nlocal) = (int) ubuf(buf[m++]).i; + h_mask(nlocal) = (int) ubuf(buf[m++]).i; + h_image(nlocal) = (imageint) ubuf(buf[m++]).i; + h_dpdTheta[nlocal] = buf[m++]; + h_uCond[nlocal] = buf[m++]; + h_uMech[nlocal] = buf[m++]; + h_uChem[nlocal] = buf[m++]; + h_uCG[nlocal] = buf[m++]; + h_uCGnew[nlocal] = buf[m++]; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]-> + unpack_exchange(nlocal,&buf[m]); + + atom->nlocal++; + return m; +} + +/* ---------------------------------------------------------------------- + size of restart data for all atoms owned by this proc + include extra data stored by fixes +------------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::size_restart() +{ + int i; + + int nlocal = atom->nlocal; + int n = 15 * nlocal; // 11 + dpdTheta + uCond + uMech + uChem + + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + for (i = 0; i < nlocal; i++) + n += modify->fix[atom->extra_restart[iextra]]->size_restart(i); + + return n; +} + +/* ---------------------------------------------------------------------- + pack atom I's data for restart file including extra quantities + xyz must be 1st 3 values, so that read_restart can test on them + molecular types may be negative, but write as positive 
+------------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_restart(int i, double *buf) +{ + sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK ); + + int m = 1; + buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] = ubuf(h_tag(i)).d; + buf[m++] = ubuf(h_type(i)).d; + buf[m++] = ubuf(h_mask(i)).d; + buf[m++] = ubuf(h_image(i)).d; + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + buf[m++] = h_dpdTheta[i]; + buf[m++] = h_uCond[i]; + buf[m++] = h_uMech[i]; + buf[m++] = h_uChem[i]; + + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- + unpack data for one atom from restart file including extra quantities +------------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::unpack_restart(double *buf) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) { + grow(0); + if (atom->nextra_store) + memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); + } + modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK ); + + int m = 1; + h_x(nlocal,0) = buf[m++]; + h_x(nlocal,1) = buf[m++]; + h_x(nlocal,2) = buf[m++]; + h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; + h_type(nlocal) = (int) ubuf(buf[m++]).i; + h_mask(nlocal) = (int) ubuf(buf[m++]).i; + h_image(nlocal) = (imageint) ubuf(buf[m++]).i; + h_v(nlocal,0) = buf[m++]; + h_v(nlocal,1) = buf[m++]; + h_v(nlocal,2) = buf[m++]; + h_dpdTheta[nlocal] = buf[m++]; + h_uCond[nlocal] = buf[m++]; + h_uMech[nlocal] = buf[m++]; + h_uChem[nlocal] = buf[m++]; + + double **extra = atom->extra; + if (atom->nextra_store) { + int size = static_cast (ubuf(buf[m++]).i) - m; + for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++]; + } + + 
atom->nlocal++; + return m; +} + +/* ---------------------------------------------------------------------- + create one atom of itype at coord + set other values to defaults +------------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::create_atom(int itype, double *coord) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) { + //if(nlocal>2) printf("typeA: %i %i\n",type[0],type[1]); + atomKK->modified(Host,ALL_MASK); + grow(0); + //if(nlocal>2) printf("typeB: %i %i\n",type[0],type[1]); + } + atomKK->modified(Host,ALL_MASK); + + tag[nlocal] = 0; + type[nlocal] = itype; + h_x(nlocal,0) = coord[0]; + h_x(nlocal,1) = coord[1]; + h_x(nlocal,2) = coord[2]; + h_mask[nlocal] = 1; + h_image[nlocal] = ((tagint) IMGMAX << IMG2BITS) | + ((tagint) IMGMAX << IMGBITS) | IMGMAX; + h_v(nlocal,0) = 0.0; + h_v(nlocal,1) = 0.0; + h_v(nlocal,2) = 0.0; + h_rho[nlocal] = 0.0; + h_dpdTheta[nlocal] = 0.0; + h_uCond[nlocal] = 0.0; + h_uMech[nlocal] = 0.0; + h_uChem[nlocal] = 0.0; + h_uCG[nlocal] = 0.0; + h_uCGnew[nlocal] = 0.0; + h_duChem[nlocal] = 0.0; + + atom->nlocal++; +} + +/* ---------------------------------------------------------------------- + unpack one line from Atoms section of data file + initialize other atom quantities +------------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::data_atom(double *coord, tagint imagetmp, + char **values) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + + h_tag[nlocal] = ATOTAGINT(values[0]); + h_type[nlocal] = atoi(values[1]); + if (type[nlocal] <= 0 || type[nlocal] > atom->ntypes) + error->one(FLERR,"Invalid atom type in Atoms section of data file"); + + h_dpdTheta[nlocal] = atof(values[2]); + if (h_dpdTheta[nlocal] <= 0) + error->one(FLERR,"Internal temperature in Atoms section of date file must be > zero"); + + h_x(nlocal,0) = coord[0]; + h_x(nlocal,1) = coord[1]; + h_x(nlocal,2) = coord[2]; + + h_image[nlocal] = imagetmp; + + 
h_mask[nlocal] = 1; + h_v(nlocal,0) = 0.0; + h_v(nlocal,1) = 0.0; + h_v(nlocal,2) = 0.0; + + h_rho[nlocal] = 0.0; + h_uCond[nlocal] = 0.0; + h_uMech[nlocal] = 0.0; + h_uChem[nlocal] = 0.0; + h_uCG[nlocal] = 0.0; + h_uCGnew[nlocal] = 0.0; + + atomKK->modified(Host,ALL_MASK); + + atom->nlocal++; +} + +/* ---------------------------------------------------------------------- + unpack hybrid quantities from one line in Atoms section of data file + initialize other atom quantities for this sub-style +------------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::data_atom_hybrid(int nlocal, char **values) +{ + h_dpdTheta(nlocal) = atof(values[0]); + + return 1; +} + +/* ---------------------------------------------------------------------- + pack atom info for data file including 3 image flags + integer columns (tag, type, image flags) are encoded with ubuf(...).d + because write_data() decodes them with ubuf(...).i +------------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::pack_data(double **buf) +{ + int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) { + buf[i][0] = ubuf(h_tag(i)).d; + buf[i][1] = ubuf(h_type(i)).d; + buf[i][2] = h_dpdTheta(i); + buf[i][3] = h_x(i,0); + buf[i][4] = h_x(i,1); + buf[i][5] = h_x(i,2); + buf[i][6] = ubuf((h_image[i] & IMGMASK) - IMGMAX).d; + buf[i][7] = ubuf((h_image[i] >> IMGBITS & IMGMASK) - IMGMAX).d; + buf[i][8] = ubuf((h_image[i] >> IMG2BITS) - IMGMAX).d; + } +} + +/* ---------------------------------------------------------------------- + pack hybrid atom info for data file +------------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_data_hybrid(int i, double *buf) +{ + buf[0] = h_dpdTheta(i); + return 1; +} + +/* ---------------------------------------------------------------------- + write atom info to data file including 3 image flags +------------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::write_data(FILE *fp, int n, double **buf) +{ + for (int i = 0; i < n; i++) + fprintf(fp,TAGINT_FORMAT
" %d %-1.16e %-1.16e %-1.16e %-1.16e %d %d %d\n", + (tagint) ubuf(buf[i][0]).i,(int) ubuf(buf[i][1]).i, + buf[i][2],buf[i][3],buf[i][4],buf[i][5], + (int) ubuf(buf[i][6]).i,(int) ubuf(buf[i][7]).i, + (int) ubuf(buf[i][8]).i); +} + +/* ---------------------------------------------------------------------- + write hybrid atom info to data file +------------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::write_data_hybrid(FILE *fp, double *buf) +{ + fprintf(fp," %-1.16e",buf[0]); + return 1; +} + +/* ---------------------------------------------------------------------- + return # of bytes of allocated memory +------------------------------------------------------------------------- */ + +bigint AtomVecDPDKokkos::memory_usage() +{ + bigint bytes = 0; + + if (atom->memcheck("tag")) bytes += memory->usage(tag,nmax); + if (atom->memcheck("type")) bytes += memory->usage(type,nmax); + if (atom->memcheck("mask")) bytes += memory->usage(mask,nmax); + if (atom->memcheck("image")) bytes += memory->usage(image,nmax); + if (atom->memcheck("x")) bytes += memory->usage(x,nmax,3); + if (atom->memcheck("v")) bytes += memory->usage(v,nmax,3); + if (atom->memcheck("f")) bytes += memory->usage(f,nmax*commKK->nthreads,3); + if (atom->memcheck("rho")) bytes += memory->usage(rho,nmax); + if (atom->memcheck("dpdTheta")) bytes += memory->usage(dpdTheta,nmax); + if (atom->memcheck("uCond")) bytes += memory->usage(uCond,nmax); + if (atom->memcheck("uMech")) bytes += memory->usage(uMech,nmax); + if (atom->memcheck("uChem")) bytes += memory->usage(uChem,nmax); + if (atom->memcheck("uCG")) bytes += memory->usage(uCG,nmax); + if (atom->memcheck("uCGnew")) bytes += memory->usage(uCGnew,nmax); + if (atom->memcheck("duChem")) bytes += memory->usage(duChem,nmax); + + return bytes; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) 
{ + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + } else { + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if ((mask & X_MASK) && atomKK->k_x.need_sync()) + perform_async_copy(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync()) + perform_async_copy(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync()) + perform_async_copy(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync()) + perform_async_copy(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync()) + perform_async_copy(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync()) + perform_async_copy(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync()) + perform_async_copy(atomKK->k_image,space); + } else { + if ((mask & X_MASK) && atomKK->k_x.need_sync()) + perform_async_copy(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync()) + perform_async_copy(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync()) + perform_async_copy(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync()) + perform_async_copy(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync()) + perform_async_copy(atomKK->k_type,space); + if 
((mask & MASK_MASK) && atomKK->k_mask.need_sync()) + perform_async_copy(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync()) + perform_async_copy(atomKK->k_image,space); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if (mask & X_MASK) atomKK->k_x.modify(); + if (mask & V_MASK) atomKK->k_v.modify(); + if (mask & F_MASK) atomKK->k_f.modify(); + if (mask & TAG_MASK) atomKK->k_tag.modify(); + if (mask & TYPE_MASK) atomKK->k_type.modify(); + if (mask & MASK_MASK) atomKK->k_mask.modify(); + if (mask & IMAGE_MASK) atomKK->k_image.modify(); + } else { + if (mask & X_MASK) atomKK->k_x.modify(); + if (mask & V_MASK) atomKK->k_v.modify(); + if (mask & F_MASK) atomKK->k_f.modify(); + if (mask & TAG_MASK) atomKK->k_tag.modify(); + if (mask & TYPE_MASK) atomKK->k_type.modify(); + if (mask & MASK_MASK) atomKK->k_mask.modify(); + if (mask & IMAGE_MASK) atomKK->k_image.modify(); + } +} + diff --git a/src/USER-DPD/atom_vec_dpd_kokkos.h b/src/USER-DPD/atom_vec_dpd_kokkos.h new file mode 100644 index 0000000000..d108e58ae7 --- /dev/null +++ b/src/USER-DPD/atom_vec_dpd_kokkos.h @@ -0,0 +1,135 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale AtomicKokkos/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef ATOM_CLASS + +AtomStyle(dpd/kk,AtomVecDPDKokkos) + +#else + +#ifndef LMP_ATOM_VEC_DPD_KOKKOS_H +#define LMP_ATOM_VEC_DPD_KOKKOS_H + +#include "atom_vec_kokkos.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +class AtomVecDPDKokkos : public AtomVecKokkos { + public: + AtomVecDPDKokkos(class LAMMPS *); + virtual ~AtomVecDPDKokkos() {} + void grow(int); + void copy(int, int, int); + int pack_comm(int, int *, double *, int, int *); + int pack_comm_vel(int, int *, double *, int, int *); + int pack_comm_hybrid(int, int *, double *); + void unpack_comm(int, int, double *); + void unpack_comm_vel(int, int, double *); + int unpack_comm_hybrid(int, int, double *); + int pack_reverse(int, int, double *); + void unpack_reverse(int, int *, double *); + int pack_border(int, int *, double *, int, int *); + int pack_border_vel(int, int *, double *, int, int *); + int pack_border_hybrid(int, int *, double *); + void unpack_border(int, int, double *); + void unpack_border_vel(int, int, double *); + int unpack_border_hybrid(int, int, double *); + int pack_exchange(int, double *); + int unpack_exchange(double *); + int size_restart(); + int pack_restart(int, double *); + int unpack_restart(double *); + void create_atom(int, double *); + void data_atom(double *, tagint, char **); + int data_atom_hybrid(int, char **); + void pack_data(double **); + int pack_data_hybrid(int, double *); + void write_data(FILE *, int, double **); + int write_data_hybrid(FILE *, double *); + bigint memory_usage(); + + void grow_reset(); + int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, const int pbc[]); + void unpack_comm_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf); + int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, + const int & iswap, const int nfirst, + const int 
&pbc_flag, const int pbc[]); + int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, + DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space); + void unpack_border_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf, + ExecutionSpace space); + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space, int dim, + X_FLOAT lo, X_FLOAT hi); + int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, + int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, + ExecutionSpace space); + + void sync(ExecutionSpace space, unsigned int mask); + void modified(ExecutionSpace space, unsigned int mask); + void sync_overlapping_device(ExecutionSpace space, unsigned int mask); + double *uCond,*uMech,*uChem,*uCG,*uCGnew,*rho,*dpdTheta; + double *duChem; + + protected: + DAT::t_efloat_1d d_uCond, d_uMech, d_uChem, d_uCG, d_uCGnew,d_rho,d_dpdTheta,d_duChem; + HAT::t_efloat_1d h_uCond, h_uMech, h_uChem, h_uCG, h_uCGnew,h_rho,h_dpdTheta,h_duChem; + + tagint *tag; + imageint *image; + int *type,*mask; + double **x,**v,**f; + + DAT::t_tagint_1d d_tag; + HAT::t_tagint_1d h_tag; + DAT::t_imageint_1d d_image; + HAT::t_imageint_1d h_image; + DAT::t_int_1d d_type, d_mask; + HAT::t_int_1d h_type, h_mask; + + DAT::t_x_array d_x; + DAT::t_v_array d_v; + DAT::t_f_array d_f; + HAT::t_x_array h_x; + HAT::t_v_array h_v; + HAT::t_f_array h_f; + + DAT::tdual_int_1d k_count; +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Per-processor system is too big + +The number of owned atoms plus ghost atoms on a single +processor must fit in 32-bit integer. + +E: Invalid atom type in Atoms section of data file + +Atom types must range from 1 to specified # of types. 
+ +*/ diff --git a/src/USER-DPD/pair_dpd_fdt_energy_kokkos.cpp b/src/USER-DPD/pair_dpd_fdt_energy_kokkos.cpp new file mode 100644 index 0000000000..f7e1fecc09 --- /dev/null +++ b/src/USER-DPD/pair_dpd_fdt_energy_kokkos.cpp @@ -0,0 +1,373 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: James Larentzos (U.S. Army Research Laboratory) +------------------------------------------------------------------------- */ + +#include +#include +#include +#include +#include "pair_dpd_fdt_energy_kokkos.h" +#include "kokkos.h" +#include "atom_kokkos.h" +#include "atom_vec.h" +#include "comm.h" +#include "update.h" +#include "fix.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "random_mars.h" +#include "math_const.h" +#include "memory.h" +#include "modify.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define KOKKOS_CUDA_MAX_THREADS 256 +#define KOKKOS_CUDA_MIN_BLOCKS 8 + +#define EPSILON 1.0e-10 + +/* ---------------------------------------------------------------------- */ + +template +PairDPDfdtEnergyKokkos::PairDPDfdtEnergyKokkos(LAMMPS *lmp) : PairDPDfdtEnergy(lmp) +{ + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + cutsq = NULL; +} + +/* 
---------------------------------------------------------------------- */ + +template +PairDPDfdtEnergyKokkos::~PairDPDfdtEnergyKokkos() +{ + if (allocated) { + memory->destroy_kokkos(k_eatom,eatom); + memory->destroy_kokkos(k_vatom,vatom); + k_cutsq = DAT::tdual_ffloat_2d(); + memory->sfree(cutsq); + eatom = NULL; + vatom = NULL; + cutsq = NULL; + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairDPDfdtEnergyKokkos::cleanup_copy() { + // WHY needed: this prevents parent copy from deallocating any arrays + allocated = 0; + cutsq = NULL; + eatom = NULL; + vatom = NULL; +} + +/* ---------------------------------------------------------------------- */ + +template +void PairDPDfdtEnergyKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memory->destroy_kokkos(k_eatom,eatom); + memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.view(); + } + if (vflag_atom) { + memory->destroy_kokkos(k_vatom,vatom); + memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + d_vatom = k_vatom.view(); + } + + atomKK->sync(execution_space,datamask_read); + k_cutsq.template sync(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = atomKK->k_x.view(); + c_x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + type = atomKK->k_type.view(); + tag = atomKK->k_tag.view(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + newton_pair = force->newton_pair; + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; + + // loop over neighbors of my atoms + + EV_FLOAT ev = pair_compute,void >(this,(NeighListKokkos*)list); + + if 
(eflag_global) eng_vdwl += ev.evdwl; + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } +} + +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairDPDfdtEnergyKokkos:: +compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const { + (void) i; + (void) j; + const F_FLOAT r = sqrt(rsq); + if (r < EPSILON) return 0; // r can be 0.0 in DPD systems + const F_FLOAT rinv = 1.0/r; + const F_FLOAT wr = 1.0 - r/cut[itype][jtype]; + const F_FLOAT wd = wr*wr; + + // conservative force = a0 * wr + return a0[itype][jtype]*wr*rinv; +} + +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairDPDfdtEnergyKokkos:: +compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const { + (void) i; + (void) j; + const F_FLOAT r = sqrt(rsq); + if (r < EPSILON) return 0; // r can be 0.0 in DPD systems + const F_FLOAT rinv = 1.0/r; + const F_FLOAT wr = 1.0 - r/cut[itype][jtype]; + const F_FLOAT wd = wr*wr; + // unshifted eng of conservative term: + // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]); + // eng shifted to 0.0 at cutoff + return 0.5*a0[itype][jtype]*cut[itype][jtype] * wd; +} + + +/* + int i,j,ii,jj,inum,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r,rinv,wd,wr,factor_dpd; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + double **x = atom->x; + double **f = atom->f; + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_lj = force->special_lj; + int newton_pair = force->newton_pair; + + inum = list->inum; 
+ ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_dpd = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + if (r < EPSILON) continue; // r can be 0.0 in DPD systems + rinv = 1.0/r; + wr = 1.0 - r/cut[itype][jtype]; + wd = wr*wr; + + // conservative force = a0 * wr + fpair = a0[itype][jtype]*wr; + fpair *= factor_dpd*rinv; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + if (newton_pair || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (eflag) { + // unshifted eng of conservative term: + // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]); + // eng shifted to 0.0 at cutoff + evdwl = 0.5*a0[itype][jtype]*cut[itype][jtype] * wd; + evdwl *= factor_dpd; + } + + if (evflag) ev_tally(i,j,nlocal,newton_pair, + evdwl,0.0,fpair,delx,dely,delz); + } + } + } + + if (vflag_fdotr) virial_fdotr_compute(); +} +*/ + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template +void PairDPDfdtEnergyKokkos::allocate() +{ + PairDPDfdtEnergy::allocate(); + + int n = atom->ntypes; + memory->destroy(cutsq); + memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + d_cutsq = k_cutsq.template view(); +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +template 
+void PairDPDfdtEnergyKokkos::settings(int narg, char **arg) +{ + if (narg != 2) error->all(FLERR,"Illegal pair_style command"); + + PairDPDfdtEnergy::settings(2,arg); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template +void PairDPDfdtEnergyKokkos::init_style() +{ + PairDPDfdtEnergy::init_style(); + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same::value && + !Kokkos::Impl::is_same::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same::value; + + if (neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 1; + neighbor->requests[irequest]->full_cluster = 0; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with dpd/fdt/energy/kk"); + } + +/* + if (comm->ghost_velocity == 0) + error->all(FLERR,"Pair dpd/fdt/energy requires ghost atoms store velocity"); + + // if newton off, forces between atoms ij will be double computed + // using different random numbers + + if (force->newton_pair == 0 && comm->me == 0) error->warning(FLERR, + "Pair dpd/fdt/energy requires newton pair on"); + + int irequest = neighbor->request(this,instance_me); + neighbor->requests[irequest]->ssa = 0; + for (int i = 0; i < modify->nfix; i++) + if (strcmp(modify->fix[i]->style,"shardlow") == 0) + neighbor->requests[irequest]->ssa = 1; + + bool eos_flag = false; + for (int i = 0; i < modify->nfix; i++) + if (strncmp(modify->fix[i]->style,"eos",3) == 0) eos_flag = true; + if(!eos_flag) error->all(FLERR,"pair_style dpd/fdt/energy requires an EOS to be specified"); +*/ +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i 
+------------------------------------------------------------------------- */ + +template +double PairDPDfdtEnergyKokkos::init_one(int i, int j) +{ + double cutone = PairDPDfdtEnergy::init_one(i,j); + + if(i(); + + return cutone; +} + + +namespace LAMMPS_NS { +template class PairDPDfdtEnergyKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class PairDPDfdtEnergyKokkos; +#endif +} + diff --git a/src/USER-DPD/pair_dpd_fdt_energy_kokkos.h b/src/USER-DPD/pair_dpd_fdt_energy_kokkos.h new file mode 100644 index 0000000000..a8a5f25801 --- /dev/null +++ b/src/USER-DPD/pair_dpd_fdt_energy_kokkos.h @@ -0,0 +1,119 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(dpd/fdt/energy/kk,PairDPDfdtEnergyKokkos) +PairStyle(dpd/fdt/energy/kk/device,PairDPDfdtEnergyKokkos) +PairStyle(dpd/fdt/energy/kk/host,PairDPDfdtEnergyKokkos) + +#else + +#ifndef LMP_PAIR_DPD_FDT_ENERGY_KOKKOS_H +#define LMP_PAIR_DPD_FDT_ENERGY_KOKKOS_H + +#include "pair_kokkos.h" +#include "pair_dpd_fdt_energy.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template +class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { + public: + enum {EnabledNeighFlags=HALFTHREAD|HALF}; + enum {COUL_FLAG=0}; + typedef DeviceType device_type; + PairDPDfdtEnergyKokkos(class LAMMPS *); + virtual ~PairDPDfdtEnergyKokkos(); + virtual void compute(int, int); + virtual void settings(int, char **); + void init_style(); + double init_one(int, int); + + protected: + void cleanup_copy(); + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const; + + F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + typename ArrayTypes::t_x_array_randomread x; + typename ArrayTypes::t_x_array c_x; + typename ArrayTypes::t_f_array f; + typename ArrayTypes::t_int_1d_randomread type; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + typename ArrayTypes::t_efloat_1d d_eatom; + typename ArrayTypes::t_virial_array d_vatom; + typename ArrayTypes::t_tagint_1d tag; + + int newton_pair; + double special_lj[4]; + + typename ArrayTypes::tdual_ffloat_2d k_cutsq; + typename ArrayTypes::t_ffloat_2d d_cutsq; + + + int neighflag; + int nlocal,nall,eflag,vflag; + + void allocate(); + + friend class PairComputeFunctor; + friend class PairComputeFunctor; + friend class PairComputeFunctor; + friend class 
PairComputeFunctor; + friend EV_FLOAT pair_compute_neighlist(PairDPDfdtEnergyKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairDPDfdtEnergyKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute(PairDPDfdtEnergyKokkos*,NeighListKokkos*); + friend void pair_virial_fdotr_compute(PairDPDfdtEnergyKokkos*); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. + +E: Pair dpd/fdt/energy requires ghost atoms store velocity + +Use the communicate vel yes command to enable this. + +E: Pair dpd/fdt/energy requires newton pair on + +Self-explanatory. + +E: All pair coeffs are not set + +All pair coefficients must be set in the data file or by the +pair_coeff command before running a simulation. 
+ +*/ From 8f78157202299a5bf9d860c90f30c8340d2d0cfc Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Mon, 21 Nov 2016 12:32:48 -0500 Subject: [PATCH 002/267] USER-DPD: aplly unpack_comm_hybrid bugfix d31121b to atom_vec_dpd_kokkos.cpp --- src/USER-DPD/atom_vec_dpd_kokkos.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/USER-DPD/atom_vec_dpd_kokkos.cpp b/src/USER-DPD/atom_vec_dpd_kokkos.cpp index c58b592e53..c79559172f 100644 --- a/src/USER-DPD/atom_vec_dpd_kokkos.cpp +++ b/src/USER-DPD/atom_vec_dpd_kokkos.cpp @@ -1205,6 +1205,8 @@ int AtomVecDPDKokkos::unpack_comm_hybrid(int n, int first, double *buf) h_uCond(i) = buf[m++]; h_uMech(i) = buf[m++]; h_uChem(i) = buf[m++]; + h_uCG(i) = buf[m++]; + h_uCGnew(i) = buf[m++]; } return m; } From 75907916045ff25745389db4b11773c820bc13de Mon Sep 17 00:00:00 2001 From: stamoor Date: Mon, 21 Nov 2016 13:54:14 -0700 Subject: [PATCH 003/267] Integrating atom_vec_dpd into the Kokkos package --- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 1874 ++++++++++++++++++++++++++++ src/KOKKOS/atom_vec_dpd_kokkos.h | 135 ++ 2 files changed, 2009 insertions(+) create mode 100644 src/KOKKOS/atom_vec_dpd_kokkos.cpp create mode 100644 src/KOKKOS/atom_vec_dpd_kokkos.h diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp new file mode 100644 index 0000000000..c79559172f --- /dev/null +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -0,0 +1,1874 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale AtomicKokkos/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include +#include "atom_vec_dpd_kokkos.h" +#include "atom_kokkos.h" +#include "comm_kokkos.h" +#include "domain.h" +#include "modify.h" +#include "fix.h" +#include "atom_masks.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define DELTA 10000 + +/* ---------------------------------------------------------------------- */ + +AtomVecDPDKokkos::AtomVecDPDKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) +{ + molecular = 0; + mass_type = 1; + + comm_x_only = comm_f_only = 0; + size_forward = 7; + size_reverse = 3; + size_border = 12; + size_velocity = 3; + size_data_atom = 6; + size_data_vel = 4; + xcol_data = 4; + + atom->rho_flag = 1; + atom->dpd_flag = 1; + + k_count = DAT::tdual_int_1d("atom::k_count",1); + atomKK = (AtomKokkos *) atom; + commKK = (CommKokkos *) comm; +} + +/* ---------------------------------------------------------------------- + grow atom arrays + n = 0 grows arrays by DELTA + n > 0 allocates arrays to size n +------------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::grow(int n) +{ + if (n == 0) nmax += DELTA; + else nmax = n; + atomKK->nmax = nmax; + if (nmax < 0 || nmax > MAXSMALLINT) + error->one(FLERR,"Per-processor system is too big"); + + sync(Device,ALL_MASK); + modified(Device,ALL_MASK); + + memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); + memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); + memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); + memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); + + memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); + memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); + memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + + + memory->grow_kokkos(atomKK->k_rho,atomKK->rho,nmax,"atom:rho"); + memory->grow_kokkos(atomKK->k_dpdTheta,atomKK->dpdTheta,nmax,"atom:dpdTheta"); 
+ memory->grow_kokkos(atomKK->k_uCond,atomKK->uCond,nmax,"atom:uCond"); + memory->grow_kokkos(atomKK->k_uMech,atomKK->uMech,nmax,"atom:uMech"); + memory->grow_kokkos(atomKK->k_uChem,atomKK->uChem,nmax,"atom:uChem"); + memory->grow_kokkos(atomKK->k_uCG,atomKK->uCG,nmax,"atom:uCG"); + memory->grow_kokkos(atomKK->k_uCGnew,atomKK->uCGnew,nmax,"atom:uCGnew"); + memory->grow_kokkos(atomKK->k_duChem,atomKK->duChem,nmax,"atom:duChem"); + + grow_reset(); + sync(Host,ALL_MASK); + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); +} + +/* ---------------------------------------------------------------------- + reset local array ptrs +------------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::grow_reset() +{ + tag = atomKK->tag; + d_tag = atomKK->k_tag.d_view; + h_tag = atomKK->k_tag.h_view; + + type = atomKK->type; + d_type = atomKK->k_type.d_view; + h_type = atomKK->k_type.h_view; + mask = atomKK->mask; + d_mask = atomKK->k_mask.d_view; + h_mask = atomKK->k_mask.h_view; + image = atomKK->image; + d_image = atomKK->k_image.d_view; + h_image = atomKK->k_image.h_view; + + x = atomKK->x; + d_x = atomKK->k_x.d_view; + h_x = atomKK->k_x.h_view; + v = atomKK->v; + d_v = atomKK->k_v.d_view; + h_v = atomKK->k_v.h_view; + f = atomKK->f; + d_f = atomKK->k_f.d_view; + h_f = atomKK->k_f.h_view; + + rho = atomKK->rho; + d_rho = atomKK->k_rho.d_view; + h_rho = atomKK->k_rho.h_view; + dpdTheta = atomKK->dpdTheta; + d_dpdTheta = atomKK->k_dpdTheta.d_view; + h_dpdTheta = atomKK->k_dpdTheta.h_view; + uCond = atomKK->uCond; + d_uCond = atomKK->k_uCond.d_view;; + h_uCond = atomKK->k_uCond.h_view; + uMech = atomKK->uMech; + d_uMech = atomKK->k_uMech.d_view;; + h_uMech = atomKK->k_uMech.h_view; + uChem = atomKK->uChem; + d_uChem = atomKK->k_uChem.d_view;; + h_uChem = atomKK->k_uChem.h_view; + uCG = atomKK->uCG; + d_uCG = atomKK->k_uCG.d_view;; + h_uCG = 
atomKK->k_uCG.h_view; + uCGnew = atomKK->uCGnew; + d_uCGnew = atomKK->k_uCGnew.d_view;; + h_uCGnew = atomKK->k_uCGnew.h_view; + duChem = atomKK->duChem; + d_duChem = atomKK->k_duChem.d_view;; + h_duChem = atomKK->k_duChem.h_view; +} + +/* ---------------------------------------------------------------------- + copy atom I info to atom J +------------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::copy(int i, int j, int delflag) +{ + h_tag[j] = h_tag[i]; + h_type[j] = h_type[i]; + mask[j] = mask[i]; + h_image[j] = h_image[i]; + h_x(j,0) = h_x(i,0); + h_x(j,1) = h_x(i,1); + h_x(j,2) = h_x(i,2); + h_v(j,0) = h_v(i,0); + h_v(j,1) = h_v(i,1); + h_v(j,2) = h_v(i,2); + h_dpdTheta[j] = h_dpdTheta[i]; + h_uCond[j] = h_uCond[i]; + h_uMech[j] = h_uMech[i]; + h_uChem[j] = h_uChem[i]; + h_uCG[j] = h_uCG[i]; + h_uCGnew[j] = h_uCGnew[i]; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag); +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDPDKokkos_PackComm { + typedef DeviceType device_type; + + typename ArrayTypes::t_x_array_randomread _x; + typename ArrayTypes::t_efloat_1d _dpdTheta,_uCond,_uMech,_uChem; + typename ArrayTypes::t_xfloat_2d_um _buf; + typename ArrayTypes::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecDPDKokkos_PackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_efloat_1d &dpdTheta, + const typename DAT::tdual_efloat_1d &uCond, + const typename DAT::tdual_efloat_1d &uMech, + const typename DAT::tdual_efloat_1d &uChem, + const typename DAT::tdual_xfloat_2d &buf, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + 
_x(x.view()), + _dpdTheta(dpdTheta.view()), + _uCond(uCond.view()), + _uMech(uMech.view()), + _uChem(uChem.view()), + _list(list.view()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + const size_t maxsend = (buf.view().dimension_0()*buf.view().dimension_1())/3; + const size_t elements = 3; + buffer_view(_buf,buf,maxsend,elements); + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + _buf(i,3) = _dpdTheta(j); + _buf(i,4) = _uCond(j); + _buf(i,5) = _uMech(j); + _buf(i,6) = _uChem(j); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_comm_kokkos(const int &n, + const DAT::tdual_int_2d &list, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, + const int* const pbc) +{ + // Check whether to always run forward communication on the host + // Choose correct forward PackComm kernel + + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + 
buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPHostType::fence(); + } else { + sync(Device,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPDeviceType::fence(); + } + + return n*size_forward; +} + +/* ---------------------------------------------------------------------- */ 
+ +template +struct AtomVecDPDKokkos_PackCommSelf { + typedef DeviceType device_type; + + typename ArrayTypes::t_x_array_randomread _x; + typename ArrayTypes::t_x_array _xw; + typename ArrayTypes::t_efloat_1d _dpdTheta,_uCond,_uMech,_uChem; + int _nfirst; + typename ArrayTypes::t_int_2d_const _list; + const int _iswap; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + X_FLOAT _pbc[6]; + + AtomVecDPDKokkos_PackCommSelf( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_efloat_1d &dpdTheta, + const typename DAT::tdual_efloat_1d &uCond, + const typename DAT::tdual_efloat_1d &uMech, + const typename DAT::tdual_efloat_1d &uChem, + const int &nfirst, + const typename DAT::tdual_int_2d &list, + const int & iswap, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): + _x(x.view()),_xw(x.view()), + _dpdTheta(dpdTheta.view()), + _uCond(uCond.view()), + _uMech(uMech.view()), + _uChem(uChem.view()), + _nfirst(nfirst),_list(list.view()),_iswap(iswap), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) { + _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; + _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; + }; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _xw(i+_nfirst,0) = _x(j,0); + _xw(i+_nfirst,1) = _x(j,1); + _xw(i+_nfirst,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } else { + _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; + _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; + _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; + } + } + _dpdTheta(i+_nfirst) = _dpdTheta(j); + _uCond(i+_nfirst) = _uCond(j); + _uMech(i+_nfirst) = _uMech(j); + _uChem(i+_nfirst) = _uChem(j); + } +}; + +/* 
---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, + const int nfirst, const int &pbc_flag, const int* const pbc) { + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + modified(Host,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPHostType::fence(); + } else { + sync(Device,X_MASK); + modified(Device,X_MASK); + if(pbc_flag) { + if(domain->triclinic) { + struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + 
nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } else { + if(domain->triclinic) { + struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + nfirst,list,iswap, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz,pbc); + Kokkos::parallel_for(n,f); + } + } + LMPDeviceType::fence(); + } + return n*3; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDPDKokkos_UnpackComm { + typedef DeviceType device_type; + + typename ArrayTypes::t_x_array _x; + typename ArrayTypes::t_efloat_1d _dpdTheta,_uCond,_uMech,_uChem; + typename ArrayTypes::t_xfloat_2d_const _buf; + int _first; + + AtomVecDPDKokkos_UnpackComm( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_efloat_1d &dpdTheta, + const typename DAT::tdual_efloat_1d &uCond, + const typename DAT::tdual_efloat_1d &uMech, + const typename DAT::tdual_efloat_1d &uChem, + const typename DAT::tdual_xfloat_2d &buf, + const int& first):_x(x.view()), + _dpdTheta(dpdTheta.view()), + _uCond(uCond.view()), + _uMech(uMech.view()), + _uChem(uChem.view()), + _buf(buf.view()), + _first(first) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _dpdTheta(i+_first) = _buf(i,3); + _uCond(i+_first) = _buf(i,4); + _uMech(i+_first) = _buf(i,5); + _uChem(i+_first) = _buf(i,6); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::unpack_comm_kokkos(const int &n, 
const int &first, + const DAT::tdual_xfloat_2d &buf ) { + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + modified(Host,X_MASK); + struct AtomVecDPDKokkos_UnpackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,first); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } else { + sync(Device,X_MASK); + modified(Device,X_MASK); + struct AtomVecDPDKokkos_UnpackComm f(atomKK->k_x, + atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, + buf,first); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_comm(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz; + + m = 0; /* packs 7 doubles per listed atom: x,y,z,dpdTheta,uCond,uMech,uChem; returns the count written */ + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = h_dpdTheta[j]; /* read the host views here too, exactly as the shifted branch below does */ + buf[m++] = h_uCond[j]; + buf[m++] = h_uMech[j]; + buf[m++] = h_uChem[j]; + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = h_dpdTheta[j]; + buf[m++] = h_uCond[j]; + buf[m++] = h_uMech[j]; + buf[m++] = h_uChem[j]; + } + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_comm_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + 
buf[m++] = h_v(j,2); + buf[m++] = h_dpdTheta[j]; + buf[m++] = h_uCond[j]; + buf[m++] = h_uMech[j]; + buf[m++] = h_uChem[j]; + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_dpdTheta[j]; + buf[m++] = h_uCond[j]; + buf[m++] = h_uMech[j]; + buf[m++] = h_uChem[j]; + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + if (mask[j] & deform_groupbit) { /* group test must look at the atom being packed (j), not the send-list slot i */ + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + buf[m++] = h_dpdTheta(j); + buf[m++] = h_uCond(j); + buf[m++] = h_uMech(j); + buf[m++] = h_uChem(j); + } + } + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::unpack_comm(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_dpdTheta[i] = buf[m++]; + h_uCond[i] = buf[m++]; + h_uMech[i] = buf[m++]; + h_uChem[i] = buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::unpack_comm_vel(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) 
{ + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + h_dpdTheta[i] = buf[m++]; + h_uCond[i] = buf[m++]; + h_uMech[i] = buf[m++]; + h_uChem[i] = buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf) +{ + if(n > 0) + sync(Host,F_MASK); + + int m = 0; + const int last = first + n; + for (int i = first; i < last; i++) { + buf[m++] = h_f(i,0); + buf[m++] = h_f(i,1); + buf[m++] = h_f(i,2); + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::unpack_reverse(int n, int *list, double *buf) +{ + if(n > 0) { + sync(Host,F_MASK); + modified(Host,F_MASK); + } + + int m = 0; + for (int i = 0; i < n; i++) { + const int j = list[i]; + h_f(j,0) += buf[m++]; + h_f(j,1) += buf[m++]; + h_f(j,2) += buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDPDKokkos_PackBorder { + typedef DeviceType device_type; + + typename ArrayTypes::t_xfloat_2d _buf; + const typename ArrayTypes::t_int_2d_const _list; + const int _iswap; + const typename ArrayTypes::t_x_array_randomread _x; + const typename ArrayTypes::t_tagint_1d _tag; + const typename ArrayTypes::t_int_1d _type; + const typename ArrayTypes::t_int_1d _mask; + typename ArrayTypes::t_efloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + X_FLOAT _dx,_dy,_dz; + + AtomVecDPDKokkos_PackBorder( + const typename ArrayTypes::t_xfloat_2d &buf, + const typename ArrayTypes::t_int_2d_const &list, + const int & iswap, + const typename ArrayTypes::t_x_array &x, + const typename ArrayTypes::t_tagint_1d &tag, + const typename ArrayTypes::t_int_1d &type, + const typename ArrayTypes::t_int_1d &mask, + const typename ArrayTypes::t_efloat_1d &dpdTheta, + const typename ArrayTypes::t_efloat_1d &uCond, + const typename 
ArrayTypes::t_efloat_1d &uMech, + const typename ArrayTypes::t_efloat_1d &uChem, + const typename ArrayTypes::t_efloat_1d &uCG, + const typename ArrayTypes::t_efloat_1d &uCGnew, + const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): + _buf(buf),_list(list),_iswap(iswap), + _x(x),_tag(tag),_type(type),_mask(mask), + _dpdTheta(dpdTheta), + _uCond(uCond), + _uMech(uMech), + _uChem(uChem), + _uCG(uCG), /* bind uCG to its own argument so buffer column 10 carries uCG and column 11 carries uCGnew */ + _uCGnew(uCGnew), + _dx(dx),_dy(dy),_dz(dz) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + const int j = _list(_iswap,i); + if (PBC_FLAG == 0) { + _buf(i,0) = _x(j,0); + _buf(i,1) = _x(j,1); + _buf(i,2) = _x(j,2); + } else { + _buf(i,0) = _x(j,0) + _dx; + _buf(i,1) = _x(j,1) + _dy; + _buf(i,2) = _x(j,2) + _dz; + } + _buf(i,3) = _tag(j); + _buf(i,4) = _type(j); + _buf(i,5) = _mask(j); + _buf(i,6) = _dpdTheta(j); + _buf(i,7) = _uCond(j); + _buf(i,8) = _uMech(j); + _buf(i,9) = _uChem(j); + _buf(i,10) = _uCG(j); + _buf(i,11) = _uCGnew(j); + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space) +{ + X_FLOAT dx,dy,dz; + + if (pbc_flag != 0) { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if(space==Host) { + AtomVecDPDKokkos_PackBorder f( + buf.view(), k_sendlist.view(), + iswap,h_x,h_tag,h_type,h_mask, + h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, + dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); + } else { + AtomVecDPDKokkos_PackBorder f( + buf.view(), k_sendlist.view(), + iswap,d_x,d_tag,d_type,d_mask, + d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, + dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } + + } else { + dx = dy = dz = 0; + if(space==Host) { + 
AtomVecDPDKokkos_PackBorder f( + buf.view(), k_sendlist.view(), + iswap,h_x,h_tag,h_type,h_mask, + h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, + dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); + } else { + AtomVecDPDKokkos_PackBorder f( + buf.view(), k_sendlist.view(), + iswap,d_x,d_tag,d_type,d_mask, + d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, + dx,dy,dz); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } + } + return n*size_border; /* the functor fills 12 columns per atom (size_border), not 6; NOTE(review): assumes the caller uses this as the number of doubles packed, as with pack_border() */ +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_border(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_dpdTheta(j); + buf[m++] = h_uCond(j); + buf[m++] = h_uMech(j); + buf[m++] = h_uChem(j); + buf[m++] = h_uCG(j); + buf[m++] = h_uCGnew(j); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_dpdTheta(j); + buf[m++] = h_uCond(j); + buf[m++] = h_uMech(j); + buf[m++] = h_uChem(j); + buf[m++] = h_uCG(j); + buf[m++] = h_uCGnew(j); + } + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_border_vel(int n, int *list, double *buf, + int 
pbc_flag, int *pbc) +{ + int i,j,m; + double dx,dy,dz,dvx,dvy,dvz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_dpdTheta(j); + buf[m++] = h_uCond(j); + buf[m++] = h_uMech(j); + buf[m++] = h_uChem(j); + buf[m++] = h_uCG(j); + buf[m++] = h_uCGnew(j); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + buf[m++] = h_dpdTheta(j); + buf[m++] = h_uCond(j); + buf[m++] = h_uMech(j); + buf[m++] = h_uChem(j); + buf[m++] = h_uCG(j); + buf[m++] = h_uCGnew(j); + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag(j)).d; + buf[m++] = ubuf(h_type(j)).d; + buf[m++] = ubuf(h_mask(j)).d; + if (mask[j] & deform_groupbit) { /* group test must look at the atom being packed (j), not the send-list slot i */ + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + buf[m++] = h_dpdTheta(j); + buf[m++] = h_uCond(j); + buf[m++] = h_uMech(j); + buf[m++] = h_uChem(j); + buf[m++] = h_uCG(j); + buf[m++] = h_uCGnew(j); + } + } + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < 
atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_comm_hybrid(int n, int *list, double *buf) +{ + int i,j,m; + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_dpdTheta[j]; + buf[m++] = h_uCond[j]; + buf[m++] = h_uMech[j]; + buf[m++] = h_uChem[j]; + buf[m++] = h_uCG[j]; + buf[m++] = h_uCGnew[j]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_border_hybrid(int n, int *list, double *buf) +{ + int i,j,m; + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_dpdTheta[j]; + buf[m++] = h_uCond[j]; + buf[m++] = h_uMech[j]; + buf[m++] = h_uChem[j]; + buf[m++] = h_uCG[j]; + buf[m++] = h_uCGnew[j]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDPDKokkos_UnpackBorder { + typedef DeviceType device_type; + + const typename ArrayTypes::t_xfloat_2d_const _buf; + typename ArrayTypes::t_x_array _x; + typename ArrayTypes::t_tagint_1d _tag; + typename ArrayTypes::t_int_1d _type; + typename ArrayTypes::t_int_1d _mask; + typename ArrayTypes::t_efloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + int _first; + + + AtomVecDPDKokkos_UnpackBorder( + const typename ArrayTypes::t_xfloat_2d_const &buf, + typename ArrayTypes::t_x_array &x, + typename ArrayTypes::t_tagint_1d &tag, + typename ArrayTypes::t_int_1d &type, + typename ArrayTypes::t_int_1d &mask, + const typename ArrayTypes::t_efloat_1d &dpdTheta, + const typename ArrayTypes::t_efloat_1d &uCond, + const typename ArrayTypes::t_efloat_1d &uMech, + const typename ArrayTypes::t_efloat_1d &uChem, + const typename ArrayTypes::t_efloat_1d &uCG, + const typename ArrayTypes::t_efloat_1d &uCGnew, + const int& first): + 
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask), + _dpdTheta(dpdTheta), + _uCond(uCond), + _uMech(uMech), + _uChem(uChem), + _uCG(uCG), /* bind uCG to its own argument; aliasing uCGnew here would leave ghost uCG unwritten */ + _uCGnew(uCGnew), + _first(first) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + _x(i+_first,0) = _buf(i,0); + _x(i+_first,1) = _buf(i,1); + _x(i+_first,2) = _buf(i,2); + _tag(i+_first) = static_cast (_buf(i,3)); + _type(i+_first) = static_cast (_buf(i,4)); + _mask(i+_first) = static_cast (_buf(i,5)); + _dpdTheta(i+_first) = _buf(i,6); + _uCond(i+_first) = _buf(i,7); + _uMech(i+_first) = _buf(i,8); + _uChem(i+_first) = _buf(i,9); + _uCG(i+_first) = _buf(i,10); + _uCGnew(i+_first) = _buf(i,11); +// printf("%i %i %lf %lf %lf %i BORDER\n",_tag(i+_first),i+_first,_x(i+_first,0),_x(i+_first,1),_x(i+_first,2),_type(i+_first)); + } +}; + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::unpack_border_kokkos(const int &n, const int &first, + const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) { + modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + while (first+n >= nmax) grow(0); + modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + if(space==Host) { + struct AtomVecDPDKokkos_UnpackBorder f(buf.view(), + h_x,h_tag,h_type,h_mask, + h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, + first); + Kokkos::parallel_for(n,f); + LMPHostType::fence(); + } else { + struct AtomVecDPDKokkos_UnpackBorder f(buf.view(), + d_x,d_tag,d_type,d_mask, + d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, + first); + Kokkos::parallel_for(n,f); + LMPDeviceType::fence(); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::unpack_border(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + if (i == nmax) grow(0); + modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag(i) = 
(tagint) ubuf(buf[m++]).i; + h_type(i) = (int) ubuf(buf[m++]).i; + h_mask(i) = (int) ubuf(buf[m++]).i; + h_dpdTheta(i) = buf[m++]; + h_uCond(i) = buf[m++]; + h_uMech(i) = buf[m++]; + h_uChem(i) = buf[m++]; + h_uCG(i) = buf[m++]; + h_uCGnew(i) = buf[m++]; + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::unpack_border_vel(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + if (i == nmax) grow(0); + modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag(i) = (tagint) ubuf(buf[m++]).i; + h_type(i) = (int) ubuf(buf[m++]).i; + h_mask(i) = (int) ubuf(buf[m++]).i; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + h_dpdTheta(i) = buf[m++]; + h_uCond(i) = buf[m++]; + h_uMech(i) = buf[m++]; + h_uChem(i) = buf[m++]; + h_uCG(i) = buf[m++]; + h_uCGnew(i) = buf[m++]; + } + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::unpack_comm_hybrid(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_dpdTheta(i) = buf[m++]; + h_uCond(i) = buf[m++]; + h_uMech(i) = buf[m++]; + h_uChem(i) = buf[m++]; + h_uCG(i) = buf[m++]; + h_uCGnew(i) = buf[m++]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::unpack_border_hybrid(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; 
i++) { + h_dpdTheta(i) = buf[m++]; + h_uCond(i) = buf[m++]; + h_uMech(i) = buf[m++]; + h_uChem(i) = buf[m++]; + h_uCG(i) = buf[m++]; + h_uCGnew(i) = buf[m++]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDPDKokkos_PackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes AT; + typename AT::t_x_array_randomread _x; + typename AT::t_v_array_randomread _v; + typename AT::t_tagint_1d_randomread _tag; + typename AT::t_int_1d_randomread _type; + typename AT::t_int_1d_randomread _mask; + typename AT::t_imageint_1d_randomread _image; + typename AT::t_efloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; + typename AT::t_x_array _xw; + typename AT::t_v_array _vw; + typename AT::t_tagint_1d _tagw; + typename AT::t_int_1d _typew; + typename AT::t_int_1d _maskw; + typename AT::t_imageint_1d _imagew; + typename AT::t_efloat_1d _dpdThetaw,_uCondw,_uMechw,_uChemw,_uCGw,_uCGneww; + + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d_const _sendlist; + typename AT::t_int_1d_const _copylist; + int _nlocal,_dim; + X_FLOAT _lo,_hi; + + AtomVecDPDKokkos_PackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d sendlist, + typename AT::tdual_int_1d copylist,int nlocal, int dim, + X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _dpdTheta(atom->k_dpdTheta.view()), + _uCond(atom->k_uCond.view()), + _uMech(atom->k_uMech.view()), + _uChem(atom->k_uChem.view()), + _uCG(atom->k_uCG.view()), + _uCGnew(atom->k_uCGnew.view()), + _xw(atom->k_x.view()), + _vw(atom->k_v.view()), + _tagw(atom->k_tag.view()), + _typew(atom->k_type.view()), + _maskw(atom->k_mask.view()), + _imagew(atom->k_image.view()), + _dpdThetaw(atom->k_dpdTheta.view()), + _uCondw(atom->k_uCond.view()), + 
_uMechw(atom->k_uMech.view()), + _uChemw(atom->k_uChem.view()), + _uCGw(atom->k_uCG.view()), + _uCGneww(atom->k_uCGnew.view()), + _sendlist(sendlist.template view()), + _copylist(copylist.template view()), + _nlocal(nlocal),_dim(dim), + _lo(lo),_hi(hi){ + const size_t elements = 17; + const int maxsendlist = (buf.template view().dimension_0()*buf.template view().dimension_1())/elements; + + buffer_view(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &mysend) const { + const int i = _sendlist(mysend); + _buf(mysend,0) = 17; + _buf(mysend,1) = _x(i,0); + _buf(mysend,2) = _x(i,1); + _buf(mysend,3) = _x(i,2); + _buf(mysend,4) = _v(i,0); + _buf(mysend,5) = _v(i,1); + _buf(mysend,6) = _v(i,2); + _buf(mysend,7) = _tag[i]; + _buf(mysend,8) = _type[i]; + _buf(mysend,9) = _mask[i]; + _buf(mysend,10) = _image[i]; + _buf(mysend,11) = _dpdTheta[i]; + _buf(mysend,12) = _uCond[i]; + _buf(mysend,13) = _uMech[i]; + _buf(mysend,14) = _uChem[i]; + _buf(mysend,15) = _uCG[i]; + _buf(mysend,16) = _uCGnew[i]; + const int j = _copylist(mysend); + + if(j>-1) { + _xw(i,0) = _x(j,0); + _xw(i,1) = _x(j,1); + _xw(i,2) = _x(j,2); + _vw(i,0) = _v(j,0); + _vw(i,1) = _v(j,1); + _vw(i,2) = _v(j,2); + _tagw[i] = _tag(j); + _typew[i] = _type(j); + _maskw[i] = _mask(j); + _imagew[i] = _image(j); + _dpdThetaw[i] = _dpdTheta(j); + _uCondw[i] = _uCond(j); + _uMechw[i] = _uMech(j); + _uChemw[i] = _uChem(j); + _uCGw[i] = _uCG(j); + _uCGneww[i] = _uCGnew(j); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi ) +{ + if(nsend > (int) (k_buf.view().dimension_0()*k_buf.view().dimension_1())/17) { + int newsize = nsend*17/k_buf.view().dimension_1()+1; + k_buf.resize(newsize,k_buf.view().dimension_1()); + } + if(space == Host) { + 
AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + LMPHostType::fence(); + return nsend*17; + } else { + AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); + Kokkos::parallel_for(nsend,f); + LMPDeviceType::fence(); + return nsend*17; + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_exchange(int i, double *buf) +{ + int m = 1; + buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + buf[m++] = ubuf(h_tag(i)).d; + buf[m++] = ubuf(h_type(i)).d; + buf[m++] = ubuf(h_mask(i)).d; + buf[m++] = ubuf(h_image(i)).d; + buf[m++] = h_dpdTheta[i]; + buf[m++] = h_uCond[i]; + buf[m++] = h_uMech[i]; + buf[m++] = h_uChem[i]; + buf[m++] = h_uCG[i]; + buf[m++] = h_uCGnew[i]; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecDPDKokkos_UnpackExchangeFunctor { + typedef DeviceType device_type; + typedef ArrayTypes AT; + typename AT::t_x_array _x; + typename AT::t_v_array _v; + typename AT::t_tagint_1d _tag; + typename AT::t_int_1d _type; + typename AT::t_int_1d _mask; + typename AT::t_imageint_1d _image; + typename AT::t_efloat_1d _dpdTheta; + typename AT::t_efloat_1d _uCond; + typename AT::t_efloat_1d _uMech; + typename AT::t_efloat_1d _uChem; + typename AT::t_efloat_1d _uCG; + typename AT::t_efloat_1d _uCGnew; + + typename AT::t_xfloat_2d_um _buf; + typename AT::t_int_1d _nlocal; + int _dim; + X_FLOAT _lo,_hi; + + AtomVecDPDKokkos_UnpackExchangeFunctor( + const AtomKokkos* atom, + const typename AT::tdual_xfloat_2d buf, + typename AT::tdual_int_1d nlocal, + int dim, 
X_FLOAT lo, X_FLOAT hi): + _x(atom->k_x.view()), + _v(atom->k_v.view()), + _tag(atom->k_tag.view()), + _type(atom->k_type.view()), + _mask(atom->k_mask.view()), + _image(atom->k_image.view()), + _dpdTheta(atom->k_dpdTheta.view()), /* the DPD per-atom views must be bound as well: operator() below stores into all six of them */ + _uCond(atom->k_uCond.view()), + _uMech(atom->k_uMech.view()), + _uChem(atom->k_uChem.view()), + _uCG(atom->k_uCG.view()), + _uCGnew(atom->k_uCGnew.view()), + _nlocal(nlocal.template view()),_dim(dim), + _lo(lo),_hi(hi){ + const size_t elements = 17; + const int maxsendlist = (buf.template view().dimension_0()*buf.template view().dimension_1())/elements; + + buffer_view(_buf,buf,maxsendlist,elements); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &myrecv) const { + X_FLOAT x = _buf(myrecv,_dim+1); + if (x >= _lo && x < _hi) { + int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); + _x(i,0) = _buf(myrecv,1); + _x(i,1) = _buf(myrecv,2); + _x(i,2) = _buf(myrecv,3); + _v(i,0) = _buf(myrecv,4); + _v(i,1) = _buf(myrecv,5); + _v(i,2) = _buf(myrecv,6); + _tag[i] = _buf(myrecv,7); + _type[i] = _buf(myrecv,8); + _mask[i] = _buf(myrecv,9); + _image[i] = _buf(myrecv,10); + _dpdTheta[i] = _buf(myrecv,11); + _uCond[i] = _buf(myrecv,12); + _uMech[i] = _buf(myrecv,13); + _uChem[i] = _buf(myrecv,14); + _uCG[i] = _buf(myrecv,15); + _uCGnew[i] = _buf(myrecv,16); + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { + if(space == Host) { + k_count.h_view(0) = nlocal; + AtomVecDPDKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/17,f); + LMPHostType::fence(); + return k_count.h_view(0); + } else { + k_count.h_view(0) = nlocal; + k_count.modify(); + k_count.sync(); + AtomVecDPDKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); + Kokkos::parallel_for(nrecv/17,f); + LMPDeviceType::fence(); + k_count.modify(); + k_count.sync(); + + return k_count.h_view(0); + } +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::unpack_exchange(double *buf) +{ + int 
nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK); + + int m = 1; + h_x(nlocal,0) = buf[m++]; + h_x(nlocal,1) = buf[m++]; + h_x(nlocal,2) = buf[m++]; + h_v(nlocal,0) = buf[m++]; + h_v(nlocal,1) = buf[m++]; + h_v(nlocal,2) = buf[m++]; + h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; + h_type(nlocal) = (int) ubuf(buf[m++]).i; + h_mask(nlocal) = (int) ubuf(buf[m++]).i; + h_image(nlocal) = (imageint) ubuf(buf[m++]).i; + h_dpdTheta[nlocal] = buf[m++]; + h_uCond[nlocal] = buf[m++]; + h_uMech[nlocal] = buf[m++]; + h_uChem[nlocal] = buf[m++]; + h_uCG[nlocal] = buf[m++]; + h_uCGnew[nlocal] = buf[m++]; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]-> + unpack_exchange(nlocal,&buf[m]); + + atom->nlocal++; + return m; +} + +/* ---------------------------------------------------------------------- + size of restart data for all atoms owned by this proc + include extra data stored by fixes +------------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::size_restart() +{ + int i; + + int nlocal = atom->nlocal; + int n = 15 * nlocal; // 11 + dpdTheta + uCond + uMech + uChem + + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + for (i = 0; i < nlocal; i++) + n += modify->fix[atom->extra_restart[iextra]]->size_restart(i); + + return n; +} + +/* ---------------------------------------------------------------------- + pack atom I's data for restart file including extra quantities + xyz must be 1st 3 values, so that read_restart can test on them + molecular types may be negative, but write as positive +------------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_restart(int i, double *buf) +{ + sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK ); + + int m = 1; + 
buf[m++] = h_x(i,0); + buf[m++] = h_x(i,1); + buf[m++] = h_x(i,2); + buf[m++] = ubuf(h_tag(i)).d; + buf[m++] = ubuf(h_type(i)).d; + buf[m++] = ubuf(h_mask(i)).d; + buf[m++] = ubuf(h_image(i)).d; + buf[m++] = h_v(i,0); + buf[m++] = h_v(i,1); + buf[m++] = h_v(i,2); + buf[m++] = h_dpdTheta[i]; + buf[m++] = h_uCond[i]; + buf[m++] = h_uMech[i]; + buf[m++] = h_uChem[i]; + + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- + unpack data for one atom from restart file including extra quantities +------------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::unpack_restart(double *buf) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) { + grow(0); + if (atom->nextra_store) + memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); + } + modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK ); + + int m = 1; + h_x(nlocal,0) = buf[m++]; + h_x(nlocal,1) = buf[m++]; + h_x(nlocal,2) = buf[m++]; + h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; + h_type(nlocal) = (int) ubuf(buf[m++]).i; + h_mask(nlocal) = (int) ubuf(buf[m++]).i; + h_image(nlocal) = (imageint) ubuf(buf[m++]).i; + h_v(nlocal,0) = buf[m++]; + h_v(nlocal,1) = buf[m++]; + h_v(nlocal,2) = buf[m++]; + h_dpdTheta[nlocal] = buf[m++]; + h_uCond[nlocal] = buf[m++]; + h_uMech[nlocal] = buf[m++]; + h_uChem[nlocal] = buf[m++]; + + double **extra = atom->extra; + if (atom->nextra_store) { + int size = static_cast (ubuf(buf[m++]).i) - m; + for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++]; + } + + atom->nlocal++; + return m; +} + +/* ---------------------------------------------------------------------- + create one atom of itype at coord + set other values to defaults 
+------------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::create_atom(int itype, double *coord) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) { + //if(nlocal>2) printf("typeA: %i %i\n",type[0],type[1]); + atomKK->modified(Host,ALL_MASK); + grow(0); + //if(nlocal>2) printf("typeB: %i %i\n",type[0],type[1]); + } + atomKK->modified(Host,ALL_MASK); + + tag[nlocal] = 0; + type[nlocal] = itype; + h_x(nlocal,0) = coord[0]; + h_x(nlocal,1) = coord[1]; + h_x(nlocal,2) = coord[2]; + h_mask[nlocal] = 1; + h_image[nlocal] = ((tagint) IMGMAX << IMG2BITS) | + ((tagint) IMGMAX << IMGBITS) | IMGMAX; + h_v(nlocal,0) = 0.0; + h_v(nlocal,1) = 0.0; + h_v(nlocal,2) = 0.0; + h_rho[nlocal] = 0.0; + h_dpdTheta[nlocal] = 0.0; + h_uCond[nlocal] = 0.0; + h_uMech[nlocal] = 0.0; + h_uChem[nlocal] = 0.0; + h_uCG[nlocal] = 0.0; + h_uCGnew[nlocal] = 0.0; + h_duChem[nlocal] = 0.0; + + atom->nlocal++; +} + +/* ---------------------------------------------------------------------- + unpack one line from Atoms section of data file + initialize other atom quantities +------------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::data_atom(double *coord, tagint imagetmp, + char **values) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + + h_tag[nlocal] = ATOTAGINT(values[0]); + h_type[nlocal] = atoi(values[1]); + if (type[nlocal] <= 0 || type[nlocal] > atom->ntypes) + error->one(FLERR,"Invalid atom type in Atoms section of data file"); + + h_dpdTheta[nlocal] = atof(values[2]); + if (h_dpdTheta[nlocal] <= 0) + error->one(FLERR,"Internal temperature in Atoms section of date file must be > zero"); + + h_x(nlocal,0) = coord[0]; + h_x(nlocal,1) = coord[1]; + h_x(nlocal,2) = coord[2]; + + h_image[nlocal] = imagetmp; + + h_mask[nlocal] = 1; + h_v(nlocal,0) = 0.0; + h_v(nlocal,1) = 0.0; + h_v(nlocal,2) = 0.0; + + h_rho[nlocal] = 0.0; + h_uCond[nlocal] = 0.0; + h_uMech[nlocal] = 0.0; + h_uChem[nlocal] = 
0.0; + h_uCG[nlocal] = 0.0; + h_uCGnew[nlocal] = 0.0; + + atomKK->modified(Host,ALL_MASK); + + atom->nlocal++; +} + +/* ---------------------------------------------------------------------- + unpack hybrid quantities from one line in Atoms section of data file + initialize other atom quantities for this sub-style +------------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::data_atom_hybrid(int nlocal, char **values) +{ + h_dpdTheta(nlocal) = atof(values[0]); + + return 1; +} + +/* ---------------------------------------------------------------------- + pack atom info for data file including 3 image flags +------------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::pack_data(double **buf) +{ + int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) { + buf[i][0] = ubuf(h_tag(i)).d; + buf[i][1] = ubuf(h_type(i)).d; + buf[i][2] = h_dpdTheta(i); + buf[i][3] = h_x(i,0); + buf[i][4] = h_x(i,1); + buf[i][5] = h_x(i,2); + buf[i][6] = ubuf((h_image[i] & IMGMASK) - IMGMAX).d; /* image flags are ubuf-encoded because write_data() decodes columns 6-8 with ubuf(...).i */ + buf[i][7] = ubuf((h_image[i] >> IMGBITS & IMGMASK) - IMGMAX).d; + buf[i][8] = ubuf((h_image[i] >> IMG2BITS) - IMGMAX).d; + } +} + +/* ---------------------------------------------------------------------- + pack hybrid atom info for data file +------------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::pack_data_hybrid(int i, double *buf) +{ + buf[0] = h_dpdTheta(i); + return 1; +} + +/* ---------------------------------------------------------------------- + write atom info to data file including 3 image flags +------------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::write_data(FILE *fp, int n, double **buf) +{ + for (int i = 0; i < n; i++) + fprintf(fp,TAGINT_FORMAT " %d %-1.16e %-1.16e %-1.16e %-1.16e %d %d %d\n", + (tagint) ubuf(buf[i][0]).i,(int) ubuf(buf[i][1]).i, + buf[i][2],buf[i][3],buf[i][4],buf[i][5], + (int) ubuf(buf[i][6]).i,(int) 
ubuf(buf[i][7]).i, + (int) ubuf(buf[i][8]).i); +} + +/* ---------------------------------------------------------------------- + write hybrid atom info to data file +------------------------------------------------------------------------- */ + +int AtomVecDPDKokkos::write_data_hybrid(FILE *fp, double *buf) +{ + fprintf(fp," %-1.16e",buf[0]); + return 1; +} + +/* ---------------------------------------------------------------------- + return # of bytes of allocated memory +------------------------------------------------------------------------- */ + +bigint AtomVecDPDKokkos::memory_usage() +{ + bigint bytes = 0; + + if (atom->memcheck("tag")) bytes += memory->usage(tag,nmax); + if (atom->memcheck("type")) bytes += memory->usage(type,nmax); + if (atom->memcheck("mask")) bytes += memory->usage(mask,nmax); + if (atom->memcheck("image")) bytes += memory->usage(image,nmax); + if (atom->memcheck("x")) bytes += memory->usage(x,nmax,3); + if (atom->memcheck("v")) bytes += memory->usage(v,nmax,3); + if (atom->memcheck("f")) bytes += memory->usage(f,nmax*commKK->nthreads,3); + if (atom->memcheck("rho")) bytes += memory->usage(rho,nmax); + if (atom->memcheck("dpdTheta")) bytes += memory->usage(dpdTheta,nmax); + if (atom->memcheck("uCond")) bytes += memory->usage(uCond,nmax); + if (atom->memcheck("uMech")) bytes += memory->usage(uMech,nmax); + if (atom->memcheck("uChem")) bytes += memory->usage(uChem,nmax); + if (atom->memcheck("uCG")) bytes += memory->usage(uCG,nmax); + if (atom->memcheck("uCGnew")) bytes += memory->usage(uCGnew,nmax); + if (atom->memcheck("duChem")) bytes += memory->usage(duChem,nmax); + + return bytes; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if 
(mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + } else { + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if ((mask & X_MASK) && atomKK->k_x.need_sync()) + perform_async_copy(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync()) + perform_async_copy(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync()) + perform_async_copy(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync()) + perform_async_copy(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync()) + perform_async_copy(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync()) + perform_async_copy(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync()) + perform_async_copy(atomKK->k_image,space); + } else { + if ((mask & X_MASK) && atomKK->k_x.need_sync()) + perform_async_copy(atomKK->k_x,space); + if ((mask & V_MASK) && atomKK->k_v.need_sync()) + perform_async_copy(atomKK->k_v,space); + if ((mask & F_MASK) && atomKK->k_f.need_sync()) + perform_async_copy(atomKK->k_f,space); + if ((mask & TAG_MASK) && atomKK->k_tag.need_sync()) + perform_async_copy(atomKK->k_tag,space); + if ((mask & TYPE_MASK) && atomKK->k_type.need_sync()) + perform_async_copy(atomKK->k_type,space); + if ((mask & MASK_MASK) && atomKK->k_mask.need_sync()) + perform_async_copy(atomKK->k_mask,space); + if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync()) + 
perform_async_copy(atomKK->k_image,space); + } +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if (mask & X_MASK) atomKK->k_x.modify(); + if (mask & V_MASK) atomKK->k_v.modify(); + if (mask & F_MASK) atomKK->k_f.modify(); + if (mask & TAG_MASK) atomKK->k_tag.modify(); + if (mask & TYPE_MASK) atomKK->k_type.modify(); + if (mask & MASK_MASK) atomKK->k_mask.modify(); + if (mask & IMAGE_MASK) atomKK->k_image.modify(); + } else { + if (mask & X_MASK) atomKK->k_x.modify(); + if (mask & V_MASK) atomKK->k_v.modify(); + if (mask & F_MASK) atomKK->k_f.modify(); + if (mask & TAG_MASK) atomKK->k_tag.modify(); + if (mask & TYPE_MASK) atomKK->k_type.modify(); + if (mask & MASK_MASK) atomKK->k_mask.modify(); + if (mask & IMAGE_MASK) atomKK->k_image.modify(); + } +} + diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.h b/src/KOKKOS/atom_vec_dpd_kokkos.h new file mode 100644 index 0000000000..d108e58ae7 --- /dev/null +++ b/src/KOKKOS/atom_vec_dpd_kokkos.h @@ -0,0 +1,135 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale AtomicKokkos/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef ATOM_CLASS + +AtomStyle(dpd/kk,AtomVecDPDKokkos) + +#else + +#ifndef LMP_ATOM_VEC_DPD_KOKKOS_H +#define LMP_ATOM_VEC_DPD_KOKKOS_H + +#include "atom_vec_kokkos.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +class AtomVecDPDKokkos : public AtomVecKokkos { + public: + AtomVecDPDKokkos(class LAMMPS *); + virtual ~AtomVecDPDKokkos() {} + void grow(int); + void copy(int, int, int); + int pack_comm(int, int *, double *, int, int *); + int pack_comm_vel(int, int *, double *, int, int *); + int pack_comm_hybrid(int, int *, double *); + void unpack_comm(int, int, double *); + void unpack_comm_vel(int, int, double *); + int unpack_comm_hybrid(int, int, double *); + int pack_reverse(int, int, double *); + void unpack_reverse(int, int *, double *); + int pack_border(int, int *, double *, int, int *); + int pack_border_vel(int, int *, double *, int, int *); + int pack_border_hybrid(int, int *, double *); + void unpack_border(int, int, double *); + void unpack_border_vel(int, int, double *); + int unpack_border_hybrid(int, int, double *); + int pack_exchange(int, double *); + int unpack_exchange(double *); + int size_restart(); + int pack_restart(int, double *); + int unpack_restart(double *); + void create_atom(int, double *); + void data_atom(double *, tagint, char **); + int data_atom_hybrid(int, char **); + void pack_data(double **); + int pack_data_hybrid(int, double *); + void write_data(FILE *, int, double **); + int write_data_hybrid(FILE *, double *); + bigint memory_usage(); + + void grow_reset(); + int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, const int pbc[]); + void unpack_comm_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf); + int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, + const int & iswap, const int nfirst, + const int 
&pbc_flag, const int pbc[]); + int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, + DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space); + void unpack_border_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf, + ExecutionSpace space); + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space, int dim, + X_FLOAT lo, X_FLOAT hi); + int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, + int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, + ExecutionSpace space); + + void sync(ExecutionSpace space, unsigned int mask); + void modified(ExecutionSpace space, unsigned int mask); + void sync_overlapping_device(ExecutionSpace space, unsigned int mask); + double *uCond,*uMech,*uChem,*uCG,*uCGnew,*rho,*dpdTheta; + double *duChem; + + protected: + DAT::t_efloat_1d d_uCond, d_uMech, d_uChem, d_uCG, d_uCGnew,d_rho,d_dpdTheta,d_duChem; + HAT::t_efloat_1d h_uCond, h_uMech, h_uChem, h_uCG, h_uCGnew,h_rho,h_dpdTheta,h_duChem; + + tagint *tag; + imageint *image; + int *type,*mask; + double **x,**v,**f; + + DAT::t_tagint_1d d_tag; + HAT::t_tagint_1d h_tag; + DAT::t_imageint_1d d_image; + HAT::t_imageint_1d h_image; + DAT::t_int_1d d_type, d_mask; + HAT::t_int_1d h_type, h_mask; + + DAT::t_x_array d_x; + DAT::t_v_array d_v; + DAT::t_f_array d_f; + HAT::t_x_array h_x; + HAT::t_v_array h_v; + HAT::t_f_array h_f; + + DAT::tdual_int_1d k_count; +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Per-processor system is too big + +The number of owned atoms plus ghost atoms on a single +processor must fit in 32-bit integer. + +E: Invalid atom type in Atoms section of data file + +Atom types must range from 1 to specified # of types. 
+ +*/ From 91e38720d5d69052cc92cd2344126b81d97c4aca Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 28 Nov 2016 14:25:02 -0700 Subject: [PATCH 004/267] Adding pair_exp6_rx_kokkos files --- src/KOKKOS/Install.sh | 2 + src/KOKKOS/atom_kokkos.cpp | 57 ++ src/KOKKOS/atom_kokkos.h | 4 + src/KOKKOS/pair_exp6_rx_kokkos.cpp | 1060 ++++++++++++++++++++++++++++ src/KOKKOS/pair_exp6_rx_kokkos.h | 204 ++++++ src/USER-DPD/pair_exp6_rx.cpp | 2 + src/atom.h | 4 +- 7 files changed, 1331 insertions(+), 2 deletions(-) create mode 100644 src/KOKKOS/pair_exp6_rx_kokkos.cpp create mode 100644 src/KOKKOS/pair_exp6_rx_kokkos.h diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 93adf58ef5..14a8a951ee 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -134,6 +134,8 @@ action pair_eam_alloy_kokkos.cpp pair_eam_alloy.cpp action pair_eam_alloy_kokkos.h pair_eam_alloy.h action pair_eam_fs_kokkos.cpp pair_eam_fs.cpp action pair_eam_fs_kokkos.h pair_eam_fs.h +action pair_exp6_rx_kokkos.cpp pair_exp6_rx.cpp +action pair_exp6_rx_kokkos.h pair_exp6_rx.h action pair_kokkos.h action pair_lj_charmm_coul_charmm_implicit_kokkos.cpp pair_lj_charmm_coul_charmm_implicit.cpp action pair_lj_charmm_coul_charmm_implicit_kokkos.h pair_lj_charmm_coul_charmm_implicit.h diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index 577eff2364..4a7250e6ab 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -227,6 +227,63 @@ void AtomKokkos::grow(unsigned int mask){ } } +/* ---------------------------------------------------------------------- + add a custom variable with name of type flag = 0/1 for int/double + assumes name does not already exist + return index in ivector or dvector of its location +------------------------------------------------------------------------- */ + +int AtomKokkos::add_custom(const char *name, int flag) +{ + int index; + + if (flag == 0) { + index = nivector; + nivector++; + iname = (char **) 
memory->srealloc(iname,nivector*sizeof(char *), + "atom:iname"); + int n = strlen(name) + 1; + iname[index] = new char[n]; + strcpy(iname[index],name); + ivector = (int **) memory->srealloc(ivector,nivector*sizeof(int *), + "atom:ivector"); + memory->create(ivector[index],nmax,"atom:ivector"); + } else { + index = ndvector; + ndvector++; + dname = (char **) memory->srealloc(dname,ndvector*sizeof(char *), + "atom:dname"); + int n = strlen(name) + 1; + dname[index] = new char[n]; + strcpy(dname[index],name); + memory->grow_kokkos(k_dvector,dvector,ndvector,nmax, + "atom:dvector"); + } + + return index; +} + +/* ---------------------------------------------------------------------- + remove a custom variable of type flag = 0/1 for int/double at index + free memory for vector and name and set ptrs to NULL + ivector/dvector and iname/dname lists never shrink +------------------------------------------------------------------------- */ + +void AtomKokkos::remove_custom(int flag, int index) +{ + if (flag == 0) { + memory->destroy(ivector[index]); + ivector[index] = NULL; + delete [] iname[index]; + iname[index] = NULL; + } else { + //memory->destroy_kokkos(dvector); + dvector[index] = NULL; + delete [] dname[index]; + dname[index] = NULL; + } +} + /* ---------------------------------------------------------------------- */ void AtomKokkos::deallocate_topology() diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index f31c26e01f..cf454bcd0c 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -51,6 +51,8 @@ class AtomKokkos : public Atom { DAT::tdual_int_2d k_improper_type; DAT::tdual_tagint_2d k_improper_atom1, k_improper_atom2, k_improper_atom3, k_improper_atom4; + DAT::tdual_float_2d k_dvector; + // USER-DPD package DAT::tdual_efloat_1d k_uCond, k_uMech, k_uChem, k_uCG, k_uCGnew, @@ -66,6 +68,8 @@ class AtomKokkos : public Atom { void sync_overlapping_device(const ExecutionSpace space, unsigned int mask); virtual void sort(); virtual 
void grow(unsigned int mask); + int add_custom(const char *, int); + void remove_custom(int, int); virtual void deallocate_topology(); void sync_modify(ExecutionSpace, unsigned int, unsigned int); private: diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp new file mode 100644 index 0000000000..aa37c8375d --- /dev/null +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -0,0 +1,1060 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include +#include +#include +#include +#include "pair_exp6_rx_kokkos.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neigh_list.h" +#include "math_const.h" +#include "math_special.h" +#include "memory.h" +#include "error.h" +#include "modify.h" +#include "fix.h" +#include + +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace MathSpecial; + +#define MAXLINE 1024 +#define DELTA 4 + +#define oneFluidApproxParameter (-1) +#define isOneFluidApprox(_site) ( (_site) == oneFluidApproxParameter ) + +#define exp6PotentialType (1) +#define isExp6PotentialType(_type) ( (_type) == exp6PotentialType ) + +/* ---------------------------------------------------------------------- */ + +template +PairExp6rxKokkos::PairExp6rxKokkos(LAMMPS *lmp) : PairExp6rx(lmp) +{ + +} + +/* ---------------------------------------------------------------------- */ + +template +PairExp6rxKokkos::~PairExp6rxKokkos() +{ + +} + +/* 
---------------------------------------------------------------------- */ + +template +void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memory->destroy_kokkos(k_eatom,eatom); + memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.d_view; + } + if (vflag_atom) { + memory->destroy_kokkos(k_vatom,vatom); + memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + d_vatom = k_vatom.d_view; + } + + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + type = atomKK->k_type.view(); + uCG = atomKK->k_uCG.view(); + uCGnew = atomKK->k_uCGnew.view(); + dvector = atomKK->k_dvector.view(); + nlocal = atom->nlocal; + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; + special_coul[0] = force->special_coul[0]; + special_coul[1] = force->special_coul[1]; + special_coul[2] = force->special_coul[2]; + special_coul[3] = force->special_coul[3]; + newton_pair = force->newton_pair; + + copymode = 1; + + // Initialize the Exp6 parameter data for both the local + // and ghost atoms. Make the parameter data persistent + // and exchange like any other atom property later. 
+ + { + const int np_total = nlocal + atom->nghost; + + PairExp6ParamData.epsilon1 = typename AT::t_float_1d("PairExp6ParamData.epsilon1" ,np_total); + PairExp6ParamData.alpha1 = typename AT::t_float_1d("PairExp6ParamData.alpha1" ,np_total); + PairExp6ParamData.rm1 = typename AT::t_float_1d("PairExp6ParamData.rm1" ,np_total); + PairExp6ParamData.fraction1 = typename AT::t_float_1d("PairExp6ParamData.fraction1" ,np_total); + PairExp6ParamData.epsilon2 = typename AT::t_float_1d("PairExp6ParamData.epsilon2" ,np_total); + PairExp6ParamData.alpha2 = typename AT::t_float_1d("PairExp6ParamData.alpha2" ,np_total); + PairExp6ParamData.rm2 = typename AT::t_float_1d("PairExp6ParamData.rm2" ,np_total); + PairExp6ParamData.fraction2 = typename AT::t_float_1d("PairExp6ParamData.fraction2" ,np_total); + PairExp6ParamData.epsilonOld1 = typename AT::t_float_1d("PairExp6ParamData.epsilonOld1" ,np_total); + PairExp6ParamData.alphaOld1 = typename AT::t_float_1d("PairExp6ParamData.alphaOld1" ,np_total); + PairExp6ParamData.rmOld1 = typename AT::t_float_1d("PairExp6ParamData.rmOld1" ,np_total); + PairExp6ParamData.fractionOld1 = typename AT::t_float_1d("PairExp6ParamData.fractionOld1",np_total); + PairExp6ParamData.epsilonOld2 = typename AT::t_float_1d("PairExp6ParamData.epsilonOld2" ,np_total); + PairExp6ParamData.alphaOld2 = typename AT::t_float_1d("PairExp6ParamData.alphaOld2" ,np_total); + PairExp6ParamData.rmOld2 = typename AT::t_float_1d("PairExp6ParamData.rmOld2" ,np_total); + PairExp6ParamData.fractionOld2 = typename AT::t_float_1d("PairExp6ParamData.fractionOld2",np_total); + + Kokkos::parallel_for(Kokkos::RangePolicy(0,np_total),*this); + } + + int inum = list->inum; + NeighListKokkos* k_list = static_cast*>(list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + + // loop over neighbors of my atoms + + EV_FLOAT ev; + + if (evflag) { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } else { + 
Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + + if (eflag_global) eng_vdwl += ev.evdwl; + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + copymode = 0; +} + +template +KOKKOS_INLINE_FUNCTION +void PairExp6rxKokkos::operator()(TagPairExp6rxgetParamsEXP6, const int &i) const { + getParamsEXP6 (i, PairExp6ParamData.epsilon1[i], + PairExp6ParamData.alpha1[i], + PairExp6ParamData.rm1[i], + PairExp6ParamData.fraction1[i], + PairExp6ParamData.epsilon2[i], + PairExp6ParamData.alpha2[i], + PairExp6ParamData.rm2[i], + PairExp6ParamData.fraction2[i], + PairExp6ParamData.epsilonOld1[i], + PairExp6ParamData.alphaOld1[i], + PairExp6ParamData.rmOld1[i], + PairExp6ParamData.fractionOld1[i], + PairExp6ParamData.epsilonOld2[i], + PairExp6ParamData.alphaOld2[i], + PairExp6ParamData.rmOld2[i], + PairExp6ParamData.fractionOld2[i]); +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairExp6rxKokkos::operator()(TagPairExp6rxCompute, const int &ii, EV_FLOAT& ev) const { + int i,j,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair; + double rsq,r2inv,r6inv,forceExp6,factor_lj; + double rCut,rCutInv,rCut2inv,rCut6inv,rCutExp,urc,durc; + double rm2ij,rm6ij; + double r,rexp; + + double alphaOld12_ij, rmOld12_ij, epsilonOld12_ij; + double alphaOld21_ij, rmOld21_ij, epsilonOld21_ij; + double alpha12_ij, rm12_ij, epsilon12_ij; + double alpha21_ij, rm21_ij, epsilon21_ij; + double rminv, buck1, buck2; + double epsilonOld1_i,alphaOld1_i,rmOld1_i; + double epsilonOld1_j,alphaOld1_j,rmOld1_j; + double epsilonOld2_i,alphaOld2_i,rmOld2_i; + double epsilonOld2_j,alphaOld2_j,rmOld2_j; + double 
epsilon1_i,alpha1_i,rm1_i; + double epsilon1_j,alpha1_j,rm1_j; + double epsilon2_i,alpha2_i,rm2_i; + double epsilon2_j,alpha2_j,rm2_j; + double evdwlOldEXP6_12, evdwlOldEXP6_21, fpairOldEXP6_12, fpairOldEXP6_21; + double evdwlEXP6_12, evdwlEXP6_21; + double fractionOld1_i, fractionOld1_j; + double fractionOld2_i, fractionOld2_j; + double fraction1_i, fraction1_j; + double fraction2_i, fraction2_j; + + const int nRep = 12; + const double shift = 1.05; + double rin1, aRep, uin1, win1, uin1rep, rin1exp, rin6, rin6inv; + + evdwlOld = 0.0; + evdwl = 0.0; + + i = d_ilist[ii]; + xtmp = x(i,0); + ytmp = x(i,1); + ztmp = x(i,2); + itype = type[i]; + jnum = d_numneigh[i]; + + { + epsilon1_i = PairExp6ParamData.epsilon1[i]; + alpha1_i = PairExp6ParamData.alpha1[i]; + rm1_i = PairExp6ParamData.rm1[i]; + fraction1_i = PairExp6ParamData.fraction1[i]; + epsilon2_i = PairExp6ParamData.epsilon2[i]; + alpha2_i = PairExp6ParamData.alpha2[i]; + rm2_i = PairExp6ParamData.rm2[i]; + fraction2_i = PairExp6ParamData.fraction2[i]; + epsilonOld1_i = PairExp6ParamData.epsilonOld1[i]; + alphaOld1_i = PairExp6ParamData.alphaOld1[i]; + rmOld1_i = PairExp6ParamData.rmOld1[i]; + fractionOld1_i = PairExp6ParamData.fractionOld1[i]; + epsilonOld2_i = PairExp6ParamData.epsilonOld2[i]; + alphaOld2_i = PairExp6ParamData.alphaOld2[i]; + rmOld2_i = PairExp6ParamData.rmOld2[i]; + fractionOld2_i = PairExp6ParamData.fractionOld2[i]; + } + + for (jj = 0; jj < jnum; jj++) { + int j = d_neighbors(i,jj); + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x(j,0); + dely = ytmp - x(j,1); + delz = ztmp - x(j,2); + + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + + r = sqrt(rsq); + rCut2inv = 1.0/cutsq[itype][jtype]; + rCut6inv = rCut2inv*rCut2inv*rCut2inv; + rCut = sqrt(cutsq[itype][jtype]); + rCutInv = 1.0/rCut; + + // + // A. Compute the exp-6 potential + // + + // A1. 
Get alpha, epsilon and rm for particle j + + { + epsilon1_j = PairExp6ParamData.epsilon1[j]; + alpha1_j = PairExp6ParamData.alpha1[j]; + rm1_j = PairExp6ParamData.rm1[j]; + fraction1_j = PairExp6ParamData.fraction1[j]; + epsilon2_j = PairExp6ParamData.epsilon2[j]; + alpha2_j = PairExp6ParamData.alpha2[j]; + rm2_j = PairExp6ParamData.rm2[j]; + fraction2_j = PairExp6ParamData.fraction2[j]; + epsilonOld1_j = PairExp6ParamData.epsilonOld1[j]; + alphaOld1_j = PairExp6ParamData.alphaOld1[j]; + rmOld1_j = PairExp6ParamData.rmOld1[j]; + fractionOld1_j = PairExp6ParamData.fractionOld1[j]; + epsilonOld2_j = PairExp6ParamData.epsilonOld2[j]; + alphaOld2_j = PairExp6ParamData.alphaOld2[j]; + rmOld2_j = PairExp6ParamData.rmOld2[j]; + fractionOld2_j = PairExp6ParamData.fractionOld2[j]; + } + + // A2. Apply Lorentz-Berthelot mixing rules for the i-j pair + alphaOld12_ij = sqrt(alphaOld1_i*alphaOld2_j); + rmOld12_ij = 0.5*(rmOld1_i + rmOld2_j); + epsilonOld12_ij = sqrt(epsilonOld1_i*epsilonOld2_j); + alphaOld21_ij = sqrt(alphaOld2_i*alphaOld1_j); + rmOld21_ij = 0.5*(rmOld2_i + rmOld1_j); + epsilonOld21_ij = sqrt(epsilonOld2_i*epsilonOld1_j); + + alpha12_ij = sqrt(alpha1_i*alpha2_j); + rm12_ij = 0.5*(rm1_i + rm2_j); + epsilon12_ij = sqrt(epsilon1_i*epsilon2_j); + alpha21_ij = sqrt(alpha2_i*alpha1_j); + rm21_ij = 0.5*(rm2_i + rm1_j); + epsilon21_ij = sqrt(epsilon2_i*epsilon1_j); + + if(rmOld12_ij!=0.0 && rmOld21_ij!=0.0){ + if(alphaOld21_ij == 6.0 || alphaOld12_ij == 6.0) + error->all(FLERR,"alpha_ij is 6.0 in pair exp6"); + + // A3. 
Compute some convenient quantities for evaluating the force + rminv = 1.0/rmOld12_ij; + buck1 = epsilonOld12_ij / (alphaOld12_ij - 6.0); + rexp = expValue(alphaOld12_ij*(1.0-r*rminv)); + rm2ij = rmOld12_ij*rmOld12_ij; + rm6ij = rm2ij*rm2ij*rm2ij; + + // Compute the shifted potential + rCutExp = expValue(alphaOld12_ij*(1.0-rCut*rminv)); + buck2 = 6.0*alphaOld12_ij; + urc = buck1*(6.0*rCutExp - alphaOld12_ij*rm6ij*rCut6inv); + durc = -buck1*buck2*(rCutExp* rminv - rCutInv*rm6ij*rCut6inv); + rin1 = shift*rmOld12_ij*func_rin(alphaOld12_ij); + if(r < rin1){ + rin6 = rin1*rin1*rin1*rin1*rin1*rin1; + rin6inv = 1.0/rin6; + + rin1exp = expValue(alphaOld12_ij*(1.0-rin1*rminv)); + + uin1 = buck1*(6.0*rin1exp - alphaOld12_ij*rm6ij*rin6inv) - urc - durc*(rin1-rCut); + + win1 = -buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) - rin1*durc; + + aRep = -1.0*win1*powint(rin1,nRep)/nRep; + + uin1rep = aRep/powint(rin1,nRep); + + forceExp6 = -double(nRep)*aRep/powint(r,nRep); + fpairOldEXP6_12 = factor_lj*forceExp6*r2inv; + + evdwlOldEXP6_12 = uin1 - uin1rep + aRep/powint(r,nRep); + } else { + forceExp6 = buck1*buck2*(r*rexp*rminv - rm6ij*r6inv) + r*durc; + fpairOldEXP6_12 = factor_lj*forceExp6*r2inv; + + evdwlOldEXP6_12 = buck1*(6.0*rexp - alphaOld12_ij*rm6ij*r6inv) - urc - durc*(r-rCut); + } + + // A3. 
Compute some convenient quantities for evaluating the force + rminv = 1.0/rmOld21_ij; + buck1 = epsilonOld21_ij / (alphaOld21_ij - 6.0); + buck2 = 6.0*alphaOld21_ij; + rexp = expValue(alphaOld21_ij*(1.0-r*rminv)); + rm2ij = rmOld21_ij*rmOld21_ij; + rm6ij = rm2ij*rm2ij*rm2ij; + + // Compute the shifted potential + rCutExp = expValue(alphaOld21_ij*(1.0-rCut*rminv)); + buck2 = 6.0*alphaOld21_ij; + urc = buck1*(6.0*rCutExp - alphaOld21_ij*rm6ij*rCut6inv); + durc = -buck1*buck2*(rCutExp* rminv - rCutInv*rm6ij*rCut6inv); + rin1 = shift*rmOld21_ij*func_rin(alphaOld21_ij); + + if(r < rin1){ + rin6 = rin1*rin1*rin1*rin1*rin1*rin1; + rin6inv = 1.0/rin6; + + rin1exp = expValue(alphaOld21_ij*(1.0-rin1*rminv)); + + uin1 = buck1*(6.0*rin1exp - alphaOld21_ij*rm6ij*rin6inv) - urc - durc*(rin1-rCut); + + win1 = -buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) - rin1*durc; + + aRep = -1.0*win1*powint(rin1,nRep)/nRep; + + uin1rep = aRep/powint(rin1,nRep); + + forceExp6 = -double(nRep)*aRep/powint(r,nRep); + fpairOldEXP6_21 = factor_lj*forceExp6*r2inv; + + evdwlOldEXP6_21 = uin1 - uin1rep + aRep/powint(r,nRep); + } else { + forceExp6 = buck1*buck2*(r*rexp*rminv - rm6ij*r6inv) + r*durc; + fpairOldEXP6_21 = factor_lj*forceExp6*r2inv; + + evdwlOldEXP6_21 = buck1*(6.0*rexp - alphaOld21_ij*rm6ij*r6inv) - urc - durc*(r-rCut); + } + + if (isite1 == isite2) + evdwlOld = sqrt(fractionOld1_i*fractionOld2_j)*evdwlOldEXP6_12; + else + evdwlOld = sqrt(fractionOld1_i*fractionOld2_j)*evdwlOldEXP6_12 + sqrt(fractionOld2_i*fractionOld1_j)*evdwlOldEXP6_21; + + evdwlOld *= factor_lj; + + uCG[i] += 0.5*evdwlOld; + if (newton_pair || j < nlocal) + uCG[j] += 0.5*evdwlOld; + } + + if(rm12_ij!=0.0 && rm21_ij!=0.0){ + if(alpha21_ij == 6.0 || alpha12_ij == 6.0) + error->all(FLERR,"alpha_ij is 6.0 in pair exp6"); + + // A3. 
Compute some convenient quantities for evaluating the force + rminv = 1.0/rm12_ij; + buck1 = epsilon12_ij / (alpha12_ij - 6.0); + buck2 = 6.0*alpha12_ij; + rexp = expValue(alpha12_ij*(1.0-r*rminv)); + rm2ij = rm12_ij*rm12_ij; + rm6ij = rm2ij*rm2ij*rm2ij; + + // Compute the shifted potential + rCutExp = expValue(alpha12_ij*(1.0-rCut*rminv)); + urc = buck1*(6.0*rCutExp - alpha12_ij*rm6ij*rCut6inv); + durc = -buck1*buck2*(rCutExp*rminv - rCutInv*rm6ij*rCut6inv); + rin1 = shift*rm12_ij*func_rin(alpha12_ij); + + if(r < rin1){ + rin6 = rin1*rin1*rin1*rin1*rin1*rin1; + rin6inv = 1.0/rin6; + + rin1exp = expValue(alpha12_ij*(1.0-rin1*rminv)); + + uin1 = buck1*(6.0*rin1exp - alpha12_ij*rm6ij*rin6inv) - urc - durc*(rin1-rCut); + + win1 = -buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) - rin1*durc; + + aRep = -1.0*win1*powint(rin1,nRep)/nRep; + + uin1rep = aRep/powint(rin1,nRep); + + evdwlEXP6_12 = uin1 - uin1rep + aRep/powint(r,nRep); + } else { + evdwlEXP6_12 = buck1*(6.0*rexp - alpha12_ij*rm6ij*r6inv) - urc - durc*(r-rCut); + } + + rminv = 1.0/rm21_ij; + buck1 = epsilon21_ij / (alpha21_ij - 6.0); + buck2 = 6.0*alpha21_ij; + rexp = expValue(alpha21_ij*(1.0-r*rminv)); + rm2ij = rm21_ij*rm21_ij; + rm6ij = rm2ij*rm2ij*rm2ij; + + // Compute the shifted potential + rCutExp = expValue(alpha21_ij*(1.0-rCut*rminv)); + urc = buck1*(6.0*rCutExp - alpha21_ij*rm6ij*rCut6inv); + durc = -buck1*buck2*(rCutExp*rminv - rCutInv*rm6ij*rCut6inv); + rin1 = shift*rm21_ij*func_rin(alpha21_ij); + + if(r < rin1){ + rin6 = rin1*rin1*rin1*rin1*rin1*rin1; + rin6inv = 1.0/rin6; + + rin1exp = expValue(alpha21_ij*(1.0-rin1*rminv)); + + uin1 = buck1*(6.0*rin1exp - alpha21_ij*rm6ij*rin6inv) - urc - durc*(rin1-rCut); + + win1 = -buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) - rin1*durc; + + aRep = -1.0*win1*powint(rin1,nRep)/nRep; + + uin1rep = aRep/powint(rin1,nRep); + + evdwlEXP6_21 = uin1 - uin1rep + aRep/powint(r,nRep); + } else { + evdwlEXP6_21 = buck1*(6.0*rexp - alpha21_ij*rm6ij*r6inv) - urc - 
durc*(r-rCut); + } + + // + // Apply Mixing Rule to get the overall force for the CG pair + // + if (isite1 == isite2) fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpairOldEXP6_12; + else fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpairOldEXP6_12 + sqrt(fractionOld2_i*fractionOld1_j)*fpairOldEXP6_21; + + f(i,0) += delx*fpair; + f(i,1) += dely*fpair; + f(i,2) += delz*fpair; + if (newton_pair || j < nlocal) { + f(j,0) -= delx*fpair; + f(j,1) -= dely*fpair; + f(j,2) -= delz*fpair; + } + + if (isite1 == isite2) evdwl = sqrt(fraction1_i*fraction2_j)*evdwlEXP6_12; + else evdwl = sqrt(fraction1_i*fraction2_j)*evdwlEXP6_12 + sqrt(fraction2_i*fraction1_j)*evdwlEXP6_21; + evdwl *= factor_lj; + + uCGnew[i] += 0.5*evdwl; + if (newton_pair || j < nlocal) + uCGnew[j] += 0.5*evdwl; + evdwl = evdwlOld; + //if (vflag_either || eflag_atom) + if (EVFLAG) this->template ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); + } + } + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairExp6rxKokkos::operator()(TagPairExp6rxCompute, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairExp6rxCompute(), ii, ev); +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template +void PairExp6rxKokkos::allocate() +{ + allocated = 1; + int n = atom->ntypes; + + memory->create(setflag,n+1,n+1,"pair:setflag"); + for (int i = 1; i <= n; i++) + for (int j = i; j <= n; j++) + setflag[i][j] = 0; + + memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + d_cutsq = k_cutsq.template view(); + + memory->create(cut,n+1,n+1,"pair:cut_lj"); +} + +/* ---------------------------------------------------------------------- */ + +template +void PairExp6rxKokkos::read_file(char *file) +{ + int params_per_line = 5; + char **words = new char*[params_per_line+1]; + + memory->sfree(params); + params = NULL; + nparams = maxparam = 0; + + // open file on proc 0 + + 
FILE *fp; + fp = NULL; + if (comm->me == 0) { + fp = force->open_potential(file); + if (fp == NULL) { + char str[128]; + sprintf(str,"Cannot open exp6/rx potential file %s",file); + error->one(FLERR,str); + } + } + + // read each set of params from potential file + // one set of params can span multiple lines + + int n,nwords,ispecies; + char line[MAXLINE],*ptr; + int eof = 0; + + while (1) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fp); + if (ptr == NULL) { + eof = 1; + fclose(fp); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + // strip comment, skip line if blank + + if ((ptr = strchr(line,'#'))) *ptr = '\0'; + nwords = atom->count_words(line); + if (nwords == 0) continue; + + // concatenate additional lines until have params_per_line words + + while (nwords < params_per_line) { + n = strlen(line); + if (comm->me == 0) { + ptr = fgets(&line[n],MAXLINE-n,fp); + if (ptr == NULL) { + eof = 1; + fclose(fp); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + if ((ptr = strchr(line,'#'))) *ptr = '\0'; + nwords = atom->count_words(line); + } + + if (nwords != params_per_line) + error->all(FLERR,"Incorrect format in exp6/rx potential file"); + + // words = ptrs to all words in line + + nwords = 0; + words[nwords++] = strtok(line," \t\n\r\f"); + while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue; + + for (ispecies = 0; ispecies < nspecies; ispecies++) + if (strcmp(words[0],&atom->dname[ispecies][0]) == 0) break; + if (ispecies == nspecies) continue; + + // load up parameter settings and error check their values + + if (nparams == maxparam) { + maxparam += DELTA; + memory->grow_kokkos(k_params,params,maxparam, + "pair:params"); + } + + params[nparams].ispecies = ispecies; + + n = strlen(&atom->dname[ispecies][0]) + 1; + 
params[nparams].name = new char[n]; + strcpy(params[nparams].name,&atom->dname[ispecies][0]); + + n = strlen(words[1]) + 1; + params[nparams].potential = new char[n]; + strcpy(params[nparams].potential,words[1]); + if (strcmp(params[nparams].potential,"exp6") == 0){ + params[nparams].alpha = atof(words[2]); + params[nparams].epsilon = atof(words[3]); + params[nparams].rm = atof(words[4]); + if (params[nparams].epsilon <= 0.0 || params[nparams].rm <= 0.0 || + params[nparams].alpha < 0.0) + error->all(FLERR,"Illegal exp6/rx parameters. Rm and Epsilon must be greater than zero. Alpha cannot be negative."); + } else { + error->all(FLERR,"Illegal exp6/rx parameters. Interaction potential does not exist."); + } + nparams++; + } + + delete [] words; + + k_params.template modify(); + k_params.template sync(); + d_params = k_params.template view(); +} + +/* ---------------------------------------------------------------------- */ + +template +void PairExp6rxKokkos::setup() +{ + int i,j,n; + + // set mol2param for all combinations + // must be a single exact match to lines read from file + + memory->destroy_kokkos(k_mol2param,mol2param); + memory->create_kokkos(k_mol2param,mol2param,nspecies,"pair:mol2param"); + + for (i = 0; i < nspecies; i++) { + n = -1; + for (j = 0; j < nparams; j++) { + if (i == params[j].ispecies) { + if (n >= 0) error->all(FLERR,"Potential file has duplicate entry"); + n = j; + } + } + mol2param[i] = n; + } + + k_mol2param.template modify(); + k_mol2param.template sync(); + d_mol2param = k_mol2param.template view(); + + neighflag = lmp->kokkos->neighflag; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm1, double &fraction1,double &epsilon2,double &alpha2,double &rm2,double &fraction2,double &epsilon1_old,double &alpha1_old,double &rm1_old, double &fraction1_old,double &epsilon2_old,double 
&alpha2_old,double &rm2_old,double &fraction2_old) const +{ + int iparam, jparam; + double rmi, rmj, rmij, rm3ij; + double epsiloni, epsilonj, epsilonij; + double alphai, alphaj, alphaij; + double epsilon_old, rm3_old, alpha_old; + double epsilon, rm3, alpha; + double fractionOFA, fractionOFA_old; + double nTotalOFA, nTotalOFA_old; + double nTotal, nTotal_old; + double xMolei, xMolej, xMolei_old, xMolej_old; + + rm3 = 0.0; + epsilon = 0.0; + alpha = 0.0; + epsilon_old = 0.0; + rm3_old = 0.0; + alpha_old = 0.0; + fractionOFA = 0.0; + fractionOFA_old = 0.0; + nTotalOFA = 0.0; + nTotalOFA_old = 0.0; + nTotal = 0.0; + nTotal_old = 0.0; + + // Compute the total number of molecules in the old and new CG particle as well as the total number of molecules in the fluid portion of the old and new CG particle + for (int ispecies = 0; ispecies < nspecies; ispecies++){ + nTotal += dvector(ispecies,id); + nTotal_old += dvector(ispecies+nspecies,id); + + iparam = mol2param[ispecies]; + + if (iparam < 0 || d_params[iparam].potentialType != exp6PotentialType ) continue; + if (isOneFluidApprox(isite1) || isOneFluidApprox(isite2)) { + if (isite1 == d_params[iparam].ispecies || isite2 == d_params[iparam].ispecies) continue; + nTotalOFA_old += dvector(ispecies+nspecies,id); + nTotalOFA += dvector(ispecies,id); + } + } + if(nTotal < 1e-8 || nTotal_old < 1e-8) + error->all(FLERR,"The number of molecules in CG particle is less than 1e-8."); + + // Compute the mole fraction of molecules within the fluid portion of the particle (One Fluid Approximation) + fractionOFA_old = nTotalOFA_old / nTotal_old; + fractionOFA = nTotalOFA / nTotal; + + for (int ispecies = 0; ispecies < nspecies; ispecies++) { + iparam = mol2param[ispecies]; + if (iparam < 0 || d_params[iparam].potentialType != exp6PotentialType ) continue; + + // If Site1 matches a pure species, then grab the parameters + if (isite1 == d_params[iparam].ispecies){ + rm1_old = d_params[iparam].rm; + rm1 = d_params[iparam].rm; + 
epsilon1_old = d_params[iparam].epsilon; + epsilon1 = d_params[iparam].epsilon; + alpha1_old = d_params[iparam].alpha; + alpha1 = d_params[iparam].alpha; + + // Compute the mole fraction of Site1 + fraction1_old = dvector(ispecies+nspecies,id)/nTotal_old; + fraction1 = dvector(ispecies,id)/nTotal; + } + + // If Site2 matches a pure species, then grab the parameters + if (isite2 == d_params[iparam].ispecies){ + rm2_old = d_params[iparam].rm; + rm2 = d_params[iparam].rm; + epsilon2_old = d_params[iparam].epsilon; + epsilon2 = d_params[iparam].epsilon; + alpha2_old = d_params[iparam].alpha; + alpha2 = d_params[iparam].alpha; + + // Compute the mole fraction of Site2 + fraction2_old = dvector(ispecies+nspecies,id)/nTotal_old; + fraction2 = dvector(ispecies,id)/nTotal; + } + + // If Site1 or Site2 matches is a fluid, then compute the paramters + if (isOneFluidApprox(isite1) || isOneFluidApprox(isite2)) { + if (isite1 == d_params[iparam].ispecies || isite2 == d_params[iparam].ispecies) continue; + rmi = d_params[iparam].rm; + epsiloni = d_params[iparam].epsilon; + alphai = d_params[iparam].alpha; + xMolei = dvector(ispecies,id)/nTotalOFA; + xMolei_old = dvector(ispecies+nspecies,id)/nTotalOFA_old; + + for (int jspecies = 0; jspecies < nspecies; jspecies++) { + jparam = mol2param[jspecies]; + if (jparam < 0 || d_params[jparam].potentialType != exp6PotentialType ) continue; + if (isite1 == d_params[jparam].ispecies || isite2 == d_params[jparam].ispecies) continue; + rmj = d_params[jparam].rm; + epsilonj = d_params[jparam].epsilon; + alphaj = d_params[jparam].alpha; + xMolej = dvector(jspecies,id)/nTotalOFA; + xMolej_old = dvector(jspecies+nspecies,id)/nTotalOFA_old; + + rmij = (rmi+rmj)/2.0; + rm3ij = rmij*rmij*rmij; + epsilonij = sqrt(epsiloni*epsilonj); + alphaij = sqrt(alphai*alphaj); + + if(fractionOFA_old > 0.0){ + rm3_old += xMolei_old*xMolej_old*rm3ij; + epsilon_old += xMolei_old*xMolej_old*rm3ij*epsilonij; + alpha_old += 
xMolei_old*xMolej_old*rm3ij*epsilonij*alphaij; + } + if(fractionOFA > 0.0){ + rm3 += xMolei*xMolej*rm3ij; + epsilon += xMolei*xMolej*rm3ij*epsilonij; + alpha += xMolei*xMolej*rm3ij*epsilonij*alphaij; + } + } + } + } + + if (isOneFluidApprox(isite1)){ + rm1 = cbrt(rm3); + if(rm1 < 1e-16) { + rm1 = 0.0; + epsilon1 = 0.0; + alpha1 = 0.0; + } else { + epsilon1 = epsilon / rm3; + alpha1 = alpha / epsilon1 / rm3; + } + + fraction1 = fractionOFA; + + rm1_old = cbrt(rm3_old); + if(rm1_old < 1e-16) { + rm1_old = 0.0; + epsilon1_old = 0.0; + alpha1_old = 0.0; + } else { + epsilon1_old = epsilon_old / rm3_old; + alpha1_old = alpha_old / epsilon1_old / rm3_old; + } + fraction1_old = fractionOFA_old; + + // Fuchslin-Like Exp-6 Scaling + double powfuch = 0.0; + if(fuchslinEpsilon < 0.0){ + powfuch = pow(nTotalOFA,-fuchslinEpsilon); + if(powfuch<1e-15) epsilon1 = 0.0; + else epsilon1 *= 1.0/powfuch; + + powfuch = pow(nTotalOFA_old,-fuchslinEpsilon); + if(powfuch<1e-15) epsilon1_old = 0.0; + else epsilon1_old *= 1.0/powfuch; + + } else { + epsilon1 *= pow(nTotalOFA,fuchslinEpsilon); + epsilon1_old *= pow(nTotalOFA_old,fuchslinEpsilon); + } + + if(fuchslinR < 0.0){ + powfuch = pow(nTotalOFA,-fuchslinR); + if(powfuch<1e-15) rm1 = 0.0; + else rm1 *= 1.0/powfuch; + + powfuch = pow(nTotalOFA_old,-fuchslinR); + if(powfuch<1e-15) rm1_old = 0.0; + else rm1_old *= 1.0/powfuch; + + } else { + rm1 *= pow(nTotalOFA,fuchslinR); + rm1_old *= pow(nTotalOFA_old,fuchslinR); + } + } + + if (isOneFluidApprox(isite2)){ + rm2 = cbrt(rm3); + if(rm2 < 1e-16) { + rm2 = 0.0; + epsilon2 = 0.0; + alpha2 = 0.0; + } else { + epsilon2 = epsilon / rm3; + alpha2 = alpha / epsilon2 / rm3; + } + fraction2 = fractionOFA; + + rm2_old = cbrt(rm3_old); + if(rm2_old < 1e-16) { + rm2_old = 0.0; + epsilon2_old = 0.0; + alpha2_old = 0.0; + } else { + epsilon2_old = epsilon_old / rm3_old; + alpha2_old = alpha_old / epsilon2_old / rm3_old; + } + fraction2_old = fractionOFA_old; + + // Fuchslin-Like Exp-6 Scaling + double 
powfuch = 0.0; + if(fuchslinEpsilon < 0.0){ + powfuch = pow(nTotalOFA,-fuchslinEpsilon); + if(powfuch<1e-15) epsilon2 = 0.0; + else epsilon2 *= 1.0/powfuch; + + powfuch = pow(nTotalOFA_old,-fuchslinEpsilon); + if(powfuch<1e-15) epsilon2_old = 0.0; + else epsilon2_old *= 1.0/powfuch; + + } else { + epsilon2 *= pow(nTotalOFA,fuchslinEpsilon); + epsilon2_old *= pow(nTotalOFA_old,fuchslinEpsilon); + } + + if(fuchslinR < 0.0){ + powfuch = pow(nTotalOFA,-fuchslinR); + if(powfuch<1e-15) rm2 = 0.0; + else rm2 *= 1.0/powfuch; + + powfuch = pow(nTotalOFA_old,-fuchslinR); + if(powfuch<1e-15) rm2_old = 0.0; + else rm2_old *= 1.0/powfuch; + + } else { + rm2 *= pow(nTotalOFA,fuchslinR); + rm2_old *= pow(nTotalOFA_old,fuchslinR); + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairExp6rxKokkos::func_rin(const double &alpha) const +{ + double function; + + const double a = 3.7682065; + const double b = -1.4308614; + + function = a+b*sqrt(alpha); + function = expValue(function); + + return function; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairExp6rxKokkos::expValue(double value) const +{ + double returnValue; + if(value < DBL_MIN_EXP) returnValue = 0.0; + else returnValue = exp(value); + + return returnValue; +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairExp6rxKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const +{ + const int EFLAG = eflag; + const int VFLAG = vflag_either; + + // The eatom and vatom arrays are atomic for Half/Thread neighbor style + Kokkos::View::value> > v_eatom = k_eatom.view(); + Kokkos::View::value> > v_vatom = k_vatom.view(); + + if (EFLAG) { + if (eflag_atom) { + const 
E_FLOAT epairhalf = 0.5 * epair; + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR || i < nlocal) v_eatom[i] += epairhalf; + if (NEWTON_PAIR || j < nlocal) v_eatom[j] += epairhalf; + } else { + v_eatom[i] += epairhalf; + } + } + } + + if (VFLAG) { + const E_FLOAT v0 = delx*delx*fpair; + const E_FLOAT v1 = dely*dely*fpair; + const E_FLOAT v2 = delz*delz*fpair; + const E_FLOAT v3 = delx*dely*fpair; + const E_FLOAT v4 = delx*delz*fpair; + const E_FLOAT v5 = dely*delz*fpair; + + if (vflag_global) { + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR || i < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + if (NEWTON_PAIR || j < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } else { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + + if (vflag_atom) { + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR || i < nlocal) { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + } + if (NEWTON_PAIR || j < nlocal) { + v_vatom(j,0) += 0.5*v0; + v_vatom(j,1) += 0.5*v1; + v_vatom(j,2) += 0.5*v2; + v_vatom(j,3) += 0.5*v3; + v_vatom(j,4) += 0.5*v4; + v_vatom(j,5) += 0.5*v5; + } + } else { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int PairExp6rxKokkos::sbmask(const int& j) const { + return j >> SBBITS & 3; +} + +namespace LAMMPS_NS { +template class PairExp6rxKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class PairExp6rxKokkos; +#endif +} \ No newline at end of file diff --git 
a/src/KOKKOS/pair_exp6_rx_kokkos.h b/src/KOKKOS/pair_exp6_rx_kokkos.h new file mode 100644 index 0000000000..4ff055123c --- /dev/null +++ b/src/KOKKOS/pair_exp6_rx_kokkos.h @@ -0,0 +1,204 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(exp6/rx/kk,PairExp6rxKokkos) +PairStyle(exp6/rx/kk/device,PairExp6rxKokkos) +PairStyle(exp6/rx/kk/host,PairExp6rxKokkos) + +#else + +#ifndef LMP_PAIR_EXP6_RX_KOKKOS_H +#define LMP_PAIR_EXP6_RX_KOKKOS_H + +#include "pair_exp6_rx.h" +#include "kokkos_type.h" +#include "pair_kokkos.h" + +namespace LAMMPS_NS { + +// Create a structure to hold the parameter data for all +// local and neighbor particles. Pack inside this struct +// to avoid any name clashes. + +template +struct PairExp6ParamDataTypeKokkos +{ + typedef ArrayTypes AT; + + int n; + typename AT::t_float_1d epsilon1, alpha1, rm1, fraction1, + epsilon2, alpha2, rm2, fraction2, + epsilonOld1, alphaOld1, rmOld1, fractionOld1, + epsilonOld2, alphaOld2, rmOld2, fractionOld2; + + // Default constructor -- nullify everything. 
+ PairExp6ParamDataTypeKokkos(void) + : n(0) + {} +}; + +struct TagPairExp6rxgetParamsEXP6{}; + +template +struct TagPairExp6rxCompute{}; + +template +class PairExp6rxKokkos : public PairExp6rx { + public: + typedef DeviceType device_type; + typedef ArrayTypes AT; + typedef EV_FLOAT value_type; + + PairExp6rxKokkos(class LAMMPS *); + virtual ~PairExp6rxKokkos(); + virtual void compute(int, int); + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairExp6rxCompute, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairExp6rxCompute, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagPairExp6rxgetParamsEXP6, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const; + + KOKKOS_INLINE_FUNCTION + int sbmask(const int& j) const; + + protected: + int eflag,vflag; + int nlocal,newton_pair,neighflag; + double special_coul[4]; + double special_lj[4]; + + typename AT::t_x_array_randomread x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + typename AT::t_efloat_1d uCG, uCGnew; + typename AT::t_float_2d dvector; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + DAT::t_efloat_1d d_eatom; + DAT::t_virial_array d_vatom; + + typename AT::t_neighbors_2d d_neighbors; + typename AT::t_int_1d_randomread d_ilist; + typename AT::t_int_1d_randomread d_numneigh; + + PairExp6ParamDataTypeKokkos PairExp6ParamData; + + void allocate(); + DAT::tdual_int_1d k_mol2param; // mapping from molecule to parameters + typename AT::t_int_1d_randomread d_mol2param; + + typedef Kokkos::DualView tdual_param_1d; + typedef typename tdual_param_1d::t_dev_const_randomread t_param_1d_randomread; + + tdual_param_1d k_params; // parameter set for an I-J-K interaction + t_param_1d_randomread d_params; // parameter set for an I-J-K 
interaction + + typename ArrayTypes::tdual_ffloat_2d k_cutsq; + typename ArrayTypes::t_ffloat_2d d_cutsq; + + void read_file(char *); + void setup(); + + KOKKOS_INLINE_FUNCTION + void getParamsEXP6(int, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &) const; + + KOKKOS_INLINE_FUNCTION + double func_rin(const double &) const; + + KOKKOS_INLINE_FUNCTION + double expValue(const double) const; + + friend void pair_virial_fdotr_compute(PairExp6rxKokkos*); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: alpha_ij is 6.0 in pair exp6 + +Self-explanatory + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. + +E: PairExp6rxKokkos requires a fix rx command + +The fix rx command must come before the pair style command in the input file + +E: There are no rx species specified + +There must be at least one species specified through the fix rx command + +E: Site1 name not recognized in pair coefficients + +The site1 keyword does not match the species keywords specified through the fix rx command + +E: All pair coeffs are not set + +All pair coefficients must be set in the data file or by the +pair_coeff command before running a simulation. + +E: Cannot open exp6/rx potential file %s + +Self-explanatory + +E: Incorrect format in exp6/rx potential file + +Self-explanatory + +E: Illegal exp6/rx parameters. Rm and Epsilon must be greater than zero. Alpha cannot be negative. + +Self-explanatory + +E: Illegal exp6/rx parameters. Interaction potential does not exist. + +Self-explanatory + +E: Potential file has duplicate entry. 
+ +Self-explanatory + +E: The number of molecules in CG particle is less than 1e-8. + +Self-explanatory. Check the species concentrations have been properly set +and check the reaction kinetic solver parameters in fix rx for +sufficient accuracy. + + +*/ diff --git a/src/USER-DPD/pair_exp6_rx.cpp b/src/USER-DPD/pair_exp6_rx.cpp index 9af28026ae..2643c9ec04 100644 --- a/src/USER-DPD/pair_exp6_rx.cpp +++ b/src/USER-DPD/pair_exp6_rx.cpp @@ -77,6 +77,8 @@ PairExp6rx::PairExp6rx(LAMMPS *lmp) : Pair(lmp) PairExp6rx::~PairExp6rx() { + if (copymode) return; + for (int i=0; i < nparams; ++i) { delete[] params[i].name; delete[] params[i].potential; diff --git a/src/atom.h b/src/atom.h index 9abbb49569..de7cda06ac 100644 --- a/src/atom.h +++ b/src/atom.h @@ -255,8 +255,8 @@ class Atom : protected Pointers { void update_callback(int); int find_custom(const char *, int &); - int add_custom(const char *, int); - void remove_custom(int, int); + virtual int add_custom(const char *, int); + virtual void remove_custom(int, int); virtual void sync_modify(ExecutionSpace, unsigned int, unsigned int) {} From 6d94439cfe6f9f4cfd00a04b7e45d89693ffac46 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 28 Nov 2016 14:42:47 -0700 Subject: [PATCH 005/267] Integrating pair_dpd_fdt_energy_kokkos files --- src/KOKKOS/Install.sh | 2 ++ src/{USER-DPD => KOKKOS}/pair_dpd_fdt_energy_kokkos.cpp | 0 src/{USER-DPD => KOKKOS}/pair_dpd_fdt_energy_kokkos.h | 0 src/KOKKOS/pair_exp6_rx_kokkos.cpp | 4 ++++ 4 files changed, 6 insertions(+) rename src/{USER-DPD => KOKKOS}/pair_dpd_fdt_energy_kokkos.cpp (100%) rename src/{USER-DPD => KOKKOS}/pair_dpd_fdt_energy_kokkos.h (100%) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 14a8a951ee..7e46b52c2b 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -128,6 +128,8 @@ action pair_coul_long_kokkos.cpp pair_coul_long.cpp action pair_coul_long_kokkos.h pair_coul_long.h action pair_coul_wolf_kokkos.cpp action 
pair_coul_wolf_kokkos.h +action pair_dpd_fdt_energy_kokkos.cpp pair_dpd_fdt_energy.cpp +action pair_dpd_fdt_energy_kokkos.h pair_dpd_fdt_energy.h action pair_eam_kokkos.cpp pair_eam.cpp action pair_eam_kokkos.h pair_eam.h action pair_eam_alloy_kokkos.cpp pair_eam_alloy.cpp diff --git a/src/USER-DPD/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp similarity index 100% rename from src/USER-DPD/pair_dpd_fdt_energy_kokkos.cpp rename to src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp diff --git a/src/USER-DPD/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h similarity index 100% rename from src/USER-DPD/pair_dpd_fdt_energy_kokkos.h rename to src/KOKKOS/pair_dpd_fdt_energy_kokkos.h diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index aa37c8375d..754fa4667d 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -11,6 +11,10 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + Contributing author: Stan Moore (Sandia) +------------------------------------------------------------------------- */ + #include #include #include From 6e6776f39635b1b69dab532bced2b0d95f150d62 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Wed, 30 Nov 2016 16:25:07 -0500 Subject: [PATCH 006/267] Finish moving/integrating atom_vec_dpd_kokkos into the Kokkos package --- src/KOKKOS/Install.sh | 2 + src/USER-DPD/atom_vec_dpd_kokkos.cpp | 1874 -------------------------- src/USER-DPD/atom_vec_dpd_kokkos.h | 135 -- 3 files changed, 2 insertions(+), 2009 deletions(-) delete mode 100644 src/USER-DPD/atom_vec_dpd_kokkos.cpp delete mode 100644 src/USER-DPD/atom_vec_dpd_kokkos.h diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 7e46b52c2b..1381a1978c 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -47,6 +47,8 @@ action atom_vec_bond_kokkos.cpp atom_vec_bond.cpp action atom_vec_bond_kokkos.h atom_vec_bond.h action atom_vec_charge_kokkos.cpp action atom_vec_charge_kokkos.h +action atom_vec_dpd_kokkos.cpp atom_vec_dpd.cpp +action atom_vec_dpd_kokkos.h atom_vec_dpd.h action atom_vec_full_kokkos.cpp atom_vec_full.cpp action atom_vec_full_kokkos.h atom_vec_full.h action atom_vec_kokkos.cpp diff --git a/src/USER-DPD/atom_vec_dpd_kokkos.cpp b/src/USER-DPD/atom_vec_dpd_kokkos.cpp deleted file mode 100644 index c79559172f..0000000000 --- a/src/USER-DPD/atom_vec_dpd_kokkos.cpp +++ /dev/null @@ -1,1874 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale AtomicKokkos/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#include -#include "atom_vec_dpd_kokkos.h" -#include "atom_kokkos.h" -#include "comm_kokkos.h" -#include "domain.h" -#include "modify.h" -#include "fix.h" -#include "atom_masks.h" -#include "memory.h" -#include "error.h" - -using namespace LAMMPS_NS; - -#define DELTA 10000 - -/* ---------------------------------------------------------------------- */ - -AtomVecDPDKokkos::AtomVecDPDKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) -{ - molecular = 0; - mass_type = 1; - - comm_x_only = comm_f_only = 0; - size_forward = 7; - size_reverse = 3; - size_border = 12; - size_velocity = 3; - size_data_atom = 6; - size_data_vel = 4; - xcol_data = 4; - - atom->rho_flag = 1; - atom->dpd_flag = 1; - - k_count = DAT::tdual_int_1d("atom::k_count",1); - atomKK = (AtomKokkos *) atom; - commKK = (CommKokkos *) comm; -} - -/* ---------------------------------------------------------------------- - grow atom arrays - n = 0 grows arrays by DELTA - n > 0 allocates arrays to size n -------------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::grow(int n) -{ - if (n == 0) nmax += DELTA; - else nmax = n; - atomKK->nmax = nmax; - if (nmax < 0 || nmax > MAXSMALLINT) - error->one(FLERR,"Per-processor system is too big"); - - sync(Device,ALL_MASK); - modified(Device,ALL_MASK); - - memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); - memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); - memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); - memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); - - memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); - memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); - 
memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); - - - memory->grow_kokkos(atomKK->k_rho,atomKK->rho,nmax,"atom:rho"); - memory->grow_kokkos(atomKK->k_dpdTheta,atomKK->dpdTheta,nmax,"atom:dpdTheta"); - memory->grow_kokkos(atomKK->k_uCond,atomKK->uCond,nmax,"atom:uCond"); - memory->grow_kokkos(atomKK->k_uMech,atomKK->uMech,nmax,"atom:uMech"); - memory->grow_kokkos(atomKK->k_uChem,atomKK->uChem,nmax,"atom:uChem"); - memory->grow_kokkos(atomKK->k_uCG,atomKK->uCG,nmax,"atom:uCG"); - memory->grow_kokkos(atomKK->k_uCGnew,atomKK->uCGnew,nmax,"atom:uCGnew"); - memory->grow_kokkos(atomKK->k_duChem,atomKK->duChem,nmax,"atom:duChem"); - - grow_reset(); - sync(Host,ALL_MASK); - - if (atom->nextra_grow) - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) - modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); -} - -/* ---------------------------------------------------------------------- - reset local array ptrs -------------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::grow_reset() -{ - tag = atomKK->tag; - d_tag = atomKK->k_tag.d_view; - h_tag = atomKK->k_tag.h_view; - - type = atomKK->type; - d_type = atomKK->k_type.d_view; - h_type = atomKK->k_type.h_view; - mask = atomKK->mask; - d_mask = atomKK->k_mask.d_view; - h_mask = atomKK->k_mask.h_view; - image = atomKK->image; - d_image = atomKK->k_image.d_view; - h_image = atomKK->k_image.h_view; - - x = atomKK->x; - d_x = atomKK->k_x.d_view; - h_x = atomKK->k_x.h_view; - v = atomKK->v; - d_v = atomKK->k_v.d_view; - h_v = atomKK->k_v.h_view; - f = atomKK->f; - d_f = atomKK->k_f.d_view; - h_f = atomKK->k_f.h_view; - - rho = atomKK->rho; - d_rho = atomKK->k_rho.d_view; - h_rho = atomKK->k_rho.h_view; - dpdTheta = atomKK->dpdTheta; - d_dpdTheta = atomKK->k_dpdTheta.d_view; - h_dpdTheta = atomKK->k_dpdTheta.h_view; - uCond = atomKK->uCond; - d_uCond = atomKK->k_uCond.d_view;; - h_uCond = atomKK->k_uCond.h_view; - uMech = atomKK->uMech; - d_uMech = 
atomKK->k_uMech.d_view;; - h_uMech = atomKK->k_uMech.h_view; - uChem = atomKK->uChem; - d_uChem = atomKK->k_uChem.d_view;; - h_uChem = atomKK->k_uChem.h_view; - uCG = atomKK->uCG; - d_uCG = atomKK->k_uCG.d_view;; - h_uCG = atomKK->k_uCG.h_view; - uCGnew = atomKK->uCGnew; - d_uCGnew = atomKK->k_uCGnew.d_view;; - h_uCGnew = atomKK->k_uCGnew.h_view; - duChem = atomKK->duChem; - d_duChem = atomKK->k_duChem.d_view;; - h_duChem = atomKK->k_duChem.h_view; -} - -/* ---------------------------------------------------------------------- - copy atom I info to atom J -------------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::copy(int i, int j, int delflag) -{ - h_tag[j] = h_tag[i]; - h_type[j] = h_type[i]; - mask[j] = mask[i]; - h_image[j] = h_image[i]; - h_x(j,0) = h_x(i,0); - h_x(j,1) = h_x(i,1); - h_x(j,2) = h_x(i,2); - h_v(j,0) = h_v(i,0); - h_v(j,1) = h_v(i,1); - h_v(j,2) = h_v(i,2); - h_dpdTheta[j] = h_dpdTheta[i]; - h_uCond[j] = h_uCond[i]; - h_uMech[j] = h_uMech[i]; - h_uChem[j] = h_uChem[i]; - h_uCG[j] = h_uCG[i]; - h_uCGnew[j] = h_uCGnew[i]; - - if (atom->nextra_grow) - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) - modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag); -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_PackComm { - typedef DeviceType device_type; - - typename ArrayTypes::t_x_array_randomread _x; - typename ArrayTypes::t_efloat_1d _dpdTheta,_uCond,_uMech,_uChem; - typename ArrayTypes::t_xfloat_2d_um _buf; - typename ArrayTypes::t_int_2d_const _list; - const int _iswap; - X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - X_FLOAT _pbc[6]; - - AtomVecDPDKokkos_PackComm( - const typename DAT::tdual_x_array &x, - const typename DAT::tdual_efloat_1d &dpdTheta, - const typename DAT::tdual_efloat_1d &uCond, - const typename DAT::tdual_efloat_1d &uMech, - const typename DAT::tdual_efloat_1d &uChem, - const typename 
DAT::tdual_xfloat_2d &buf, - const typename DAT::tdual_int_2d &list, - const int & iswap, - const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, - const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): - _x(x.view()), - _dpdTheta(dpdTheta.view()), - _uCond(uCond.view()), - _uMech(uMech.view()), - _uChem(uChem.view()), - _list(list.view()),_iswap(iswap), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - const size_t maxsend = (buf.view().dimension_0()*buf.view().dimension_1())/3; - const size_t elements = 3; - buffer_view(_buf,buf,maxsend,elements); - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _buf(i,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - _buf(i,3) = _dpdTheta(j); - _buf(i,4) = _uCond(j); - _buf(i,5) = _uMech(j); - _buf(i,6) = _uChem(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_comm_kokkos(const int &n, - const DAT::tdual_int_2d &list, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, - const int* const pbc) -{ - // Check whether to always run forward communication on the host - // Choose correct forward PackComm kernel - - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list,iswap, - 
domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - LMPHostType::fence(); - } else { - sync(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,list,iswap, - 
domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - LMPDeviceType::fence(); - } - - return n*size_forward; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_PackCommSelf { - typedef DeviceType device_type; - - typename ArrayTypes::t_x_array_randomread _x; - typename ArrayTypes::t_x_array _xw; - typename ArrayTypes::t_efloat_1d _dpdTheta,_uCond,_uMech,_uChem; - int _nfirst; - typename ArrayTypes::t_int_2d_const _list; - const int _iswap; - X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - X_FLOAT _pbc[6]; - - AtomVecDPDKokkos_PackCommSelf( - const typename DAT::tdual_x_array &x, - const typename DAT::tdual_efloat_1d &dpdTheta, - const typename DAT::tdual_efloat_1d &uCond, - const typename DAT::tdual_efloat_1d &uMech, - const typename DAT::tdual_efloat_1d &uChem, - const int &nfirst, - const typename DAT::tdual_int_2d &list, - const int & iswap, - const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, - const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc): - _x(x.view()),_xw(x.view()), - _dpdTheta(dpdTheta.view()), - _uCond(uCond.view()), - _uMech(uMech.view()), - _uChem(uChem.view()), - _nfirst(nfirst),_list(list.view()),_iswap(iswap), - _xprd(xprd),_yprd(yprd),_zprd(zprd), - _xy(xy),_xz(xz),_yz(yz) { - _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2]; - _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5]; - }; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); - } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz; - _xw(i+_nfirst,1) = 
_x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd; - } - } - _dpdTheta(i+_nfirst) = _dpdTheta(j); - _uCond(i+_nfirst) = _uCond(j); - _uMech(i+_nfirst) = _uMech(j); - _uChem(i+_nfirst) = _uChem(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, - const int nfirst, const int &pbc_flag, const int* const pbc) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - LMPHostType::fence(); - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - if(pbc_flag) { - if(domain->triclinic) { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list,iswap, - 
domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } else { - if(domain->triclinic) { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } else { - struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - nfirst,list,iswap, - domain->xprd,domain->yprd,domain->zprd, - domain->xy,domain->xz,domain->yz,pbc); - Kokkos::parallel_for(n,f); - } - } - LMPDeviceType::fence(); - } - return n*3; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_UnpackComm { - typedef DeviceType device_type; - - typename ArrayTypes::t_x_array _x; - typename ArrayTypes::t_efloat_1d _dpdTheta,_uCond,_uMech,_uChem; - typename ArrayTypes::t_xfloat_2d_const _buf; - int _first; - - AtomVecDPDKokkos_UnpackComm( - const typename DAT::tdual_x_array &x, - const typename DAT::tdual_efloat_1d &dpdTheta, - const typename DAT::tdual_efloat_1d &uCond, - const typename DAT::tdual_efloat_1d &uMech, - const typename DAT::tdual_efloat_1d &uChem, - const typename DAT::tdual_xfloat_2d &buf, - const int& first):_x(x.view()), - _dpdTheta(dpdTheta.view()), - _uCond(uCond.view()), - _uMech(uMech.view()), - _uChem(uChem.view()), - _buf(buf.view()), - _first(first) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _dpdTheta(i+_first) = 
_buf(i,3); - _uCond(i+_first) = _buf(i,4); - _uMech(i+_first) = _buf(i,5); - _uChem(i+_first) = _buf(i,6); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::unpack_comm_kokkos(const int &n, const int &first, - const DAT::tdual_xfloat_2d &buf ) { - if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); - struct AtomVecDPDKokkos_UnpackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,first); - Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); - } else { - sync(Device,X_MASK); - modified(Device,X_MASK); - struct AtomVecDPDKokkos_UnpackComm f(atomKK->k_x, - atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, - buf,first); - Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_comm(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - buf[m++] = dpdTheta[j]; - buf[m++] = uCond[j]; - buf[m++] = uMech[j]; - buf[m++] = uChem[j]; - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - buf[m++] = h_dpdTheta[j]; - buf[m++] = h_uCond[j]; - buf[m++] = h_uMech[j]; - buf[m++] = h_uChem[j]; - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_comm_vel(int n, int *list, double *buf, - int pbc_flag, 
int *pbc) -{ - int i,j,m; - double dx,dy,dz,dvx,dvy,dvz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - buf[m++] = h_dpdTheta[j]; - buf[m++] = h_uCond[j]; - buf[m++] = h_uMech[j]; - buf[m++] = h_uChem[j]; - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; - dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; - dz = pbc[2]*domain->zprd; - } - if (!deform_vremap) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - buf[m++] = h_dpdTheta[j]; - buf[m++] = h_uCond[j]; - buf[m++] = h_uMech[j]; - buf[m++] = h_uChem[j]; - } - } else { - dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; - dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; - dvz = pbc[2]*h_rate[2]; - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - if (mask[i] & deform_groupbit) { - buf[m++] = h_v(j,0) + dvx; - buf[m++] = h_v(j,1) + dvy; - buf[m++] = h_v(j,2) + dvz; - } else { - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - } - buf[m++] = h_dpdTheta(j); - buf[m++] = h_uCond(j); - buf[m++] = h_uMech(j); - buf[m++] = h_uChem(j); - } - } - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::unpack_comm(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - h_dpdTheta[i] = buf[m++]; - h_uCond[i] = buf[m++]; - h_uMech[i] = buf[m++]; - h_uChem[i] = 
buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::unpack_comm_vel(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - h_v(i,0) = buf[m++]; - h_v(i,1) = buf[m++]; - h_v(i,2) = buf[m++]; - h_dpdTheta[i] = buf[m++]; - h_uCond[i] = buf[m++]; - h_uMech[i] = buf[m++]; - h_uChem[i] = buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf) -{ - if(n > 0) - sync(Host,F_MASK); - - int m = 0; - const int last = first + n; - for (int i = first; i < last; i++) { - buf[m++] = h_f(i,0); - buf[m++] = h_f(i,1); - buf[m++] = h_f(i,2); - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::unpack_reverse(int n, int *list, double *buf) -{ - if(n > 0) { - sync(Host,F_MASK); - modified(Host,F_MASK); - } - - int m = 0; - for (int i = 0; i < n; i++) { - const int j = list[i]; - h_f(j,0) += buf[m++]; - h_f(j,1) += buf[m++]; - h_f(j,2) += buf[m++]; - } -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_PackBorder { - typedef DeviceType device_type; - - typename ArrayTypes::t_xfloat_2d _buf; - const typename ArrayTypes::t_int_2d_const _list; - const int _iswap; - const typename ArrayTypes::t_x_array_randomread _x; - const typename ArrayTypes::t_tagint_1d _tag; - const typename ArrayTypes::t_int_1d _type; - const typename ArrayTypes::t_int_1d _mask; - typename ArrayTypes::t_efloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; - X_FLOAT _dx,_dy,_dz; - - AtomVecDPDKokkos_PackBorder( - const typename ArrayTypes::t_xfloat_2d &buf, - const typename ArrayTypes::t_int_2d_const &list, - const int & iswap, - const typename ArrayTypes::t_x_array &x, - 
const typename ArrayTypes::t_tagint_1d &tag, - const typename ArrayTypes::t_int_1d &type, - const typename ArrayTypes::t_int_1d &mask, - const typename ArrayTypes::t_efloat_1d &dpdTheta, - const typename ArrayTypes::t_efloat_1d &uCond, - const typename ArrayTypes::t_efloat_1d &uMech, - const typename ArrayTypes::t_efloat_1d &uChem, - const typename ArrayTypes::t_efloat_1d &uCG, - const typename ArrayTypes::t_efloat_1d &uCGnew, - const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz): - _buf(buf),_list(list),_iswap(iswap), - _x(x),_tag(tag),_type(type),_mask(mask), - _dpdTheta(dpdTheta), - _uCond(uCond), - _uMech(uMech), - _uChem(uChem), - _uCG(uCG), /* bind _uCG to its own view; it was initialized from uCGnew, so slot 10 packed uCGnew twice */ - _uCGnew(uCGnew), - _dx(dx),_dy(dy),_dz(dz) {} - /* Pack border atom i of swap _iswap: x (shifted by _dx,_dy,_dz when PBC_FLAG != 0), tag, type, mask, then the six DPD scalars: 12 values per atom. */ - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - const int j = _list(_iswap,i); - if (PBC_FLAG == 0) { - _buf(i,0) = _x(j,0); - _buf(i,1) = _x(j,1); - _buf(i,2) = _x(j,2); - } else { - _buf(i,0) = _x(j,0) + _dx; - _buf(i,1) = _x(j,1) + _dy; - _buf(i,2) = _x(j,2) + _dz; - } - _buf(i,3) = _tag(j); - _buf(i,4) = _type(j); - _buf(i,5) = _mask(j); - _buf(i,6) = _dpdTheta(j); - _buf(i,7) = _uCond(j); - _buf(i,8) = _uMech(j); - _buf(i,9) = _uChem(j); - _buf(i,10) = _uCG(j); - _buf(i,11) = _uCGnew(j); - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, - int pbc_flag, int *pbc, ExecutionSpace space) -{ - X_FLOAT dx,dy,dz; - - if (pbc_flag != 0) { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if(space==Host) { - AtomVecDPDKokkos_PackBorder f( - buf.view(), k_sendlist.view(), - iswap,h_x,h_tag,h_type,h_mask, - h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, - dx,dy,dz); - Kokkos::parallel_for(n,f); - LMPHostType::fence(); - } else { - AtomVecDPDKokkos_PackBorder f( - 
buf.view(), k_sendlist.view(), - iswap,d_x,d_tag,d_type,d_mask, - d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, - dx,dy,dz); - Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); - } - - } else { - dx = dy = dz = 0; - if(space==Host) { - AtomVecDPDKokkos_PackBorder f( - buf.view(), k_sendlist.view(), - iswap,h_x,h_tag,h_type,h_mask, - h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, - dx,dy,dz); - Kokkos::parallel_for(n,f); - LMPHostType::fence(); - } else { - AtomVecDPDKokkos_PackBorder f( - buf.view(), k_sendlist.view(), - iswap,d_x,d_tag,d_type,d_mask, - d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, - dx,dy,dz); - Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); - } - } - return n*size_border; /* the functor writes size_border (12) values per atom; the former n*6 reported half of what was packed. NOTE(review): confirm the caller treats this as a count of doubles */ -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_border(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - buf[m++] = ubuf(h_tag(j)).d; - buf[m++] = ubuf(h_type(j)).d; - buf[m++] = ubuf(h_mask(j)).d; - buf[m++] = h_dpdTheta(j); - buf[m++] = h_uCond(j); - buf[m++] = h_uMech(j); - buf[m++] = h_uChem(j); - buf[m++] = h_uCG(j); - buf[m++] = h_uCGnew(j); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - buf[m++] = ubuf(h_tag(j)).d; - buf[m++] = ubuf(h_type(j)).d; - buf[m++] = ubuf(h_mask(j)).d; - buf[m++] = h_dpdTheta(j); - buf[m++] = h_uCond(j); - buf[m++] = h_uMech(j); - buf[m++] = h_uChem(j); - buf[m++] = h_uCG(j); - buf[m++] = h_uCGnew(j); - } - } - - if (atom->nextra_border) - for (int iextra = 0; iextra < atom->nextra_border; iextra++) - m += 
modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); - - return m; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_border_vel(int n, int *list, double *buf, - int pbc_flag, int *pbc) -{ - int i,j,m; - double dx,dy,dz,dvx,dvy,dvz; - - m = 0; - if (pbc_flag == 0) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0); - buf[m++] = h_x(j,1); - buf[m++] = h_x(j,2); - buf[m++] = ubuf(h_tag(j)).d; - buf[m++] = ubuf(h_type(j)).d; - buf[m++] = ubuf(h_mask(j)).d; - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - buf[m++] = h_dpdTheta(j); - buf[m++] = h_uCond(j); - buf[m++] = h_uMech(j); - buf[m++] = h_uChem(j); - buf[m++] = h_uCG(j); - buf[m++] = h_uCGnew(j); - } - } else { - if (domain->triclinic == 0) { - dx = pbc[0]*domain->xprd; - dy = pbc[1]*domain->yprd; - dz = pbc[2]*domain->zprd; - } else { - dx = pbc[0]; - dy = pbc[1]; - dz = pbc[2]; - } - if (!deform_vremap) { - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - buf[m++] = ubuf(h_tag(j)).d; - buf[m++] = ubuf(h_type(j)).d; - buf[m++] = ubuf(h_mask(j)).d; - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = h_v(j,2); - buf[m++] = h_dpdTheta(j); - buf[m++] = h_uCond(j); - buf[m++] = h_uMech(j); - buf[m++] = h_uChem(j); - buf[m++] = h_uCG(j); - buf[m++] = h_uCGnew(j); - } - } else { - dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; - dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; - dvz = pbc[2]*h_rate[2]; - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_x(j,0) + dx; - buf[m++] = h_x(j,1) + dy; - buf[m++] = h_x(j,2) + dz; - buf[m++] = ubuf(h_tag(j)).d; - buf[m++] = ubuf(h_type(j)).d; - buf[m++] = ubuf(h_mask(j)).d; - if (mask[i] & deform_groupbit) { - buf[m++] = h_v(j,0) + dvx; - buf[m++] = h_v(j,1) + dvy; - buf[m++] = h_v(j,2) + dvz; - } else { - buf[m++] = h_v(j,0); - buf[m++] = h_v(j,1); - buf[m++] = 
h_v(j,2); - } - buf[m++] = h_dpdTheta(j); - buf[m++] = h_uCond(j); - buf[m++] = h_uMech(j); - buf[m++] = h_uChem(j); - buf[m++] = h_uCG(j); - buf[m++] = h_uCGnew(j); - } - } - } - - if (atom->nextra_border) - for (int iextra = 0; iextra < atom->nextra_border; iextra++) - m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); - - return m; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_comm_hybrid(int n, int *list, double *buf) -{ - int i,j,m; - - m = 0; - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_dpdTheta[j]; - buf[m++] = h_uCond[j]; - buf[m++] = h_uMech[j]; - buf[m++] = h_uChem[j]; - buf[m++] = h_uCG[j]; - buf[m++] = h_uCGnew[j]; - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_border_hybrid(int n, int *list, double *buf) -{ - int i,j,m; - - m = 0; - for (i = 0; i < n; i++) { - j = list[i]; - buf[m++] = h_dpdTheta[j]; - buf[m++] = h_uCond[j]; - buf[m++] = h_uMech[j]; - buf[m++] = h_uChem[j]; - buf[m++] = h_uCG[j]; - buf[m++] = h_uCGnew[j]; - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_UnpackBorder { - typedef DeviceType device_type; - - const typename ArrayTypes::t_xfloat_2d_const _buf; - typename ArrayTypes::t_x_array _x; - typename ArrayTypes::t_tagint_1d _tag; - typename ArrayTypes::t_int_1d _type; - typename ArrayTypes::t_int_1d _mask; - typename ArrayTypes::t_efloat_1d _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; - int _first; - - - AtomVecDPDKokkos_UnpackBorder( - const typename ArrayTypes::t_xfloat_2d_const &buf, - typename ArrayTypes::t_x_array &x, - typename ArrayTypes::t_tagint_1d &tag, - typename ArrayTypes::t_int_1d &type, - typename ArrayTypes::t_int_1d &mask, - const typename ArrayTypes::t_efloat_1d &dpdTheta, - const typename ArrayTypes::t_efloat_1d &uCond, - const typename 
ArrayTypes::t_efloat_1d &uMech, - const typename ArrayTypes::t_efloat_1d &uChem, - const typename ArrayTypes::t_efloat_1d &uCG, - const typename ArrayTypes::t_efloat_1d &uCGnew, - const int& first): - _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask), - _dpdTheta(dpdTheta), - _uCond(uCond), - _uMech(uMech), - _uChem(uChem), - _uCG(uCG), /* bind _uCG to its own view; it was initialized from uCGnew, so uCG was never written and uCGnew was written twice */ - _uCGnew(uCGnew), - _first(first) {}; - /* Unpack the 12 values of received border atom i into slot i+_first: x, tag, type, mask, then the six DPD scalars. */ - KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - _x(i+_first,0) = _buf(i,0); - _x(i+_first,1) = _buf(i,1); - _x(i+_first,2) = _buf(i,2); - _tag(i+_first) = static_cast (_buf(i,3)); - _type(i+_first) = static_cast (_buf(i,4)); - _mask(i+_first) = static_cast (_buf(i,5)); - _dpdTheta(i+_first) = _buf(i,6); - _uCond(i+_first) = _buf(i,7); - _uMech(i+_first) = _buf(i,8); - _uChem(i+_first) = _buf(i,9); - _uCG(i+_first) = _buf(i,10); - _uCGnew(i+_first) = _buf(i,11); -// printf("%i %i %lf %lf %lf %i BORDER\n",_tag(i+_first),i+_first,_x(i+_first,0),_x(i+_first,1),_x(i+_first,2),_type(i+_first)); - } -}; - -/* ---------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::unpack_border_kokkos(const int &n, const int &first, - const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) { - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); - while (first+n >= nmax) grow(0); - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); - if(space==Host) { - struct AtomVecDPDKokkos_UnpackBorder f(buf.view(), - h_x,h_tag,h_type,h_mask, - h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, - first); - Kokkos::parallel_for(n,f); - LMPHostType::fence(); - } else { - struct AtomVecDPDKokkos_UnpackBorder f(buf.view(), - d_x,d_tag,d_type,d_mask, - d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, - first); - Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); - } -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::unpack_border(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + 
n; - for (i = first; i < last; i++) { - if (i == nmax) grow(0); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - h_tag(i) = (tagint) ubuf(buf[m++]).i; - h_type(i) = (int) ubuf(buf[m++]).i; - h_mask(i) = (int) ubuf(buf[m++]).i; - h_dpdTheta(i) = buf[m++]; - h_uCond(i) = buf[m++]; - h_uMech(i) = buf[m++]; - h_uChem(i) = buf[m++]; - h_uCG(i) = buf[m++]; - h_uCGnew(i) = buf[m++]; - } - - if (atom->nextra_border) - for (int iextra = 0; iextra < atom->nextra_border; iextra++) - m += modify->fix[atom->extra_border[iextra]]-> - unpack_border(n,first,&buf[m]); -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::unpack_border_vel(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - if (i == nmax) grow(0); - modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); - h_x(i,0) = buf[m++]; - h_x(i,1) = buf[m++]; - h_x(i,2) = buf[m++]; - h_tag(i) = (tagint) ubuf(buf[m++]).i; - h_type(i) = (int) ubuf(buf[m++]).i; - h_mask(i) = (int) ubuf(buf[m++]).i; - h_v(i,0) = buf[m++]; - h_v(i,1) = buf[m++]; - h_v(i,2) = buf[m++]; - h_dpdTheta(i) = buf[m++]; - h_uCond(i) = buf[m++]; - h_uMech(i) = buf[m++]; - h_uChem(i) = buf[m++]; - h_uCG(i) = buf[m++]; - h_uCGnew(i) = buf[m++]; - } - - if (atom->nextra_border) - for (int iextra = 0; iextra < atom->nextra_border; iextra++) - m += modify->fix[atom->extra_border[iextra]]-> - unpack_border(n,first,&buf[m]); -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::unpack_comm_hybrid(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_dpdTheta(i) = buf[m++]; - h_uCond(i) = buf[m++]; - h_uMech(i) = buf[m++]; - h_uChem(i) = buf[m++]; - h_uCG(i) = buf[m++]; - h_uCGnew(i) = buf[m++]; - } - return m; -} - -/* 
---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::unpack_border_hybrid(int n, int first, double *buf) -{ - int i,m,last; - - m = 0; - last = first + n; - for (i = first; i < last; i++) { - h_dpdTheta(i) = buf[m++]; - h_uCond(i) = buf[m++]; - h_uMech(i) = buf[m++]; - h_uChem(i) = buf[m++]; - h_uCG(i) = buf[m++]; - h_uCGnew(i) = buf[m++]; - } - return m; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_PackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_x_array_randomread _x; - typename AT::t_v_array_randomread _v; - typename AT::t_tagint_1d_randomread _tag; - typename AT::t_int_1d_randomread _type; - typename AT::t_int_1d_randomread _mask; - typename AT::t_imageint_1d_randomread _image; - typename AT::t_efloat_1d_randomread _dpdTheta,_uCond,_uMech,_uChem,_uCG,_uCGnew; - typename AT::t_x_array _xw; - typename AT::t_v_array _vw; - typename AT::t_tagint_1d _tagw; - typename AT::t_int_1d _typew; - typename AT::t_int_1d _maskw; - typename AT::t_imageint_1d _imagew; - typename AT::t_efloat_1d _dpdThetaw,_uCondw,_uMechw,_uChemw,_uCGw,_uCGneww; - - typename AT::t_xfloat_2d_um _buf; - typename AT::t_int_1d_const _sendlist; - typename AT::t_int_1d_const _copylist; - int _nlocal,_dim; - X_FLOAT _lo,_hi; - - AtomVecDPDKokkos_PackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d sendlist, - typename AT::tdual_int_1d copylist,int nlocal, int dim, - X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _dpdTheta(atom->k_dpdTheta.view()), - _uCond(atom->k_uCond.view()), - _uMech(atom->k_uMech.view()), - _uChem(atom->k_uChem.view()), - _uCG(atom->k_uCG.view()), - _uCGnew(atom->k_uCGnew.view()), - _xw(atom->k_x.view()), 
- _vw(atom->k_v.view()), - _tagw(atom->k_tag.view()), - _typew(atom->k_type.view()), - _maskw(atom->k_mask.view()), - _imagew(atom->k_image.view()), - _dpdThetaw(atom->k_dpdTheta.view()), - _uCondw(atom->k_uCond.view()), - _uMechw(atom->k_uMech.view()), - _uChemw(atom->k_uChem.view()), - _uCGw(atom->k_uCG.view()), - _uCGneww(atom->k_uCGnew.view()), - _sendlist(sendlist.template view()), - _copylist(copylist.template view()), - _nlocal(nlocal),_dim(dim), - _lo(lo),_hi(hi){ - const size_t elements = 17; - const int maxsendlist = (buf.template view().dimension_0()*buf.template view().dimension_1())/elements; - - buffer_view(_buf,buf,maxsendlist,elements); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &mysend) const { - const int i = _sendlist(mysend); - _buf(mysend,0) = 17; - _buf(mysend,1) = _x(i,0); - _buf(mysend,2) = _x(i,1); - _buf(mysend,3) = _x(i,2); - _buf(mysend,4) = _v(i,0); - _buf(mysend,5) = _v(i,1); - _buf(mysend,6) = _v(i,2); - _buf(mysend,7) = _tag[i]; - _buf(mysend,8) = _type[i]; - _buf(mysend,9) = _mask[i]; - _buf(mysend,10) = _image[i]; - _buf(mysend,11) = _dpdTheta[i]; - _buf(mysend,12) = _uCond[i]; - _buf(mysend,13) = _uMech[i]; - _buf(mysend,14) = _uChem[i]; - _buf(mysend,15) = _uCG[i]; - _buf(mysend,16) = _uCGnew[i]; - const int j = _copylist(mysend); - - if(j>-1) { - _xw(i,0) = _x(j,0); - _xw(i,1) = _x(j,1); - _xw(i,2) = _x(j,2); - _vw(i,0) = _v(j,0); - _vw(i,1) = _v(j,1); - _vw(i,2) = _v(j,2); - _tagw[i] = _tag(j); - _typew[i] = _type(j); - _maskw[i] = _mask(j); - _imagew[i] = _image(j); - _dpdThetaw[i] = _dpdTheta(j); - _uCondw[i] = _uCond(j); - _uMechw[i] = _uMech(j); - _uChemw[i] = _uChem(j); - _uCGw[i] = _uCG(j); - _uCGneww[i] = _uCGnew(j); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi 
) -{ - if(nsend > (int) (k_buf.view().dimension_0()*k_buf.view().dimension_1())/17) { - int newsize = nsend*17/k_buf.view().dimension_1()+1; - k_buf.resize(newsize,k_buf.view().dimension_1()); - } - if(space == Host) { - AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); - Kokkos::parallel_for(nsend,f); - LMPHostType::fence(); - return nsend*17; - } else { - AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); - Kokkos::parallel_for(nsend,f); - LMPDeviceType::fence(); - return nsend*17; - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_exchange(int i, double *buf) -{ - int m = 1; - buf[m++] = h_x(i,0); - buf[m++] = h_x(i,1); - buf[m++] = h_x(i,2); - buf[m++] = h_v(i,0); - buf[m++] = h_v(i,1); - buf[m++] = h_v(i,2); - buf[m++] = ubuf(h_tag(i)).d; - buf[m++] = ubuf(h_type(i)).d; - buf[m++] = ubuf(h_mask(i)).d; - buf[m++] = ubuf(h_image(i)).d; - buf[m++] = h_dpdTheta[i]; - buf[m++] = h_uCond[i]; - buf[m++] = h_uMech[i]; - buf[m++] = h_uChem[i]; - buf[m++] = h_uCG[i]; - buf[m++] = h_uCGnew[i]; - - if (atom->nextra_grow) - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) - m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]); - - buf[0] = m; - return m; -} - -/* ---------------------------------------------------------------------- */ - -template -struct AtomVecDPDKokkos_UnpackExchangeFunctor { - typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_x_array _x; - typename AT::t_v_array _v; - typename AT::t_tagint_1d _tag; - typename AT::t_int_1d _type; - typename AT::t_int_1d _mask; - typename AT::t_imageint_1d _image; - typename AT::t_efloat_1d _dpdTheta; - typename AT::t_efloat_1d _uCond; - typename AT::t_efloat_1d _uMech; - typename AT::t_efloat_1d _uChem; - typename AT::t_efloat_1d _uCG; - typename AT::t_efloat_1d _uCGnew; - - typename AT::t_xfloat_2d_um _buf; - 
typename AT::t_int_1d _nlocal; - int _dim; - X_FLOAT _lo,_hi; - - AtomVecDPDKokkos_UnpackExchangeFunctor( - const AtomKokkos* atom, - const typename AT::tdual_xfloat_2d buf, - typename AT::tdual_int_1d nlocal, - int dim, X_FLOAT lo, X_FLOAT hi): - _x(atom->k_x.view()), - _v(atom->k_v.view()), - _tag(atom->k_tag.view()), - _type(atom->k_type.view()), - _mask(atom->k_mask.view()), - _image(atom->k_image.view()), - _nlocal(nlocal.template view()),_dim(dim), - _lo(lo),_hi(hi){ - const size_t elements = 17; - const int maxsendlist = (buf.template view().dimension_0()*buf.template view().dimension_1())/elements; - - buffer_view(_buf,buf,maxsendlist,elements); - } - - KOKKOS_INLINE_FUNCTION - void operator() (const int &myrecv) const { - X_FLOAT x = _buf(myrecv,_dim+1); - if (x >= _lo && x < _hi) { - int i = Kokkos::atomic_fetch_add(&_nlocal(0),1); - _x(i,0) = _buf(myrecv,1); - _x(i,1) = _buf(myrecv,2); - _x(i,2) = _buf(myrecv,3); - _v(i,0) = _buf(myrecv,4); - _v(i,1) = _buf(myrecv,5); - _v(i,2) = _buf(myrecv,6); - _tag[i] = _buf(myrecv,7); - _type[i] = _buf(myrecv,8); - _mask[i] = _buf(myrecv,9); - _image[i] = _buf(myrecv,10); - _dpdTheta[i] = _buf(myrecv,11); - _uCond[i] = _buf(myrecv,12); - _uMech[i] = _buf(myrecv,13); - _uChem[i] = _buf(myrecv,14); - _uCG[i] = _buf(myrecv,15); - _uCGnew[i] = _buf(myrecv,16); - } - } -}; - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) { - if(space == Host) { - k_count.h_view(0) = nlocal; - AtomVecDPDKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/17,f); - LMPHostType::fence(); - return k_count.h_view(0); - } else { - k_count.h_view(0) = nlocal; - k_count.modify(); - k_count.sync(); - AtomVecDPDKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); - Kokkos::parallel_for(nrecv/17,f); - 
LMPDeviceType::fence(); - k_count.modify(); - k_count.sync(); - - return k_count.h_view(0); - } -} - -/* ---------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::unpack_exchange(double *buf) -{ - int nlocal = atom->nlocal; - if (nlocal == nmax) grow(0); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK); - - int m = 1; - h_x(nlocal,0) = buf[m++]; - h_x(nlocal,1) = buf[m++]; - h_x(nlocal,2) = buf[m++]; - h_v(nlocal,0) = buf[m++]; - h_v(nlocal,1) = buf[m++]; - h_v(nlocal,2) = buf[m++]; - h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; - h_type(nlocal) = (int) ubuf(buf[m++]).i; - h_mask(nlocal) = (int) ubuf(buf[m++]).i; - h_image(nlocal) = (imageint) ubuf(buf[m++]).i; - h_dpdTheta[nlocal] = buf[m++]; - h_uCond[nlocal] = buf[m++]; - h_uMech[nlocal] = buf[m++]; - h_uChem[nlocal] = buf[m++]; - h_uCG[nlocal] = buf[m++]; - h_uCGnew[nlocal] = buf[m++]; - - if (atom->nextra_grow) - for (int iextra = 0; iextra < atom->nextra_grow; iextra++) - m += modify->fix[atom->extra_grow[iextra]]-> - unpack_exchange(nlocal,&buf[m]); - - atom->nlocal++; - return m; -} - -/* ---------------------------------------------------------------------- - size of restart data for all atoms owned by this proc - include extra data stored by fixes -------------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::size_restart() -{ - int i; - - int nlocal = atom->nlocal; - int n = 15 * nlocal; // 11 + dpdTheta + uCond + uMech + uChem - - if (atom->nextra_restart) - for (int iextra = 0; iextra < atom->nextra_restart; iextra++) - for (i = 0; i < nlocal; i++) - n += modify->fix[atom->extra_restart[iextra]]->size_restart(i); - - return n; -} - -/* ---------------------------------------------------------------------- - pack atom I's data for restart file including extra quantities - xyz must be 1st 3 values, so that read_restart can test on them - molecular types may be negative, but write as positive 
-------------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_restart(int i, double *buf) -{ - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK ); - - int m = 1; - buf[m++] = h_x(i,0); - buf[m++] = h_x(i,1); - buf[m++] = h_x(i,2); - buf[m++] = ubuf(h_tag(i)).d; - buf[m++] = ubuf(h_type(i)).d; - buf[m++] = ubuf(h_mask(i)).d; - buf[m++] = ubuf(h_image(i)).d; - buf[m++] = h_v(i,0); - buf[m++] = h_v(i,1); - buf[m++] = h_v(i,2); - buf[m++] = h_dpdTheta[i]; - buf[m++] = h_uCond[i]; - buf[m++] = h_uMech[i]; - buf[m++] = h_uChem[i]; - - if (atom->nextra_restart) - for (int iextra = 0; iextra < atom->nextra_restart; iextra++) - m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]); - - buf[0] = m; - return m; -} - -/* ---------------------------------------------------------------------- - unpack data for one atom from restart file including extra quantities -------------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::unpack_restart(double *buf) -{ - int nlocal = atom->nlocal; - if (nlocal == nmax) { - grow(0); - if (atom->nextra_store) - memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); - } - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK ); - - int m = 1; - h_x(nlocal,0) = buf[m++]; - h_x(nlocal,1) = buf[m++]; - h_x(nlocal,2) = buf[m++]; - h_tag(nlocal) = (tagint) ubuf(buf[m++]).i; - h_type(nlocal) = (int) ubuf(buf[m++]).i; - h_mask(nlocal) = (int) ubuf(buf[m++]).i; - h_image(nlocal) = (imageint) ubuf(buf[m++]).i; - h_v(nlocal,0) = buf[m++]; - h_v(nlocal,1) = buf[m++]; - h_v(nlocal,2) = buf[m++]; - h_dpdTheta[nlocal] = buf[m++]; - h_uCond[nlocal] = buf[m++]; - h_uMech[nlocal] = buf[m++]; - h_uChem[nlocal] = buf[m++]; - - double **extra = atom->extra; - if (atom->nextra_store) { - int size = static_cast (ubuf(buf[m++]).i) - m; - for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++]; - } - - 
atom->nlocal++; - return m; -} - -/* ---------------------------------------------------------------------- - create one atom of itype at coord - set other values to defaults -------------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::create_atom(int itype, double *coord) -{ - int nlocal = atom->nlocal; - if (nlocal == nmax) { - //if(nlocal>2) printf("typeA: %i %i\n",type[0],type[1]); - atomKK->modified(Host,ALL_MASK); - grow(0); - //if(nlocal>2) printf("typeB: %i %i\n",type[0],type[1]); - } - atomKK->modified(Host,ALL_MASK); - - tag[nlocal] = 0; - type[nlocal] = itype; - h_x(nlocal,0) = coord[0]; - h_x(nlocal,1) = coord[1]; - h_x(nlocal,2) = coord[2]; - h_mask[nlocal] = 1; - h_image[nlocal] = ((tagint) IMGMAX << IMG2BITS) | - ((tagint) IMGMAX << IMGBITS) | IMGMAX; - h_v(nlocal,0) = 0.0; - h_v(nlocal,1) = 0.0; - h_v(nlocal,2) = 0.0; - h_rho[nlocal] = 0.0; - h_dpdTheta[nlocal] = 0.0; - h_uCond[nlocal] = 0.0; - h_uMech[nlocal] = 0.0; - h_uChem[nlocal] = 0.0; - h_uCG[nlocal] = 0.0; - h_uCGnew[nlocal] = 0.0; - h_duChem[nlocal] = 0.0; - - atom->nlocal++; -} - -/* ---------------------------------------------------------------------- - unpack one line from Atoms section of data file - initialize other atom quantities -------------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::data_atom(double *coord, tagint imagetmp, - char **values) -{ - int nlocal = atom->nlocal; - if (nlocal == nmax) grow(0); - - h_tag[nlocal] = ATOTAGINT(values[0]); - h_type[nlocal] = atoi(values[1]); - if (type[nlocal] <= 0 || type[nlocal] > atom->ntypes) - error->one(FLERR,"Invalid atom type in Atoms section of data file"); - - h_dpdTheta[nlocal] = atof(values[2]); - if (h_dpdTheta[nlocal] <= 0) - error->one(FLERR,"Internal temperature in Atoms section of date file must be > zero"); - - h_x(nlocal,0) = coord[0]; - h_x(nlocal,1) = coord[1]; - h_x(nlocal,2) = coord[2]; - - h_image[nlocal] = imagetmp; - - 
h_mask[nlocal] = 1; - h_v(nlocal,0) = 0.0; - h_v(nlocal,1) = 0.0; - h_v(nlocal,2) = 0.0; - - h_rho[nlocal] = 0.0; - h_uCond[nlocal] = 0.0; - h_uMech[nlocal] = 0.0; - h_uChem[nlocal] = 0.0; - h_uCG[nlocal] = 0.0; - h_uCGnew[nlocal] = 0.0; - - atomKK->modified(Host,ALL_MASK); - - atom->nlocal++; -} - -/* ---------------------------------------------------------------------- - unpack hybrid quantities from one line in Atoms section of data file - initialize other atom quantities for this sub-style -------------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::data_atom_hybrid(int nlocal, char **values) -{ - h_dpdTheta(nlocal) = atof(values[0]); - - return 1; -} - -/* ---------------------------------------------------------------------- - pack atom info for data file including 3 image flags -------------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::pack_data(double **buf) -{ - int nlocal = atom->nlocal; - for (int i = 0; i < nlocal; i++) { - buf[i][0] = ubuf(h_tag(i)).d; - buf[i][1] = ubuf(h_type(i)).d; - buf[i][2] = h_dpdTheta(i); - buf[i][3] = h_x(i,0); - buf[i][4] = h_x(i,1); - buf[i][5] = h_x(i,2); - buf[i][6] = (h_image[i] & IMGMASK) - IMGMAX; - buf[i][7] = (h_image[i] >> IMGBITS & IMGMASK) - IMGMAX; - buf[i][8] = (h_image[i] >> IMG2BITS) - IMGMAX; - } -} - -/* ---------------------------------------------------------------------- - pack hybrid atom info for data file -------------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::pack_data_hybrid(int i, double *buf) -{ - buf[0] = h_dpdTheta(i); - return 1; -} - -/* ---------------------------------------------------------------------- - write atom info to data file including 3 image flags -------------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::write_data(FILE *fp, int n, double **buf) -{ - for (int i = 0; i < n; i++) - fprintf(fp,TAGINT_FORMAT 
" %d %-1.16e %-1.16e %-1.16e %-1.16e %d %d %d\n", - (tagint) ubuf(buf[i][0]).i,(int) ubuf(buf[i][1]).i, - buf[i][2],buf[i][3],buf[i][4],buf[i][5], - (int) ubuf(buf[i][6]).i,(int) ubuf(buf[i][7]).i, - (int) ubuf(buf[i][8]).i); -} - -/* ---------------------------------------------------------------------- - write hybrid atom info to data file -------------------------------------------------------------------------- */ - -int AtomVecDPDKokkos::write_data_hybrid(FILE *fp, double *buf) -{ - fprintf(fp," %-1.16e",buf[0]); - return 1; -} - -/* ---------------------------------------------------------------------- - return # of bytes of allocated memory -------------------------------------------------------------------------- */ - -bigint AtomVecDPDKokkos::memory_usage() -{ - bigint bytes = 0; - - if (atom->memcheck("tag")) bytes += memory->usage(tag,nmax); - if (atom->memcheck("type")) bytes += memory->usage(type,nmax); - if (atom->memcheck("mask")) bytes += memory->usage(mask,nmax); - if (atom->memcheck("image")) bytes += memory->usage(image,nmax); - if (atom->memcheck("x")) bytes += memory->usage(x,nmax,3); - if (atom->memcheck("v")) bytes += memory->usage(v,nmax,3); - if (atom->memcheck("f")) bytes += memory->usage(f,nmax*commKK->nthreads,3); - if (atom->memcheck("rho")) bytes += memory->usage(rho,nmax); - if (atom->memcheck("dpdTheta")) bytes += memory->usage(dpdTheta,nmax); - if (atom->memcheck("uCond")) bytes += memory->usage(uCond,nmax); - if (atom->memcheck("uMech")) bytes += memory->usage(uMech,nmax); - if (atom->memcheck("uChem")) bytes += memory->usage(uChem,nmax); - if (atom->memcheck("uCG")) bytes += memory->usage(uCG,nmax); - if (atom->memcheck("uCGnew")) bytes += memory->usage(uCGnew,nmax); - if (atom->memcheck("duChem")) bytes += memory->usage(duChem,nmax); - - return bytes; -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask) -{ - if (space == Device) 
{ - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - } else { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - } -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask) -{ - if (space == Device) { - if ((mask & X_MASK) && atomKK->k_x.need_sync()) - perform_async_copy(atomKK->k_x,space); - if ((mask & V_MASK) && atomKK->k_v.need_sync()) - perform_async_copy(atomKK->k_v,space); - if ((mask & F_MASK) && atomKK->k_f.need_sync()) - perform_async_copy(atomKK->k_f,space); - if ((mask & TAG_MASK) && atomKK->k_tag.need_sync()) - perform_async_copy(atomKK->k_tag,space); - if ((mask & TYPE_MASK) && atomKK->k_type.need_sync()) - perform_async_copy(atomKK->k_type,space); - if ((mask & MASK_MASK) && atomKK->k_mask.need_sync()) - perform_async_copy(atomKK->k_mask,space); - if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync()) - perform_async_copy(atomKK->k_image,space); - } else { - if ((mask & X_MASK) && atomKK->k_x.need_sync()) - perform_async_copy(atomKK->k_x,space); - if ((mask & V_MASK) && atomKK->k_v.need_sync()) - perform_async_copy(atomKK->k_v,space); - if ((mask & F_MASK) && atomKK->k_f.need_sync()) - perform_async_copy(atomKK->k_f,space); - if ((mask & TAG_MASK) && atomKK->k_tag.need_sync()) - perform_async_copy(atomKK->k_tag,space); - if ((mask & TYPE_MASK) && atomKK->k_type.need_sync()) - perform_async_copy(atomKK->k_type,space); - if 
((mask & MASK_MASK) && atomKK->k_mask.need_sync()) - perform_async_copy(atomKK->k_mask,space); - if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync()) - perform_async_copy(atomKK->k_image,space); - } -} - -/* ---------------------------------------------------------------------- */ - -void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask) -{ - if (space == Device) { - if (mask & X_MASK) atomKK->k_x.modify(); - if (mask & V_MASK) atomKK->k_v.modify(); - if (mask & F_MASK) atomKK->k_f.modify(); - if (mask & TAG_MASK) atomKK->k_tag.modify(); - if (mask & TYPE_MASK) atomKK->k_type.modify(); - if (mask & MASK_MASK) atomKK->k_mask.modify(); - if (mask & IMAGE_MASK) atomKK->k_image.modify(); - } else { - if (mask & X_MASK) atomKK->k_x.modify(); - if (mask & V_MASK) atomKK->k_v.modify(); - if (mask & F_MASK) atomKK->k_f.modify(); - if (mask & TAG_MASK) atomKK->k_tag.modify(); - if (mask & TYPE_MASK) atomKK->k_type.modify(); - if (mask & MASK_MASK) atomKK->k_mask.modify(); - if (mask & IMAGE_MASK) atomKK->k_image.modify(); - } -} - diff --git a/src/USER-DPD/atom_vec_dpd_kokkos.h b/src/USER-DPD/atom_vec_dpd_kokkos.h deleted file mode 100644 index d108e58ae7..0000000000 --- a/src/USER-DPD/atom_vec_dpd_kokkos.h +++ /dev/null @@ -1,135 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale AtomicKokkos/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#ifdef ATOM_CLASS - -AtomStyle(dpd/kk,AtomVecDPDKokkos) - -#else - -#ifndef LMP_ATOM_VEC_DPD_KOKKOS_H -#define LMP_ATOM_VEC_DPD_KOKKOS_H - -#include "atom_vec_kokkos.h" -#include "kokkos_type.h" - -namespace LAMMPS_NS { - -class AtomVecDPDKokkos : public AtomVecKokkos { - public: - AtomVecDPDKokkos(class LAMMPS *); - virtual ~AtomVecDPDKokkos() {} - void grow(int); - void copy(int, int, int); - int pack_comm(int, int *, double *, int, int *); - int pack_comm_vel(int, int *, double *, int, int *); - int pack_comm_hybrid(int, int *, double *); - void unpack_comm(int, int, double *); - void unpack_comm_vel(int, int, double *); - int unpack_comm_hybrid(int, int, double *); - int pack_reverse(int, int, double *); - void unpack_reverse(int, int *, double *); - int pack_border(int, int *, double *, int, int *); - int pack_border_vel(int, int *, double *, int, int *); - int pack_border_hybrid(int, int *, double *); - void unpack_border(int, int, double *); - void unpack_border_vel(int, int, double *); - int unpack_border_hybrid(int, int, double *); - int pack_exchange(int, double *); - int unpack_exchange(double *); - int size_restart(); - int pack_restart(int, double *); - int unpack_restart(double *); - void create_atom(int, double *); - void data_atom(double *, tagint, char **); - int data_atom_hybrid(int, char **); - void pack_data(double **); - int pack_data_hybrid(int, double *); - void write_data(FILE *, int, double **); - int write_data_hybrid(FILE *, double *); - bigint memory_usage(); - - void grow_reset(); - int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, - const int & iswap, - const DAT::tdual_xfloat_2d &buf, - const int &pbc_flag, const int pbc[]); - void unpack_comm_kokkos(const int &n, const int &nfirst, - const DAT::tdual_xfloat_2d &buf); - int pack_comm_self(const int &n, const DAT::tdual_int_2d &list, - const int & iswap, const int nfirst, - const int 
&pbc_flag, const int pbc[]); - int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, - DAT::tdual_xfloat_2d buf,int iswap, - int pbc_flag, int *pbc, ExecutionSpace space); - void unpack_border_kokkos(const int &n, const int &nfirst, - const DAT::tdual_xfloat_2d &buf, - ExecutionSpace space); - int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, - DAT::tdual_int_1d k_sendlist, - DAT::tdual_int_1d k_copylist, - ExecutionSpace space, int dim, - X_FLOAT lo, X_FLOAT hi); - int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, - int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, - ExecutionSpace space); - - void sync(ExecutionSpace space, unsigned int mask); - void modified(ExecutionSpace space, unsigned int mask); - void sync_overlapping_device(ExecutionSpace space, unsigned int mask); - double *uCond,*uMech,*uChem,*uCG,*uCGnew,*rho,*dpdTheta; - double *duChem; - - protected: - DAT::t_efloat_1d d_uCond, d_uMech, d_uChem, d_uCG, d_uCGnew,d_rho,d_dpdTheta,d_duChem; - HAT::t_efloat_1d h_uCond, h_uMech, h_uChem, h_uCG, h_uCGnew,h_rho,h_dpdTheta,h_duChem; - - tagint *tag; - imageint *image; - int *type,*mask; - double **x,**v,**f; - - DAT::t_tagint_1d d_tag; - HAT::t_tagint_1d h_tag; - DAT::t_imageint_1d d_image; - HAT::t_imageint_1d h_image; - DAT::t_int_1d d_type, d_mask; - HAT::t_int_1d h_type, h_mask; - - DAT::t_x_array d_x; - DAT::t_v_array d_v; - DAT::t_f_array d_f; - HAT::t_x_array h_x; - HAT::t_v_array h_v; - HAT::t_f_array h_f; - - DAT::tdual_int_1d k_count; -}; - -} - -#endif -#endif - -/* ERROR/WARNING messages: - -E: Per-processor system is too big - -The number of owned atoms plus ghost atoms on a single -processor must fit in 32-bit integer. - -E: Invalid atom type in Atoms section of data file - -Atom types must range from 1 to specified # of types. 
- -*/ From 1dbf6d443f45a96887c02999bec3832d3c534b61 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 13 Dec 2016 16:43:40 -0700 Subject: [PATCH 007/267] Adding Kokkos files --- src/Depend.sh | 4 + src/KOKKOS/Install.sh | 6 + src/KOKKOS/fix_eos_table_rx_kokkos.cpp | 354 ++++++++++ src/KOKKOS/fix_eos_table_rx_kokkos.h | 152 +++++ src/KOKKOS/pair_exp6_rx_kokkos.cpp | 46 +- src/KOKKOS/pair_exp6_rx_kokkos.h | 9 +- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 791 +++++++++++++++++++++++ src/KOKKOS/pair_multi_lucy_rx_kokkos.h | 215 ++++++ src/KOKKOS/pair_table_kokkos.cpp | 758 +--------------------- src/KOKKOS/pair_table_kokkos.h | 44 +- src/KOKKOS/pair_table_rx_kokkos.cpp | 634 ++++++++++++++++++ src/KOKKOS/pair_table_rx_kokkos.h | 269 ++++++++ src/USER-DPD/pair_multi_lucy.h | 2 +- src/USER-DPD/pair_multi_lucy_rx.cpp | 6 +- src/USER-DPD/pair_multi_lucy_rx.h | 2 +- src/pair_table.h | 6 +- 16 files changed, 2493 insertions(+), 805 deletions(-) create mode 100644 src/KOKKOS/fix_eos_table_rx_kokkos.cpp create mode 100644 src/KOKKOS/fix_eos_table_rx_kokkos.h create mode 100644 src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp create mode 100644 src/KOKKOS/pair_multi_lucy_rx_kokkos.h create mode 100644 src/KOKKOS/pair_table_rx_kokkos.cpp create mode 100644 src/KOKKOS/pair_table_rx_kokkos.h diff --git a/src/Depend.sh b/src/Depend.sh index 44964d5182..51f83b2ea5 100644 --- a/src/Depend.sh +++ b/src/Depend.sh @@ -113,6 +113,10 @@ if (test $1 = "USER-CG-CMM") then depend USER-OMP fi +if (test $1 = "USER-DPD") then + depend KOKKOS +fi + if (test $1 = "USER-FEP") then depend USER-OMP fi diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 1381a1978c..567e825642 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -73,6 +73,8 @@ action domain_kokkos.cpp action domain_kokkos.h action fix_deform_kokkos.cpp action fix_deform_kokkos.h +action fix_eos_table_rx_kokkos.cpp fix_eos_table_rx.cpp +action fix_eos_table_rx_kokkos.h fix_eos_table_rx.h action 
fix_langevin_kokkos.cpp action fix_langevin_kokkos.h action fix_nh_kokkos.cpp @@ -171,6 +173,8 @@ action pair_lj_gromacs_kokkos.cpp action pair_lj_gromacs_kokkos.h action pair_lj_sdk_kokkos.cpp pair_lj_sdk.cpp action pair_lj_sdk_kokkos.h pair_lj_sdk.h +action pair_multi_lucy_rx_kokkos.cpp pair_multi_lucy_rx.cpp +action pair_multi_lucy_rx_kokkos.h pair_multi_lucy_rx.h action pair_reax_c_kokkos.cpp pair_reax_c.cpp action pair_reax_c_kokkos.h pair_reax_c.h action pair_sw_kokkos.cpp pair_sw.cpp @@ -179,6 +183,8 @@ action pair_vashishta_kokkos.cpp pair_vashishta.cpp action pair_vashishta_kokkos.h pair_vashishta.h action pair_table_kokkos.cpp action pair_table_kokkos.h +action pair_table_rx_kokkos.cpp pair_table_rx.cpp +action pair_table_rx_kokkos.h pair_table_rx.h action pair_tersoff_kokkos.cpp pair_tersoff.cpp action pair_tersoff_kokkos.h pair_tersoff.h action pair_tersoff_mod_kokkos.cpp pair_tersoff_mod.cpp diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp new file mode 100644 index 0000000000..a1e0b1a07d --- /dev/null +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp @@ -0,0 +1,354 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Stan Moore (Sandia) +------------------------------------------------------------------------- */ + +#include +#include +#include "fix_eos_table_rx_kokkos.h" +#include "atom_kokkos.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "comm.h" +#include +#include "modify.h" +#include "atom_masks.h" + +#define MAXLINE 1024 + +using namespace LAMMPS_NS; +using namespace FixConst; + +/* ---------------------------------------------------------------------- */ + +template +FixEOStableRXKokkos::FixEOStableRXKokkos(LAMMPS *lmp, int narg, char **arg) : + FixEOStableRX(lmp, narg, arg) +{ + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +FixEOStableRXKokkos::~FixEOStableRXKokkos() +{ + +} + +/* ---------------------------------------------------------------------- */ + +template +void FixEOStableRXKokkos::setup(int vflag) +{ + int nlocal = atom->nlocal; + mask = atomKK->k_mask.view(); + uCond = atomKK->k_uCond.view(); + uMech = atomKK->k_uMech.view(); + uChem = atomKK->k_uChem.view(); + dpdTheta= atomKK->k_dpdTheta.view(); + uCG = atomKK->k_uCG.view(); + uCGnew = atomKK->k_uCGnew.view(); + double duChem; + + for (int i = 0; i < nlocal; i++) // parallel_for + if (mask[i] & groupbit){ + duChem = uCG[i] - uCGnew[i]; + uChem[i] += duChem; + uCG[i] = 0.0; + uCGnew[i] = 0.0; + } + + // Communicate the updated momenta and velocities to all nodes + comm->forward_comm_fix(this); + + for (int i = 0; i < nlocal; i++) // parallel_for + if (mask[i] & groupbit) + temperature_lookup(i,uCond[i]+uMech[i]+uChem[i],dpdTheta[i]); +} + +/* 
---------------------------------------------------------------------- */ + +template +void FixEOStableRXKokkos::init() +{ + int nlocal = atom->nlocal; + mask = atomKK->k_mask.view(); + uCond = atomKK->k_uCond.view(); + uMech = atomKK->k_uMech.view(); + uChem = atomKK->k_uChem.view(); + dpdTheta= atomKK->k_dpdTheta.view(); + double tmp; + + if(this->restart_reset){ + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit) + temperature_lookup(i,uCond[i]+uMech[i]+uChem[i],dpdTheta[i]); + } else { + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + if(dpdTheta[i] <= 0.0) + error->one(FLERR,"Internal temperature <= zero"); + energy_lookup(i,dpdTheta[i],tmp); + uCond[i] = tmp / 2.0; + uMech[i] = tmp / 2.0; + uChem[i] = 0.0; + } + } +} + + +/* ---------------------------------------------------------------------- */ + +template +void FixEOStableRXKokkos::post_integrate() +{ + int nlocal = atom->nlocal; + mask = atomKK->k_mask.view(); + uCond = atomKK->k_uCond.view(); + uMech = atomKK->k_uMech.view(); + uChem = atomKK->k_uChem.view(); + dpdTheta= atomKK->k_dpdTheta.view(); + + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit){ + temperature_lookup(i,uCond[i]+uMech[i]+uChem[i],dpdTheta[i]); + if(dpdTheta[i] <= 0.0) + error->one(FLERR,"Internal temperature <= zero"); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void FixEOStableRXKokkos::end_of_step() +{ + int nlocal = atom->nlocal; + mask = atomKK->k_mask.view(); + uCond = atomKK->k_uCond.view(); + uMech = atomKK->k_uMech.view(); + uChem = atomKK->k_uChem.view(); + dpdTheta= atomKK->k_dpdTheta.view(); + uCG = atomKK->k_uCG.view(); + uCGnew = atomKK->k_uCGnew.view(); + double duChem; + + // Communicate the ghost uCGnew + comm->reverse_comm_fix(this); + + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit){ + duChem = uCG[i] - uCGnew[i]; + uChem[i] += duChem; + uCG[i] = 0.0; + uCGnew[i] = 0.0; + } + + // Communicate the updated momenta and 
velocities to all nodes + comm->forward_comm_fix(this); + + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit){ + temperature_lookup(i,uCond[i]+uMech[i]+uChem[i],dpdTheta[i]); + if(dpdTheta[i] <= 0.0) + error->one(FLERR,"Internal temperature <= zero"); + } +} + +/* ---------------------------------------------------------------------- + calculate potential ui at temperature thetai +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixEOStableRXKokkos::energy_lookup(int id, double thetai, double &ui) const +{ + int itable; + double fraction, uTmp, nTotal; + + ui = 0.0; + nTotal = 0.0; + for(int ispecies=0;ispecieslo); + thetai = MIN(thetai,tb->hi); + + if (tabstyle == LINEAR) { + itable = static_cast ((thetai - tb->lo) * tb->invdelta); + fraction = (thetai - tb->r[itable]) * tb->invdelta; + uTmp = tb->e[itable] + fraction*tb->de[itable]; + + uTmp += dHf[ispecies]; + // mol fraction form: + ui += atom->dvector[ispecies][id]*uTmp; + nTotal += atom->dvector[ispecies][id]; + } + } + ui = ui - double(nTotal+1.5)*force->boltz*thetai; +} + +/* ---------------------------------------------------------------------- + calculate temperature thetai at energy ui +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixEOStableRXKokkos::temperature_lookup(int id, double ui, double &thetai) const +{ + Table *tb = &tables[0]; + + int it; + double t1,t2,u1,u2,f1,f2; + double maxit = 100; + double temp; + double delta = 0.001; + + // Store the current thetai in t1 + t1 = MAX(thetai,tb->lo); + t1 = MIN(t1,tb->hi); + if(t1==tb->hi) delta = -delta; + + // Compute u1 at thetai + energy_lookup(id,t1,u1); + + // Compute f1 + f1 = u1 - ui; + + // Compute guess of t2 + t2 = (1.0 + delta)*t1; + + // Compute u2 at t2 + energy_lookup(id,t2,u2); + + // Compute f1 + f2 = u2 - ui; + + // Apply the Secant Method + for(it=0; itone(FLERR,"NaN detected in 
secant solver."); + temp = t1; + temp = MAX(temp,tb->lo); + temp = MIN(temp,tb->hi); + char str[256]; + sprintf(str,"Secant solver did not converge because table bounds were exceeded: it=%d id=%d ui=%lf thetai=%lf t1=%lf t2=%lf f1=%lf f2=%lf dpdTheta=%lf\n",it,id,ui,thetai,t1,t2,f1,f2,temp); + error->warning(FLERR,str); + break; + } + temp = t2 - f2*(t2-t1)/(f2-f1); + if(fabs(temp-t2) < 1e-6) break; + f1 = f2; + t1 = t2; + t2 = temp; + energy_lookup(id,t2,u2); + f2 = u2 - ui; + } + if(it==maxit){ + char str[256]; + sprintf(str,"Maxit exceeded in secant solver: id=%d ui=%lf thetai=%lf t1=%lf t2=%lf f1=%lf f2=%lf\n",id,ui,thetai,t1,t2,f1,f2); + if(isnan(f1) || isnan(f2) || isnan(ui) || isnan(thetai) || isnan(t1) || isnan(t2)) + error->one(FLERR,"NaN detected in secant solver."); + error->one(FLERR,str); + } + thetai = temp; +} + +/* ---------------------------------------------------------------------- */ + +template +int FixEOStableRXKokkos::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) +{ + int ii,jj,m; + uChem = atomKK->k_uChem.view(); + uCG = atomKK->k_uCG.view(); + uCGnew = atomKK->k_uCGnew.view(); + + m = 0; + for (ii = 0; ii < n; ii++) { + jj = list[ii]; + buf[m++] = uChem[jj]; + buf[m++] = uCG[jj]; + buf[m++] = uCGnew[jj]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixEOStableRXKokkos::unpack_forward_comm(int n, int first, double *buf) +{ + int ii,m,last; + uChem = atomKK->k_uChem.view(); + uCG = atomKK->k_uCG.view(); + uCGnew = atomKK->k_uCGnew.view(); + + m = 0; + last = first + n ; + for (ii = first; ii < last; ii++){ + uChem[ii] = buf[m++]; + uCG[ii] = buf[m++]; + uCGnew[ii] = buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +int FixEOStableRXKokkos::pack_reverse_comm(int n, int first, double *buf) +{ + int i,m,last; + uCG = atomKK->k_uCG.view(); + uCGnew = atomKK->k_uCGnew.view(); + + m = 0; + 
last = first + n; + for (i = first; i < last; i++) { + buf[m++] = uCG[i]; + buf[m++] = uCGnew[i]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixEOStableRXKokkos::unpack_reverse_comm(int n, int *list, double *buf) +{ + int i,j,m; + uCG = atomKK->k_uCG.view(); + uCGnew = atomKK->k_uCGnew.view(); + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + + uCG[j] += buf[m++]; + uCGnew[j] += buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +namespace LAMMPS_NS { +template class FixEOStableRXKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class FixEOStableRXKokkos; +#endif +} diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.h b/src/KOKKOS/fix_eos_table_rx_kokkos.h new file mode 100644 index 0000000000..9eccd67c54 --- /dev/null +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.h @@ -0,0 +1,152 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(eos/table/rx/kk,FixEOStableRXKokkos) +FixStyle(eos/table/rx/kk/device,FixEOStableRXKokkos) +FixStyle(eos/table/rx/kk/host,FixEOStableRXKokkos) + +#else + +#ifndef LMP_FIX_EOS_TABLE_RX_KOKKOS_H +#define LMP_FIX_EOS_TABLE_RX_KOKKOS_H + +#include "fix_eos_table_rx.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +class FixEOStableRXKokkos : public FixEOStableRX { + public: + typedef DeviceType device_type; + typedef ArrayTypes AT; + typedef EV_FLOAT value_type; + + FixEOStableRXKokkos(class LAMMPS *, int, char **); + virtual ~FixEOStableRXKokkos(); + void setup(int); + void init(); + void post_integrate(); + void end_of_step(); + + KOKKOS_INLINE_FUNCTION + void energy_lookup(int, double, double &) const; + + KOKKOS_INLINE_FUNCTION + void temperature_lookup(int, double, double &) const; + + protected: + //struct Table { + // int ninput; + // double lo,hi; + // double *rfile,*efile; + // double *e2file; + // double delta,invdelta,deltasq6; + // double *r,*e,*de,*e2; + //}; + //Table *tables, *tables2; + + void allocate(); + + //double *dHf; + + typename AT::t_int_1d mask; + typename AT::t_efloat_1d uCond,uMech,uChem,uCG,uCGnew,rho,dpdTheta,duChem; + + int pack_reverse_comm(int, int, double *); + void unpack_reverse_comm(int, int *, double *); + int pack_forward_comm(int , int *, double *, int, int *); + void unpack_forward_comm(int , int , double *); + + //int *eosSpecies; + }; +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: FixEOStableRXKokkos requires a fix rx command. 
+ +The fix rx command must come before the pair style command in the input file + +E: There are no rx species specified + +There must be at least one species specified through the fix rx command + +E: Invalid eos/table/rx length + +The eos/table/rx table must have more than one entry. + +E: eos/table/rx values are not increasing + +The equation-of-state must an increasing function + +E: Internal temperature <= zero. + +Self-explanatory. + +E: Cannot open eos table/rx potential file %s + +Self-explanatory. + +E: Incorrect format in eos table/rx file + +Self-explanatory. + +E: Cannot open file %s + +Self-explanatory. + +E: Did not find keyword in table file + +Self-explanatory. + +E: Illegal fix eos/table/rx command + +Incorrect number of arguments specified for the fix eos/table/rx command. + +E: Invalid keyword in fix eos/table/rx parameters + +Self-explanatory. + +E: The number of columns in fix eos/table/rx does not match the number of species. + +Self-explanatory. Check format for fix eos/table/rx file. + +E: fix eos/table/rx parameters did not set N + +The number of table entries was not set in the eos/table/rx file + +W: Secant solver did not converge because table bounds were exceeded + +The secant solver failed to converge, resulting in the lower or upper table bound temperature to be returned + +E: NaN detected in secant solver. + +Self-explanatory. 
+ +E: Maxit exceeded in secant solver + +The maximum number of interations was exceeded in the secant solver + +*/ diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 754fa4667d..a7d5569537 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -31,6 +31,8 @@ #include "modify.h" #include "fix.h" #include +#include "atom_masks.h" +#include "neigh_request.h" using namespace LAMMPS_NS; using namespace MathConst; @@ -50,7 +52,10 @@ using namespace MathSpecial; template PairExp6rxKokkos::PairExp6rxKokkos(LAMMPS *lmp) : PairExp6rx(lmp) { - + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; } /* ---------------------------------------------------------------------- */ @@ -63,6 +68,39 @@ PairExp6rxKokkos::~PairExp6rxKokkos() /* ---------------------------------------------------------------------- */ +template +void PairExp6rxKokkos::init_style() +{ + PairExp6rxKokkos::init_style(); + + // irequest = neigh request made by parent class + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same::value && + !Kokkos::Impl::is_same::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same::value; + + if (neighflag == FULL) { + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full_cluster = 0; + neighbor->requests[irequest]->ghost = 1; + } else if (neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 1; + neighbor->requests[irequest]->full_cluster = 0; + neighbor->requests[irequest]->ghost = 1; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk"); + } +} + 
+/* ---------------------------------------------------------------------- */ + template void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) { @@ -270,14 +308,14 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxCompute KOKKOS_INLINE_FUNCTION @@ -73,9 +77,6 @@ class PairExp6rxKokkos : public PairExp6rx { KOKKOS_INLINE_FUNCTION void operator()(TagPairExp6rxCompute, const int&) const; - KOKKOS_INLINE_FUNCTION - void operator()(TagPairExp6rxgetParamsEXP6, const int&) const; - template KOKKOS_INLINE_FUNCTION void ev_tally(EV_FLOAT &ev, const int &i, const int &j, diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp new file mode 100644 index 0000000000..de70ae86f5 --- /dev/null +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -0,0 +1,791 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------------------------- + Contributing authors: + Stan Moore (Sandia) + + Please cite the related publications: + J.D. Moore, B.C. Barnes, S. Izvekov, M. Lisal, M.S. Sellers, D.E. Taylor & J.K. Brennan + "A coarse-grain force field for RDX: Density dependent and energy conserving" + The Journal of Chemical Physics, 2016, 144, 104501. 
+------------------------------------------------------------------------------------------- */ + +#include +#include +#include "math_const.h" +#include +#include +#include "pair_multi_lucy_rx_kokkos.h" +#include "atom_kokkos.h" +#include "force.h" +#include "comm.h" +#include "neigh_list.h" +#include "memory.h" +#include "error.h" +#include "citeme.h" +#include "modify.h" +#include "fix.h" +#include "atom_masks.h" +#include "neigh_request.h" + +using namespace LAMMPS_NS; + +enum{NONE,RLINEAR,RSQ}; + +#define MAXLINE 1024 + +#define oneFluidParameter (-1) +#define isOneFluid(_site) ( (_site) == oneFluidParameter ) + +static const char cite_pair_multi_lucy_rx[] = + "pair_style multi/lucy/rx command:\n\n" + "@Article{Moore16,\n" + " author = {J.D. Moore, B.C. Barnes, S. Izvekov, M. Lisal, M.S. Sellers, D.E. Taylor and J. K. Brennan},\n" + " title = {A coarse-grain force field for RDX: Density dependent and energy conserving},\n" + " journal = {J. Chem. Phys.},\n" + " year = 2016,\n" + " volume = 144\n" + " pages = {104501}\n" + "}\n\n"; + +/* ---------------------------------------------------------------------- */ + +template +PairMultiLucyRXKokkos::PairMultiLucyRXKokkos(LAMMPS *lmp) : PairMultiLucyRX(lmp) +{ + respa_enable = 0; + + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +PairMultiLucyRXKokkos::~PairMultiLucyRXKokkos() +{ + +} + +/* ---------------------------------------------------------------------- */ + +template +void PairMultiLucyRXKokkos::init_style() +{ + PairMultiLucyRX::init_style(); + + // irequest = neigh request made by parent class + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = 
Kokkos::Impl::is_same::value && + !Kokkos::Impl::is_same::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same::value; + + if (neighflag == FULL) { + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full_cluster = 0; + neighbor->requests[irequest]->ghost = 1; + } else if (neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 1; + neighbor->requests[irequest]->full_cluster = 0; + neighbor->requests[irequest]->ghost = 1; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with multi/lucy/rx/kk"); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairMultiLucyRXKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + double evdwl,evdwlOld; + + evdwlOld = 0.0; + evdwl = 0.0; + if (neighflag == FULL) no_virial_fdotr_compute = 1; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memory->destroy_kokkos(k_eatom,eatom); + memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.d_view; + } + if (vflag_atom) { + memory->destroy_kokkos(k_vatom,vatom); + memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + d_vatom = k_vatom.d_view; + } + + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + type = atomKK->k_type.view(); + uCG = atomKK->k_uCG.view(); + uCGnew = atomKK->k_uCGnew.view(); + dvector = atomKK->k_dvector.view(); + rho = atomKK->k_rho.view(); + + nlocal = atom->nlocal; + int nghost = atom->nghost; + int newton_pair = force->newton_pair; + + { + const int ntotal = nlocal + nghost; + d_fractionOld1 = typename AT::t_float_1d("PairMultiLucyRX::fractionOld1",ntotal); + d_fractionOld2 = typename AT::t_float_1d("PairMultiLucyRX::fractionOld2",ntotal); + d_fraction1 = typename 
AT::t_float_1d("PairMultiLucyRX::fraction1",ntotal); + d_fraction2 = typename AT::t_float_1d("PairMultiLucyRX::fraction2",ntotal); + + Kokkos::parallel_for(Kokkos::RangePolicy(0,ntotal),*this); + } + + const int inum = list->inum; + NeighListKokkos* k_list = static_cast*>(list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + + computeLocalDensity(); + + // loop over neighbors of my atoms + + EV_FLOAT ev; + + if (evflag) { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + + if (eflag_global) eng_vdwl += ev.evdwl; + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + copymode = 0; +} + +template +KOKKOS_INLINE_FUNCTION +void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXgetParams, const int &i) const { + getParams(i, d_fractionOld1[i], d_fractionOld2[i], d_fraction1[i], d_fraction2[i]); +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute, const int &ii, EV_FLOAT& ev) const { + int i,j,jj,inum,jnum,itype,jtype,itable; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair; + double rsq; + + double fractionOld1_i,fractionOld1_j; + double fractionOld2_i,fractionOld2_j; + double fraction1_i; + + double pi = MathConst::MY_PI; + double A_i, A_j; + double fraction_i,fraction_j; + int jtable; + + Table *tb; + + int tlm1 = tablength - 1; + + i = d_ilist[ii]; + xtmp = x(i,0); + ytmp = x(i,1); + ztmp = x(i,2); + itype = type[i]; + jnum = d_numneigh[i]; + + double fx_i = 0.0; + double fy_i = 0.0; + double fz_i = 0.0; + + 
fractionOld1_i = d_fractionOld1[i]; + fractionOld2_i = d_fractionOld2[i]; + fraction1_i = d_fraction1[i]; + + for (jj = 0; jj < jnum; jj++) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + + delx = xtmp - x(j,0); + dely = ytmp - x(j,1); + delz = ztmp - x(j,2); + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < d_cutsq(itype,jtype)) { // optimize + fpair = 0.0; + + fractionOld1_j = d_fractionOld1[j]; + fractionOld2_j = d_fractionOld2[j]; + + tb = &tables[tabindex[itype][jtype]]; + if (rho[i]*rho[i] < tb->innersq || rho[j]*rho[j] < tb->innersq){ + //printf("Table inner cutoff = %lf\n",sqrt(tb->innersq)); + //printf("rho[%d]=%lf\n",i,rho[i]); + //printf("rho[%d]=%lf\n",j,rho[j]); + error->one(FLERR,"Density < table inner cutoff"); + } + if (tabstyle == LOOKUP) { + itable = static_cast (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta); + jtable = static_cast (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta); + if (itable >= tlm1 || jtable >= tlm1){ + //printf("Table outer index = %d\n",tlm1); + //printf("itableIndex=%d rho[%d]=%lf\n",itable,i,rho[i]); + //printf("jtableIndex=%d rho[%d]=%lf\n",jtable,j,rho[j]); + error->one(FLERR,"Density > table outer cutoff"); + } + A_i = tb->f[itable]; + A_j = tb->f[jtable]; + + const double rfactor = 1.0-sqrt(rsq/d_cutsq(itype,jtype)); + fpair = 0.5*(A_i + A_j)*(4.0-3.0*rfactor)*rfactor*rfactor*rfactor; + fpair /= sqrt(rsq); + + } else if (tabstyle == LINEAR) { + itable = static_cast ((rho[i]*rho[i] - tb->innersq) * tb->invdelta); + jtable = static_cast (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta); + if (itable >= tlm1 || jtable >= tlm1){ + //printf("Table outer index = %d\n",tlm1); + //printf("itableIndex=%d rho[%d]=%lf\n",itable,i,rho[i]); + //printf("jtableIndex=%d rho[%d]=%lf\n",jtable,j,rho[j]); + error->one(FLERR,"Density > table outer cutoff"); + } + if(itable<0) itable=0; + if(itable>=tlm1) itable=tlm1; + if(jtable<0) jtable=0; + if(jtable>=tlm1)jtable=tlm1; + + fraction_i = (((rho[i]*rho[i]) - 
tb->rsq[itable]) * tb->invdelta); + fraction_j = (((rho[j]*rho[j]) - tb->rsq[jtable]) * tb->invdelta); + if(itable==0) fraction_i=0.0; + if(itable==tlm1) fraction_i=0.0; + if(jtable==0) fraction_j=0.0; + if(jtable==tlm1) fraction_j=0.0; + + A_i = tb->f[itable] + fraction_i*tb->df[itable]; + A_j = tb->f[jtable] + fraction_j*tb->df[jtable]; + + const double rfactor = 1.0-sqrt(rsq/d_cutsq(itype,jtype)); + fpair = 0.5*(A_i + A_j)*(4.0-3.0*rfactor)*rfactor*rfactor*rfactor; + fpair /= sqrt(rsq); + + } else error->one(FLERR,"Only LOOKUP and LINEAR table styles have been implemented for pair multi/lucy/rx"); + + if (isite1 == isite2) fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpair; + else fpair = (sqrt(fractionOld1_i*fractionOld2_j) + sqrt(fractionOld2_i*fractionOld1_j))*fpair; + + fx_i += delx*fpair; + fy_i += dely*fpair; + fz_i += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f(j,0) -= delx*fpair; + f(j,1) -= dely*fpair; + f(j,2) -= delz*fpair; + } + //if (evflag) ev_tally(i,j,nlocal,newton_pair,0.0,0.0,fpair,delx,dely,delz); + if (EVFLAG) this->template ev_tally(ev,i,j,0.0,fpair,delx,dely,delz); + } + } + + f(i,0) += fx_i; + f(i,1) += fy_i; + f(i,2) += fz_i; + + tb = &tables[tabindex[itype][itype]]; + itable = static_cast (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta); + if (tabstyle == LOOKUP) evdwl = tb->e[itable]; + else if (tabstyle == LINEAR){ + if (itable >= tlm1){ + //printf("itableIndex=%d rho[%d]=%lf\n",itable,i,rho[i]); + error->one(FLERR,"Density > table outer cutoff"); + } + if(itable==0) fraction_i=0.0; + else fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta); + evdwl = tb->e[itable] + fraction_i*tb->de[itable]; + } else error->one(FLERR,"Only LOOKUP and LINEAR table styles have been implemented for pair multi/lucy/rx"); + + evdwl *=(pi*d_cutsq(itype,itype)*d_cutsq(itype,itype))/84.0; + evdwlOld = fractionOld1_i*evdwl; + evdwl = fraction1_i*evdwl; + + uCG[i] += evdwlOld; + uCGnew[i] += evdwl; + + evdwl = evdwlOld; + + //if (evflag) 
ev_tally(0,0,nlocal,newton_pair,evdwl,0.0,0.0,0.0,0.0,0.0); + if (EVFLAG) ev.evdwl += evdwl; +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairMultiLucyRXCompute(), ii, ev); +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +template +void PairMultiLucyRXKokkos::coeff(int narg, char **arg) +{ + if (narg != 6 && narg != 7) error->all(FLERR,"Illegal pair_coeff command"); + + bool rx_flag = false; + for (int i = 0; i < modify->nfix; i++) + if (strncmp(modify->fix[i]->style,"rx",2) == 0) rx_flag = true; + if (!rx_flag) error->all(FLERR,"PairMultiLucyRXKokkos requires a fix rx command."); + + if (!allocated) allocate(); + + int ilo,ihi,jlo,jhi; + force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi); + force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi); + + int me; + MPI_Comm_rank(world,&me); + tables = (Table *) + memory->srealloc(tables,(ntables+1)*sizeof(Table),"pair:tables"); + Table *tb = &tables[ntables]; + null_table(tb); + if (me == 0) read_table(tb,arg[2],arg[3]); + bcast_table(tb); + + nspecies = atom->nspecies_dpd; + int n; + n = strlen(arg[4]) + 1; // size the buffer from the string actually copied into site1 + site1 = new char[n]; + strcpy(site1,arg[4]); + + n = strlen(arg[5]) + 1; // size the buffer from the string actually copied into site2 + site2 = new char[n]; + strcpy(site2,arg[5]); + + // set table cutoff + + if (narg == 7) tb->cut = force->numeric(FLERR,arg[6]); + else if (tb->rflag) tb->cut = tb->rhi; + else tb->cut = tb->rfile[tb->ninput-1]; + + // error check on table parameters + // insure cutoff is within table + + if (tb->ninput <= 1) error->one(FLERR,"Invalid pair table length"); + if (tb->rflag == 0) { + rho_0 = tb->rfile[0]; + } else { + rho_0 = tb->rlo; + } + + tb->match = 0; + if (tabstyle == LINEAR && tb->ninput == tablength && + tb->rflag == RSQ) tb->match = 1; + + // spline 
read-in values and compute r,e,f vectors within table + + if (tb->match == 0) spline_table(tb); + compute_table(tb); + + // store ptr to table in tabindex + + int count = 0; + for (int i = ilo; i <= ihi; i++) { + for (int j = MAX(jlo,i); j <= jhi; j++) { + tabindex[i][j] = ntables; + setflag[i][j] = 1; + count++; + } + } + + if (count == 0) error->all(FLERR,"Illegal pair_coeff command"); + ntables++; + + // Match site* to isite values. + + if (strcmp(site1, "1fluid") == 0) + isite1 = oneFluidParameter; + else { + isite1 = nspecies; + for (int ispecies = 0; ispecies < nspecies; ++ispecies) + if (strcmp(site1, atom->dname[ispecies]) == 0){ + isite1 = ispecies; + break; + } + + if (isite1 == nspecies) + error->all(FLERR,"Pair_multi_lucy_rx site1 is invalid."); + } + + if (strcmp(site2, "1fluid") == 0) + isite2 = oneFluidParameter; + else { + isite2 = nspecies; + for (int ispecies = 0; ispecies < nspecies; ++ispecies) + if (strcmp(site2, atom->dname[ispecies]) == 0){ + isite2 = ispecies; + break; + } + + if (isite2 == nspecies) + error->all(FLERR,"Pair_multi_lucy_rx site2 is invalid."); + } + +} + +/* ---------------------------------------------------------------------- */ + +template +void PairMultiLucyRXKokkos::computeLocalDensity() +{ + x = atomKK->k_x.view(); + type = atomKK->k_type.view(); + rho = atomKK->k_rho.view(); + nlocal = atom->nlocal; + + //sync + + const int inum = list->inum; + NeighListKokkos* k_list = static_cast*>(list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + + const double pi = MathConst::MY_PI; + + const bool newton_pair = force->newton_pair; + one_type = (atom->ntypes == 1); + + // Special cut-off values for when there's only one type. 
+ cutsq_type11 = cutsq[1][1]; + rcut_type11 = sqrt(cutsq_type11); + factor_type11 = 84.0/(5.0*pi*rcut_type11*rcut_type11*rcut_type11); + + // zero out density + int m = nlocal; + if (newton_pair) m += atom->nghost; + Kokkos::parallel_for(Kokkos::RangePolicy(0,m),*this); + +// rho = density at each atom +// loop over neighbors of my atoms + if (newton_pair) + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + + if (newton_pair) comm->reverse_comm_pair(this); + + comm->forward_comm_pair(this); +} + +template +KOKKOS_INLINE_FUNCTION +void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXZero, const int &i) const { + rho[i] = 0.0; +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeLocalDensity, const int &ii) const { + const int i = d_ilist[ii]; + + const double xtmp = x(i,0); + const double ytmp = x(i,1); + const double ztmp = x(i,2); + + double rho_i = rho[i]; + + const int itype = type[i]; + const int jnum = d_numneigh[i]; + + const double pi = MathConst::MY_PI; + + for (int jj = 0; jj < jnum; jj++){ + const int j = (d_neighbors(i,jj) & NEIGHMASK); + const int jtype = type[j]; + + const double delx = xtmp - x(j,0); + const double dely = ytmp - x(j,1); + const double delz = ztmp - x(j,2); + const double rsq = delx*delx + dely*dely + delz*delz; + + if (one_type) { + if (rsq < cutsq_type11) { + const double rcut = rcut_type11; + const double r_over_rcut = sqrt(rsq) / rcut; + const double tmpFactor = 1.0 - r_over_rcut; + const double tmpFactor4 = tmpFactor*tmpFactor*tmpFactor*tmpFactor; + const double factor = factor_type11*(1.0 + 1.5*r_over_rcut)*tmpFactor4; + rho_i += factor; + if (NEWTON_PAIR || j < nlocal) + rho[j] += factor; + } + } else if (rsq < d_cutsq(itype,jtype)) { // multi-type branch: must pair with if (one_type), not with the inner cutoff test + const double rcut = sqrt(d_cutsq(itype,jtype)); + const double tmpFactor = 1.0-sqrt(rsq)/rcut; + const double tmpFactor4 = 
tmpFactor*tmpFactor*tmpFactor*tmpFactor; + const double factor = (84.0/(5.0*pi*rcut*rcut*rcut))*(1.0+3.0*sqrt(rsq)/(2.0*rcut))*tmpFactor4; + rho_i += factor; + if (NEWTON_PAIR || j < nlocal) + rho[j] += factor; + } + } + + rho[i] = rho_i; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairMultiLucyRXKokkos::getParams(int id, double &fractionOld1, double &fractionOld2, double &fraction1, double &fraction2) const +{ + double fractionOld, fraction; + double nTotal, nTotalOld; + + nTotal = 0.0; + nTotalOld = 0.0; + for (int ispecies = 0; ispecies < nspecies; ispecies++){ + nTotal += dvector(ispecies,id); + nTotalOld += dvector(ispecies+nspecies,id); + } + + if (isOneFluid(isite1) == false){ + fractionOld1 = dvector(isite1+nspecies,id)/nTotalOld; + fraction1 = dvector(isite1,id)/nTotal; + } + if (isOneFluid(isite2) == false){ + fractionOld2 = dvector(isite2+nspecies,id)/nTotalOld; + fraction2 = dvector(isite2,id)/nTotal; + } + + if (isOneFluid(isite1) || isOneFluid(isite2)){ + fractionOld = 0.0; + fraction = 0.0; + + for (int ispecies = 0; ispecies < nspecies; ispecies++){ + if (isite1 == ispecies || isite2 == ispecies) continue; + fractionOld += dvector(ispecies+nspecies,id) / nTotalOld; + fraction += dvector(ispecies,id) / nTotal; + } + if (isOneFluid(isite1)){ + fractionOld1 = fractionOld; + fraction1 = fraction; + } + if (isOneFluid(isite2)){ + fractionOld2 = fractionOld; + fraction2 = fraction; + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +int PairMultiLucyRXKokkos::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) +{ + int i,j,m; + rho = atomKK->k_rho.view(); + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = rho[j]; + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +void PairMultiLucyRXKokkos::unpack_forward_comm(int n, int 
first, double *buf) +{ + int i,m,last; + rho = atomKK->k_rho.view(); + + m = 0; + last = first + n; + for (i = first; i < last; i++) rho[i] = buf[m++]; +} + +/* ---------------------------------------------------------------------- */ + +template +int PairMultiLucyRXKokkos::pack_reverse_comm(int n, int first, double *buf) +{ + int i,m,last; + rho = atomKK->k_rho.view(); + + m = 0; + last = first + n; + for (i = first; i < last; i++) buf[m++] = rho[i]; + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +void PairMultiLucyRXKokkos::unpack_reverse_comm(int n, int *list, double *buf) +{ + int i,j,m; + rho = atomKK->k_rho.view(); + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + rho[j] += buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairMultiLucyRXKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const +{ + const int EFLAG = eflag; + const int VFLAG = vflag_either; + + // The eatom and vatom arrays are atomic for Half/Thread neighbor style + Kokkos::View::value> > v_eatom = k_eatom.view(); + Kokkos::View::value> > v_vatom = k_vatom.view(); + + if (EFLAG) { + if (eflag_atom) { + const E_FLOAT epairhalf = 0.5 * epair; + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR || i < nlocal) v_eatom[i] += epairhalf; + if (NEWTON_PAIR || j < nlocal) v_eatom[j] += epairhalf; + } else { + v_eatom[i] += epairhalf; + } + } + } + + if (VFLAG) { + const E_FLOAT v0 = delx*delx*fpair; + const E_FLOAT v1 = dely*dely*fpair; + const E_FLOAT v2 = delz*delz*fpair; + const E_FLOAT v3 = delx*dely*fpair; + const E_FLOAT v4 = delx*delz*fpair; + const E_FLOAT v5 = dely*delz*fpair; + + if (vflag_global) { + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR || i < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + 
ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + if (NEWTON_PAIR || j < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } else { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + + if (vflag_atom) { + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR || i < nlocal) { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + } + if (NEWTON_PAIR || j < nlocal) { + v_vatom(j,0) += 0.5*v0; + v_vatom(j,1) += 0.5*v1; + v_vatom(j,2) += 0.5*v2; + v_vatom(j,3) += 0.5*v3; + v_vatom(j,4) += 0.5*v4; + v_vatom(j,5) += 0.5*v5; + } + } else { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +namespace LAMMPS_NS { +template class PairMultiLucyRXKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class PairMultiLucyRXKokkos; +#endif +} diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h new file mode 100644 index 0000000000..74a10ddee1 --- /dev/null +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h @@ -0,0 +1,215 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(multi/lucy/rx/kk,PairMultiLucyRXKokkos) +PairStyle(multi/lucy/rx/kk/device,PairMultiLucyRXKokkos) +PairStyle(multi/lucy/rx/kk/host,PairMultiLucyRXKokkos) + +#else + +#ifndef LMP_PAIR_MULTI_LUCY_RX_KOKKOS_H +#define LMP_PAIR_MULTI_LUCY_RX_KOKKOS_H + + +#include "pair_multi_lucy_rx.h" +#include "pair_kokkos.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +struct TagPairMultiLucyRXgetParams{}; + +template +struct TagPairMultiLucyRXCompute{}; + +struct TagPairMultiLucyRXZero{}; + +template +struct TagPairMultiLucyRXComputeLocalDensity{}; + +template +class PairMultiLucyRXKokkos : public PairMultiLucyRX { + public: + typedef DeviceType device_type; + typedef ArrayTypes AT; + typedef EV_FLOAT value_type; + + PairMultiLucyRXKokkos(class LAMMPS *); + virtual ~PairMultiLucyRXKokkos(); + + void compute(int, int); + void init_style(); + void coeff(int, char **); + int pack_forward_comm(int, int *, double *, int, int *); + void unpack_forward_comm(int, int, double *); + int pack_reverse_comm(int, int, double *); + void unpack_reverse_comm(int, int *, double *); + void computeLocalDensity(); + + KOKKOS_INLINE_FUNCTION + void operator()(TagPairMultiLucyRXgetParams, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairMultiLucyRXCompute, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairMultiLucyRXCompute, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagPairMultiLucyRXZero, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairMultiLucyRXComputeLocalDensity, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const; + + private: + int nlocal; + int neighflag; + int 
eflag,vflag; + + bool one_type; + double cutsq_type11; + double rcut_type11; + double factor_type11; + + //struct Table { + // int ninput,rflag,fpflag,match; + // double rlo,rhi,fplo,fphi,cut; + // double *rfile,*efile,*ffile; + // double *e2file,*f2file; + // double innersq,delta,invdelta,deltasq6; + // double *rsq,*drsq,*e,*de,*f,*df,*e2,*f2; + //}; + //Table *tables; + + int **tabindex; + + //void read_table(Table *, char *, char *); + //void param_extract(Table *, char *); + + char *site1, *site2; + + KOKKOS_INLINE_FUNCTION + void getParams(int, double &, double &, double &, double &) const; + + typename AT::t_float_1d d_fractionOld1,d_fractionOld2,d_fraction1,d_fraction2; + + typename AT::t_x_array_randomread x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + typename AT::t_efloat_1d rho; + typename AT::t_efloat_1d uCG, uCGnew; + typename AT::t_float_2d dvector; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + DAT::t_efloat_1d d_eatom; + DAT::t_virial_array d_vatom; + + typename AT::t_neighbors_2d d_neighbors; + typename AT::t_int_1d_randomread d_ilist; + typename AT::t_int_1d_randomread d_numneigh; + + typename AT::tdual_ffloat_2d k_cutsq; + typename AT::t_ffloat_2d d_cutsq; + + friend void pair_virial_fdotr_compute(PairMultiLucyRXKokkos*); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Pair multi/lucy/rx command requires atom_style with density (e.g. dpd, meso) + +Self-explanatory + +E: Density < table inner cutoff + +The local density is smaller than the table's inner cutoff + +E: Density > table outer cutoff + +The local density is greater than the table's outer cutoff + +E: Only LOOKUP and LINEAR table styles have been implemented for pair multi/lucy/rx + +Self-explanatory + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. 
+ +E: Unknown table style in pair_style command + +Self-explanatory + +E: Illegal number of pair table entries + +There must be at least 2 table entries. + +E: Illegal pair_coeff command + +All pair coefficients must be set in the data file or by the +pair_coeff command before running a simulation. + +E: PairMultiLucyRXKokkos requires a fix rx command + +The fix rx command must come before the pair style command in the input file + +E: There are no rx species specified + +There must be at least one species specified through the fix rx command + +E: Invalid pair table length + +Length of read-in pair table is invalid + +E: All pair coeffs are not set + +All pair coefficients must be set in the data file or by the +pair_coeff command before running a simulation. + +E: Cannot open file %s + +The specified file cannot be opened. Check that the path and name are +correct. + +E: Did not find keyword in table file + +Keyword used in pair_coeff command was not found in table file. + +E: Invalid keyword in pair table parameters + +Keyword used in list of table parameters is not recognized. + +E: Pair table parameters did not set N + +List of pair table parameters must include N setting. 
+ +*/ diff --git a/src/KOKKOS/pair_table_kokkos.cpp b/src/KOKKOS/pair_table_kokkos.cpp index 278c5b0a2f..271490bbdd 100644 --- a/src/KOKKOS/pair_table_kokkos.cpp +++ b/src/KOKKOS/pair_table_kokkos.cpp @@ -12,7 +12,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Paul Crozier (SNL) + Contributing author: Christian Trott (SNL) ------------------------------------------------------------------------- */ #include @@ -41,7 +41,7 @@ enum{FULL,HALFTHREAD,HALF}; /* ---------------------------------------------------------------------- */ template -PairTableKokkos::PairTableKokkos(LAMMPS *lmp) : Pair(lmp) +PairTableKokkos::PairTableKokkos(LAMMPS *lmp) : PairTable(lmp) { update_table = 0; atomKK = (AtomKokkos *) atom; @@ -98,6 +98,7 @@ void PairTableKokkos::compute_style(int eflag_in, int vflag_in) if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -221,6 +222,7 @@ compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, c //if (rsq < d_table_const.innersq(tidx)) // error->one(FLERR,"Pair distance < table inner cutoff"); + if (Specialisation::TabStyle == LOOKUP) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); //if (itable >= tlm1) @@ -338,8 +340,6 @@ void PairTableKokkos::create_kokkos_tables() memory->create_kokkos(d_table->drsq,h_table->drsq,ntables,ntable,"Table::drsq"); } - - for(int i=0; i < ntables; i++) { Table* tb = &tables[i]; @@ -477,85 +477,6 @@ void PairTableKokkos::settings(int narg, char **arg) tables = NULL; } -/* ---------------------------------------------------------------------- - set coeffs for one or more type pairs -------------------------------------------------------------------------- */ - -template -void PairTableKokkos::coeff(int narg, 
char **arg) -{ - if (narg != 4 && narg != 5) error->all(FLERR,"Illegal pair_coeff command"); - if (!allocated) allocate(); - - int ilo,ihi,jlo,jhi; - force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi); - force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi); - - int me; - MPI_Comm_rank(world,&me); - tables = (Table *) - memory->srealloc(tables,(ntables+1)*sizeof(Table),"pair:tables"); - Table *tb = &tables[ntables]; - null_table(tb); - if (me == 0) read_table(tb,arg[2],arg[3]); - bcast_table(tb); - - // set table cutoff - - if (narg == 5) tb->cut = force->numeric(FLERR,arg[4]); - else if (tb->rflag) tb->cut = tb->rhi; - else tb->cut = tb->rfile[tb->ninput-1]; - - // error check on table parameters - // insure cutoff is within table - // for BITMAP tables, file values can be in non-ascending order - - if (tb->ninput <= 1) error->one(FLERR,"Invalid pair table length"); - double rlo,rhi; - if (tb->rflag == 0) { - rlo = tb->rfile[0]; - rhi = tb->rfile[tb->ninput-1]; - } else { - rlo = tb->rlo; - rhi = tb->rhi; - } - if (tb->cut <= rlo || tb->cut > rhi) - error->all(FLERR,"Invalid pair table cutoff"); - if (rlo <= 0.0) error->all(FLERR,"Invalid pair table cutoff"); - - // match = 1 if don't need to spline read-in tables - // this is only the case if r values needed by final tables - // exactly match r values read from file - // for tabstyle SPLINE, always need to build spline tables - - tb->match = 0; - if (tabstyle == LINEAR && tb->ninput == tablength && - tb->rflag == RSQ && tb->rhi == tb->cut) tb->match = 1; - if (tabstyle == BITMAP && tb->ninput == 1 << tablength && - tb->rflag == BMP && tb->rhi == tb->cut) tb->match = 1; - if (tb->rflag == BMP && tb->match == 0) - error->all(FLERR,"Bitmapped table in file does not match requested table"); - - // spline read-in values and compute r,e,f vectors within table - - if (tb->match == 0) spline_table(tb); - compute_table(tb); - - // store ptr to table in tabindex - - int count = 0; - for (int i = ilo; i <= ihi; i++) { - for (int j 
= MAX(jlo,i); j <= jhi; j++) { - tabindex[i][j] = ntables; - setflag[i][j] = 1; - count++; - } - } - - if (count == 0) error->all(FLERR,"Illegal pair_coeff command"); - ntables++; -} - /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ @@ -574,677 +495,6 @@ double PairTableKokkos::init_one(int i, int j) return tables[tabindex[i][j]].cut; } -/* ---------------------------------------------------------------------- - read a table section from a tabulated potential file - only called by proc 0 - this function sets these values in Table: - ninput,rfile,efile,ffile,rflag,rlo,rhi,fpflag,fplo,fphi,ntablebits -------------------------------------------------------------------------- */ - -template -void PairTableKokkos::read_table(Table *tb, char *file, char *keyword) -{ - char line[MAXLINE]; - - // open file - - FILE *fp = force->open_potential(file); - if (fp == NULL) { - char str[128]; - sprintf(str,"Cannot open file %s",file); - error->one(FLERR,str); - } - - // loop until section found with matching keyword - - while (1) { - if (fgets(line,MAXLINE,fp) == NULL) - error->one(FLERR,"Did not find keyword in table file"); - if (strspn(line," \t\n\r") == strlen(line)) continue; // blank line - if (line[0] == '#') continue; // comment - char *word = strtok(line," \t\n\r"); - if (strcmp(word,keyword) == 0) break; // matching keyword - fgets(line,MAXLINE,fp); // no match, skip section - param_extract(tb,line); - fgets(line,MAXLINE,fp); - for (int i = 0; i < tb->ninput; i++) fgets(line,MAXLINE,fp); - } - - // read args on 2nd line of section - // allocate table arrays for file values - - fgets(line,MAXLINE,fp); - param_extract(tb,line); - memory->create(tb->rfile,tb->ninput,"pair:rfile"); - memory->create(tb->efile,tb->ninput,"pair:efile"); - memory->create(tb->ffile,tb->ninput,"pair:ffile"); - - // setup bitmap parameters for 
table to read in - - tb->ntablebits = 0; - int masklo,maskhi,nmask,nshiftbits; - if (tb->rflag == BMP) { - while (1 << tb->ntablebits < tb->ninput) tb->ntablebits++; - if (1 << tb->ntablebits != tb->ninput) - error->one(FLERR,"Bitmapped table is incorrect length in table file"); - init_bitmap(tb->rlo,tb->rhi,tb->ntablebits,masklo,maskhi,nmask,nshiftbits); - } - - // read r,e,f table values from file - // if rflag set, compute r - // if rflag not set, use r from file - - int itmp; - double rtmp; - union_int_float_t rsq_lookup; - - fgets(line,MAXLINE,fp); - for (int i = 0; i < tb->ninput; i++) { - fgets(line,MAXLINE,fp); - sscanf(line,"%d %lg %lg %lg",&itmp,&rtmp,&tb->efile[i],&tb->ffile[i]); - - if (tb->rflag == RLINEAR) - rtmp = tb->rlo + (tb->rhi - tb->rlo)*i/(tb->ninput-1); - else if (tb->rflag == RSQ) { - rtmp = tb->rlo*tb->rlo + - (tb->rhi*tb->rhi - tb->rlo*tb->rlo)*i/(tb->ninput-1); - rtmp = sqrt(rtmp); - } else if (tb->rflag == BMP) { - rsq_lookup.i = i << nshiftbits; - rsq_lookup.i |= masklo; - if (rsq_lookup.f < tb->rlo*tb->rlo) { - rsq_lookup.i = i << nshiftbits; - rsq_lookup.i |= maskhi; - } - rtmp = sqrtf(rsq_lookup.f); - } - - tb->rfile[i] = rtmp; - } - - // close file - - fclose(fp); -} - -/* ---------------------------------------------------------------------- - broadcast read-in table info from proc 0 to other procs - this function communicates these values in Table: - ninput,rfile,efile,ffile,rflag,rlo,rhi,fpflag,fplo,fphi -------------------------------------------------------------------------- */ - -template -void PairTableKokkos::bcast_table(Table *tb) -{ - MPI_Bcast(&tb->ninput,1,MPI_INT,0,world); - - int me; - MPI_Comm_rank(world,&me); - if (me > 0) { - memory->create(tb->rfile,tb->ninput,"pair:rfile"); - memory->create(tb->efile,tb->ninput,"pair:efile"); - memory->create(tb->ffile,tb->ninput,"pair:ffile"); - } - - MPI_Bcast(tb->rfile,tb->ninput,MPI_DOUBLE,0,world); - MPI_Bcast(tb->efile,tb->ninput,MPI_DOUBLE,0,world); - 
MPI_Bcast(tb->ffile,tb->ninput,MPI_DOUBLE,0,world); - - MPI_Bcast(&tb->rflag,1,MPI_INT,0,world); - if (tb->rflag) { - MPI_Bcast(&tb->rlo,1,MPI_DOUBLE,0,world); - MPI_Bcast(&tb->rhi,1,MPI_DOUBLE,0,world); - } - MPI_Bcast(&tb->fpflag,1,MPI_INT,0,world); - if (tb->fpflag) { - MPI_Bcast(&tb->fplo,1,MPI_DOUBLE,0,world); - MPI_Bcast(&tb->fphi,1,MPI_DOUBLE,0,world); - } -} - -/* ---------------------------------------------------------------------- - build spline representation of e,f over entire range of read-in table - this function sets these values in Table: e2file,f2file -------------------------------------------------------------------------- */ - -template -void PairTableKokkos::spline_table(Table *tb) -{ - memory->create(tb->e2file,tb->ninput,"pair:e2file"); - memory->create(tb->f2file,tb->ninput,"pair:f2file"); - - double ep0 = - tb->ffile[0]; - double epn = - tb->ffile[tb->ninput-1]; - spline(tb->rfile,tb->efile,tb->ninput,ep0,epn,tb->e2file); - - if (tb->fpflag == 0) { - tb->fplo = (tb->ffile[1] - tb->ffile[0]) / (tb->rfile[1] - tb->rfile[0]); - tb->fphi = (tb->ffile[tb->ninput-1] - tb->ffile[tb->ninput-2]) / - (tb->rfile[tb->ninput-1] - tb->rfile[tb->ninput-2]); - } - - double fp0 = tb->fplo; - double fpn = tb->fphi; - spline(tb->rfile,tb->ffile,tb->ninput,fp0,fpn,tb->f2file); -} - -/* ---------------------------------------------------------------------- - extract attributes from parameter line in table section - format of line: N value R/RSQ/BITMAP lo hi FP fplo fphi - N is required, other params are optional -------------------------------------------------------------------------- */ - -template -void PairTableKokkos::param_extract(Table *tb, char *line) -{ - tb->ninput = 0; - tb->rflag = NONE; - tb->fpflag = 0; - - char *word = strtok(line," \t\n\r\f"); - while (word) { - if (strcmp(word,"N") == 0) { - word = strtok(NULL," \t\n\r\f"); - tb->ninput = atoi(word); - } else if (strcmp(word,"R") == 0 || strcmp(word,"RSQ") == 0 || - strcmp(word,"BITMAP") == 0) 
{ - if (strcmp(word,"R") == 0) tb->rflag = RLINEAR; - else if (strcmp(word,"RSQ") == 0) tb->rflag = RSQ; - else if (strcmp(word,"BITMAP") == 0) tb->rflag = BMP; - word = strtok(NULL," \t\n\r\f"); - tb->rlo = atof(word); - word = strtok(NULL," \t\n\r\f"); - tb->rhi = atof(word); - } else if (strcmp(word,"FP") == 0) { - tb->fpflag = 1; - word = strtok(NULL," \t\n\r\f"); - tb->fplo = atof(word); - word = strtok(NULL," \t\n\r\f"); - tb->fphi = atof(word); - } else { - error->one(FLERR,"Invalid keyword in pair table parameters"); - } - word = strtok(NULL," \t\n\r\f"); - } - - if (tb->ninput == 0) error->one(FLERR,"Pair table parameters did not set N"); -} - -/* ---------------------------------------------------------------------- - compute r,e,f vectors from splined values -------------------------------------------------------------------------- */ - -template -void PairTableKokkos::compute_table(Table *tb) -{ - update_table = 1; - int tlm1 = tablength-1; - - // inner = inner table bound - // cut = outer table bound - // delta = table spacing in rsq for N-1 bins - - double inner; - if (tb->rflag) inner = tb->rlo; - else inner = tb->rfile[0]; - tb->innersq = inner*inner; - tb->delta = (tb->cut*tb->cut - tb->innersq) / tlm1; - tb->invdelta = 1.0/tb->delta; - - // direct lookup tables - // N-1 evenly spaced bins in rsq from inner to cut - // e,f = value at midpt of bin - // e,f are N-1 in length since store 1 value at bin midpt - // f is converted to f/r when stored in f[i] - // e,f are never a match to read-in values, always computed via spline interp - - if (tabstyle == LOOKUP) { - memory->create(tb->e,tlm1,"pair:e"); - memory->create(tb->f,tlm1,"pair:f"); - - double r,rsq; - for (int i = 0; i < tlm1; i++) { - rsq = tb->innersq + (i+0.5)*tb->delta; - r = sqrt(rsq); - tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r); - tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r; - } - } - - // linear tables - // N-1 evenly spaced bins in rsq from 
inner to cut - // rsq,e,f = value at lower edge of bin - // de,df values = delta from lower edge to upper edge of bin - // rsq,e,f are N in length so de,df arrays can compute difference - // f is converted to f/r when stored in f[i] - // e,f can match read-in values, else compute via spline interp - - if (tabstyle == LINEAR) { - memory->create(tb->rsq,tablength,"pair:rsq"); - memory->create(tb->e,tablength,"pair:e"); - memory->create(tb->f,tablength,"pair:f"); - memory->create(tb->de,tlm1,"pair:de"); - memory->create(tb->df,tlm1,"pair:df"); - - double r,rsq; - for (int i = 0; i < tablength; i++) { - rsq = tb->innersq + i*tb->delta; - r = sqrt(rsq); - tb->rsq[i] = rsq; - if (tb->match) { - tb->e[i] = tb->efile[i]; - tb->f[i] = tb->ffile[i]/r; - } else { - tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r); - tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r; - } - } - - for (int i = 0; i < tlm1; i++) { - tb->de[i] = tb->e[i+1] - tb->e[i]; - tb->df[i] = tb->f[i+1] - tb->f[i]; - } - } - - // cubic spline tables - // N-1 evenly spaced bins in rsq from inner to cut - // rsq,e,f = value at lower edge of bin - // e2,f2 = spline coefficient for each bin - // rsq,e,f,e2,f2 are N in length so have N-1 spline bins - // f is converted to f/r after e is splined - // e,f can match read-in values, else compute via spline interp - - if (tabstyle == SPLINE) { - memory->create(tb->rsq,tablength,"pair:rsq"); - memory->create(tb->e,tablength,"pair:e"); - memory->create(tb->f,tablength,"pair:f"); - memory->create(tb->e2,tablength,"pair:e2"); - memory->create(tb->f2,tablength,"pair:f2"); - - tb->deltasq6 = tb->delta*tb->delta / 6.0; - - double r,rsq; - for (int i = 0; i < tablength; i++) { - rsq = tb->innersq + i*tb->delta; - r = sqrt(rsq); - tb->rsq[i] = rsq; - if (tb->match) { - tb->e[i] = tb->efile[i]; - tb->f[i] = tb->ffile[i]/r; - } else { - tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r); - tb->f[i] = 
splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r); - } - } - - // ep0,epn = dh/dg at inner and at cut - // h(r) = e(r) and g(r) = r^2 - // dh/dg = (de/dr) / 2r = -f/2r - - double ep0 = - tb->f[0] / (2.0 * sqrt(tb->innersq)); - double epn = - tb->f[tlm1] / (2.0 * tb->cut); - spline(tb->rsq,tb->e,tablength,ep0,epn,tb->e2); - - // fp0,fpn = dh/dg at inner and at cut - // h(r) = f(r)/r and g(r) = r^2 - // dh/dg = (1/r df/dr - f/r^2) / 2r - // dh/dg in secant approx = (f(r2)/r2 - f(r1)/r1) / (g(r2) - g(r1)) - - double fp0,fpn; - double secant_factor = 0.1; - if (tb->fpflag) fp0 = (tb->fplo/sqrt(tb->innersq) - tb->f[0]/tb->innersq) / - (2.0 * sqrt(tb->innersq)); - else { - double rsq1 = tb->innersq; - double rsq2 = rsq1 + secant_factor*tb->delta; - fp0 = (splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,sqrt(rsq2)) / - sqrt(rsq2) - tb->f[0] / sqrt(rsq1)) / (secant_factor*tb->delta); - } - - if (tb->fpflag && tb->cut == tb->rfile[tb->ninput-1]) fpn = - (tb->fphi/tb->cut - tb->f[tlm1]/(tb->cut*tb->cut)) / (2.0 * tb->cut); - else { - double rsq2 = tb->cut * tb->cut; - double rsq1 = rsq2 - secant_factor*tb->delta; - fpn = (tb->f[tlm1] / sqrt(rsq2) - - splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,sqrt(rsq1)) / - sqrt(rsq1)) / (secant_factor*tb->delta); - } - - for (int i = 0; i < tablength; i++) tb->f[i] /= sqrt(tb->rsq[i]); - spline(tb->rsq,tb->f,tablength,fp0,fpn,tb->f2); - } - - // bitmapped linear tables - // 2^N bins from inner to cut, spaced in bitmapped manner - // f is converted to f/r when stored in f[i] - // e,f can match read-in values, else compute via spline interp - - if (tabstyle == BITMAP) { - double r; - union_int_float_t rsq_lookup; - int masklo,maskhi; - - // linear lookup tables of length ntable = 2^n - // stored value = value at lower edge of bin - - init_bitmap(inner,tb->cut,tablength,masklo,maskhi,tb->nmask,tb->nshiftbits); - int ntable = 1 << tablength; - int ntablem1 = ntable - 1; - - memory->create(tb->rsq,ntable,"pair:rsq"); - 
memory->create(tb->e,ntable,"pair:e"); - memory->create(tb->f,ntable,"pair:f"); - memory->create(tb->de,ntable,"pair:de"); - memory->create(tb->df,ntable,"pair:df"); - memory->create(tb->drsq,ntable,"pair:drsq"); - - union_int_float_t minrsq_lookup; - minrsq_lookup.i = 0 << tb->nshiftbits; - minrsq_lookup.i |= maskhi; - - for (int i = 0; i < ntable; i++) { - rsq_lookup.i = i << tb->nshiftbits; - rsq_lookup.i |= masklo; - if (rsq_lookup.f < tb->innersq) { - rsq_lookup.i = i << tb->nshiftbits; - rsq_lookup.i |= maskhi; - } - r = sqrtf(rsq_lookup.f); - tb->rsq[i] = rsq_lookup.f; - if (tb->match) { - tb->e[i] = tb->efile[i]; - tb->f[i] = tb->ffile[i]/r; - } else { - tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r); - tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r; - } - minrsq_lookup.f = MIN(minrsq_lookup.f,rsq_lookup.f); - } - - tb->innersq = minrsq_lookup.f; - - for (int i = 0; i < ntablem1; i++) { - tb->de[i] = tb->e[i+1] - tb->e[i]; - tb->df[i] = tb->f[i+1] - tb->f[i]; - tb->drsq[i] = 1.0/(tb->rsq[i+1] - tb->rsq[i]); - } - - // get the delta values for the last table entries - // tables are connected periodically between 0 and ntablem1 - - tb->de[ntablem1] = tb->e[0] - tb->e[ntablem1]; - tb->df[ntablem1] = tb->f[0] - tb->f[ntablem1]; - tb->drsq[ntablem1] = 1.0/(tb->rsq[0] - tb->rsq[ntablem1]); - - // get the correct delta values at itablemax - // smallest r is in bin itablemin - // largest r is in bin itablemax, which is itablemin-1, - // or ntablem1 if itablemin=0 - - // deltas at itablemax only needed if corresponding rsq < cut*cut - // if so, compute deltas between rsq and cut*cut - // if tb->match, data at cut*cut is unavailable, so we'll take - // deltas at itablemax-1 as a good approximation - - double e_tmp,f_tmp; - int itablemin = minrsq_lookup.i & tb->nmask; - itablemin >>= tb->nshiftbits; - int itablemax = itablemin - 1; - if (itablemin == 0) itablemax = ntablem1; - int itablemaxm1 = itablemax - 1; - if (itablemax == 0) 
itablemaxm1 = ntablem1; - rsq_lookup.i = itablemax << tb->nshiftbits; - rsq_lookup.i |= maskhi; - if (rsq_lookup.f < tb->cut*tb->cut) { - if (tb->match) { - tb->de[itablemax] = tb->de[itablemaxm1]; - tb->df[itablemax] = tb->df[itablemaxm1]; - tb->drsq[itablemax] = tb->drsq[itablemaxm1]; - } else { - rsq_lookup.f = tb->cut*tb->cut; - r = sqrtf(rsq_lookup.f); - e_tmp = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r); - f_tmp = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r; - tb->de[itablemax] = e_tmp - tb->e[itablemax]; - tb->df[itablemax] = f_tmp - tb->f[itablemax]; - tb->drsq[itablemax] = 1.0/(rsq_lookup.f - tb->rsq[itablemax]); - } - } - } -} - -/* ---------------------------------------------------------------------- - set all ptrs in a table to NULL, so can be freed safely -------------------------------------------------------------------------- */ - -template -void PairTableKokkos::null_table(Table *tb) -{ - tb->rfile = tb->efile = tb->ffile = NULL; - tb->e2file = tb->f2file = NULL; - tb->rsq = tb->drsq = tb->e = tb->de = NULL; - tb->f = tb->df = tb->e2 = tb->f2 = NULL; -} - -/* ---------------------------------------------------------------------- - free all arrays in a table -------------------------------------------------------------------------- */ - -template -void PairTableKokkos::free_table(Table *tb) -{ - memory->destroy(tb->rfile); - memory->destroy(tb->efile); - memory->destroy(tb->ffile); - memory->destroy(tb->e2file); - memory->destroy(tb->f2file); - - memory->destroy(tb->rsq); - memory->destroy(tb->drsq); - memory->destroy(tb->e); - memory->destroy(tb->de); - memory->destroy(tb->f); - memory->destroy(tb->df); - memory->destroy(tb->e2); - memory->destroy(tb->f2); -} - -/* ---------------------------------------------------------------------- - spline and splint routines modified from Numerical Recipes -------------------------------------------------------------------------- */ - -template -void PairTableKokkos::spline(double *x, 
double *y, int n, - double yp1, double ypn, double *y2) -{ - int i,k; - double p,qn,sig,un; - double *u = new double[n]; - - if (yp1 > 0.99e30) y2[0] = u[0] = 0.0; - else { - y2[0] = -0.5; - u[0] = (3.0/(x[1]-x[0])) * ((y[1]-y[0]) / (x[1]-x[0]) - yp1); - } - for (i = 1; i < n-1; i++) { - sig = (x[i]-x[i-1]) / (x[i+1]-x[i-1]); - p = sig*y2[i-1] + 2.0; - y2[i] = (sig-1.0) / p; - u[i] = (y[i+1]-y[i]) / (x[i+1]-x[i]) - (y[i]-y[i-1]) / (x[i]-x[i-1]); - u[i] = (6.0*u[i] / (x[i+1]-x[i-1]) - sig*u[i-1]) / p; - } - if (ypn > 0.99e30) qn = un = 0.0; - else { - qn = 0.5; - un = (3.0/(x[n-1]-x[n-2])) * (ypn - (y[n-1]-y[n-2]) / (x[n-1]-x[n-2])); - } - y2[n-1] = (un-qn*u[n-2]) / (qn*y2[n-2] + 1.0); - for (k = n-2; k >= 0; k--) y2[k] = y2[k]*y2[k+1] + u[k]; - - delete [] u; -} - -/* ---------------------------------------------------------------------- */ - -template -double PairTableKokkos::splint(double *xa, double *ya, double *y2a, int n, double x) -{ - int klo,khi,k; - double h,b,a,y; - - klo = 0; - khi = n-1; - while (khi-klo > 1) { - k = (khi+klo) >> 1; - if (xa[k] > x) khi = k; - else klo = k; - } - h = xa[khi]-xa[klo]; - a = (xa[khi]-x) / h; - b = (x-xa[klo]) / h; - y = a*ya[klo] + b*ya[khi] + - ((a*a*a-a)*y2a[klo] + (b*b*b-b)*y2a[khi]) * (h*h)/6.0; - return y; -} - -/* ---------------------------------------------------------------------- - proc 0 writes to restart file -------------------------------------------------------------------------- */ - -template -void PairTableKokkos::write_restart(FILE *fp) -{ - write_restart_settings(fp); -} - -/* ---------------------------------------------------------------------- - proc 0 reads from restart file, bcasts -------------------------------------------------------------------------- */ - -template -void PairTableKokkos::read_restart(FILE *fp) -{ - read_restart_settings(fp); - allocate(); -} - -/* ---------------------------------------------------------------------- - proc 0 writes to restart file 
-------------------------------------------------------------------------- */ - -template -void PairTableKokkos::write_restart_settings(FILE *fp) -{ - fwrite(&tabstyle,sizeof(int),1,fp); - fwrite(&tablength,sizeof(int),1,fp); - fwrite(&ewaldflag,sizeof(int),1,fp); - fwrite(&pppmflag,sizeof(int),1,fp); - fwrite(&msmflag,sizeof(int),1,fp); - fwrite(&dispersionflag,sizeof(int),1,fp); - fwrite(&tip4pflag,sizeof(int),1,fp); -} - -/* ---------------------------------------------------------------------- - proc 0 reads from restart file, bcasts -------------------------------------------------------------------------- */ - -template -void PairTableKokkos::read_restart_settings(FILE *fp) -{ - if (comm->me == 0) { - fread(&tabstyle,sizeof(int),1,fp); - fread(&tablength,sizeof(int),1,fp); - fread(&ewaldflag,sizeof(int),1,fp); - fread(&pppmflag,sizeof(int),1,fp); - fread(&msmflag,sizeof(int),1,fp); - fread(&dispersionflag,sizeof(int),1,fp); - fread(&tip4pflag,sizeof(int),1,fp); - } - MPI_Bcast(&tabstyle,1,MPI_INT,0,world); - MPI_Bcast(&tablength,1,MPI_INT,0,world); - MPI_Bcast(&ewaldflag,1,MPI_INT,0,world); - MPI_Bcast(&pppmflag,1,MPI_INT,0,world); - MPI_Bcast(&msmflag,1,MPI_INT,0,world); - MPI_Bcast(&dispersionflag,1,MPI_INT,0,world); - MPI_Bcast(&tip4pflag,1,MPI_INT,0,world); -} - -/* ---------------------------------------------------------------------- */ - -template -double PairTableKokkos::single(int i, int j, int itype, int jtype, double rsq, - double factor_coul, double factor_lj, - double &fforce) -{ - int itable; - double fraction,value,a,b,phi; - int tlm1 = tablength - 1; - - Table *tb = &tables[tabindex[itype][jtype]]; - if (rsq < tb->innersq) error->one(FLERR,"Pair distance < table inner cutoff"); - - if (tabstyle == LOOKUP) { - itable = static_cast ((rsq-tb->innersq) * tb->invdelta); - if (itable >= tlm1) error->one(FLERR,"Pair distance > table outer cutoff"); - fforce = factor_lj * tb->f[itable]; - } else if (tabstyle == LINEAR) { - itable = static_cast 
((rsq-tb->innersq) * tb->invdelta); - if (itable >= tlm1) error->one(FLERR,"Pair distance > table outer cutoff"); - fraction = (rsq - tb->rsq[itable]) * tb->invdelta; - value = tb->f[itable] + fraction*tb->df[itable]; - fforce = factor_lj * value; - } else if (tabstyle == SPLINE) { - itable = static_cast ((rsq-tb->innersq) * tb->invdelta); - if (itable >= tlm1) error->one(FLERR,"Pair distance > table outer cutoff"); - b = (rsq - tb->rsq[itable]) * tb->invdelta; - a = 1.0 - b; - value = a * tb->f[itable] + b * tb->f[itable+1] + - ((a*a*a-a)*tb->f2[itable] + (b*b*b-b)*tb->f2[itable+1]) * - tb->deltasq6; - fforce = factor_lj * value; - } else { - union_int_float_t rsq_lookup; - rsq_lookup.f = rsq; - itable = rsq_lookup.i & tb->nmask; - itable >>= tb->nshiftbits; - fraction = (rsq_lookup.f - tb->rsq[itable]) * tb->drsq[itable]; - value = tb->f[itable] + fraction*tb->df[itable]; - fforce = factor_lj * value; - } - - if (tabstyle == LOOKUP) - phi = tb->e[itable]; - else if (tabstyle == LINEAR || tabstyle == BITMAP) - phi = tb->e[itable] + fraction*tb->de[itable]; - else - phi = a * tb->e[itable] + b * tb->e[itable+1] + - ((a*a*a-a)*tb->e2[itable] + (b*b*b-b)*tb->e2[itable+1]) * tb->deltasq6; - return factor_lj*phi; -} - -/* ---------------------------------------------------------------------- - return the Coulomb cutoff for tabled potentials - called by KSpace solvers which require that all pairwise cutoffs be the same - loop over all tables not just those indexed by tabindex[i][j] since - no way to know which tables are active since pair::init() not yet called -------------------------------------------------------------------------- */ - -template -void *PairTableKokkos::extract(const char *str, int &dim) -{ - if (strcmp(str,"cut_coul") != 0) return NULL; - if (ntables == 0) error->all(FLERR,"All pair coeffs are not set"); - - double cut_coul = tables[0].cut; - for (int m = 1; m < ntables; m++) - if (tables[m].cut != cut_coul) - error->all(FLERR, - "Pair table cutoffs 
must all be equal to use with KSpace"); - dim = 0; - return &tables[0].cut; -} - template void PairTableKokkos::init_style() { diff --git a/src/KOKKOS/pair_table_kokkos.h b/src/KOKKOS/pair_table_kokkos.h index 09e64804b4..7c021df61e 100644 --- a/src/KOKKOS/pair_table_kokkos.h +++ b/src/KOKKOS/pair_table_kokkos.h @@ -22,7 +22,7 @@ PairStyle(table/kk/host,PairTableKokkos) #ifndef LMP_PAIR_TABLE_KOKKOS_H #define LMP_PAIR_TABLE_KOKKOS_H -#include "pair.h" +#include "pair_table.h" #include "pair_kokkos.h" #include "neigh_list_kokkos.h" #include "atom_kokkos.h" @@ -38,7 +38,7 @@ template class PairTableComputeFunctor; template -class PairTableKokkos : public Pair { +class PairTableKokkos : public PairTable { public: enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER}; @@ -59,18 +59,9 @@ class PairTableKokkos : public Pair { const NeighListKokkos &list) const; */ void settings(int, char **); - void coeff(int, char **); double init_one(int, int); - void write_restart(FILE *); - void read_restart(FILE *); - void write_restart_settings(FILE *); - void read_restart_settings(FILE *); - double single(int, int, int, int, double, double, double, double &); - void *extract(const char *, int &); - void init_style(); - protected: enum{LOOKUP,LINEAR,SPLINE,BITMAP}; @@ -107,17 +98,6 @@ class PairTableKokkos : public Pair { typename ArrayTypes::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2; }; - struct Table { - int ninput,rflag,fpflag,match,ntablebits; - int nshiftbits,nmask; - double rlo,rhi,fplo,fphi,cut; - double *rfile,*efile,*ffile; - double *e2file,*f2file; - double innersq,delta,invdelta,deltasq6; - double *rsq,*drsq,*e,*de,*f,*df,*e2,*f2; - }; - int ntables; - Table *tables; TableDeviceConst d_table_const; TableDevice* d_table; TableHost* h_table; @@ -128,15 +108,6 @@ class PairTableKokkos : public Pair { typename ArrayTypes::t_ffloat_2d d_cutsq; void allocate(); - void read_table(Table *, char *, char *); - void param_extract(Table *, char *); - void bcast_table(Table *); - 
void spline_table(Table *); - void compute_table(Table *); - void null_table(Table *); - void free_table(Table *); - void spline(double *, double *, int, double, double, double *); - double splint(double *, double *, double *, int, double); typename ArrayTypes::t_x_array_randomread x; typename ArrayTypes::t_x_array_const c_x; @@ -213,11 +184,6 @@ class PairTableKokkos : public Pair { friend void pair_virial_fdotr_compute(PairTableKokkos*); }; - - - - - } #endif @@ -297,4 +263,10 @@ E: Cannot use chosen neighbor list style with lj/cut/kk That style is not supported by Kokkos. + + + */ + + + diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp new file mode 100644 index 0000000000..4c809d98bd --- /dev/null +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -0,0 +1,634 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include +#include +#include +#include +#include "pair_table_rx_kokkos.h" +#include "kokkos.h" +#include "atom.h" +#include "force.h" +#include "comm.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "memory.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; + +enum{NONE,RLINEAR,RSQ,BMP}; +enum{FULL,HALFTHREAD,HALF}; + +#define MAXLINE 1024 + +/* ---------------------------------------------------------------------- */ + +template +PairTableRXKokkos::PairTableRXKokkos(LAMMPS *lmp) : PairTableRX(lmp) +{ + update_table = 0; + atomKK = (AtomKokkos *) atom; + ntables = 0; + tables = NULL; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + h_table = new TableHost(); + d_table = new TableDevice(); +} + +/* ---------------------------------------------------------------------- */ + +template +PairTableRXKokkos::~PairTableRXKokkos() +{ +/* for (int m = 0; m < ntables; m++) free_table(&tables[m]); + memory->sfree(tables); + + if (allocated) { + memory->destroy(setflag); + memory->destroy(cutsq); + memory->destroy(tabindex); + }*/ + delete h_table; + delete d_table; + +} + +/* ---------------------------------------------------------------------- */ + +template +void PairTableRXKokkos::compute(int eflag_in, int vflag_in) +{ + if(update_table) + create_kokkos_tables(); + if(tabstyle == LOOKUP) + compute_style(eflag_in,vflag_in); + if(tabstyle == LINEAR) + compute_style(eflag_in,vflag_in); + if(tabstyle == SPLINE) + compute_style(eflag_in,vflag_in); + if(tabstyle == BITMAP) + compute_style(eflag_in,vflag_in); 
+} + +template +template +void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + atomKK->sync(execution_space,datamask_read); + //k_cutsq.template sync(); + //k_params.template sync(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = c_x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + type = atomKK->k_type.view(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; + newton_pair = force->newton_pair; + d_cutsq = d_table->cutsq; + // loop over neighbors of my atoms + + EV_FLOAT ev; + if(atom->ntypes > MAX_TYPES_STACKPARAMS) { + if (neighflag == FULL) { + PairComputeFunctor,FULL,false,S_TableRXCompute > + ff(this,(NeighListKokkos*) list); + if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev); + else Kokkos::parallel_for(list->inum,ff); + } else if (neighflag == HALFTHREAD) { + PairComputeFunctor,HALFTHREAD,false,S_TableRXCompute > + ff(this,(NeighListKokkos*) list); + if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev); + else Kokkos::parallel_for(list->inum,ff); + } else if (neighflag == HALF) { + PairComputeFunctor,HALF,false,S_TableRXCompute > + f(this,(NeighListKokkos*) list); + if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); + else Kokkos::parallel_for(list->inum,f); + } else if (neighflag == N2) { + PairComputeFunctor,N2,false,S_TableRXCompute > + f(this,(NeighListKokkos*) list); + if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev); + else Kokkos::parallel_for(nlocal,f); + } else if (neighflag == FULLCLUSTER) { + typedef 
PairComputeFunctor,FULLCLUSTER,false,S_TableRXCompute > + f_type; + f_type f(this,(NeighListKokkos*) list); + #ifdef KOKKOS_HAVE_CUDA + const int teamsize = Kokkos::Impl::is_same::value ? 32 : 1; + #else + const int teamsize = 1; + #endif + const int nteams = (list->inum*+teamsize-1)/teamsize; + Kokkos::TeamPolicy config(nteams,teamsize,NeighClusterSize); + if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev); + else Kokkos::parallel_for(config,f); + } + } else { + if (neighflag == FULL) { + PairComputeFunctor,FULL,true,S_TableRXCompute > + f(this,(NeighListKokkos*) list); + if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); + else Kokkos::parallel_for(list->inum,f); + } else if (neighflag == HALFTHREAD) { + PairComputeFunctor,HALFTHREAD,true,S_TableRXCompute > + f(this,(NeighListKokkos*) list); + if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); + else Kokkos::parallel_for(list->inum,f); + } else if (neighflag == HALF) { + PairComputeFunctor,HALF,true,S_TableRXCompute > + f(this,(NeighListKokkos*) list); + if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); + else Kokkos::parallel_for(list->inum,f); + } else if (neighflag == N2) { + PairComputeFunctor,N2,true,S_TableRXCompute > + f(this,(NeighListKokkos*) list); + if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev); + else Kokkos::parallel_for(nlocal,f); + } else if (neighflag == FULLCLUSTER) { + typedef PairComputeFunctor,FULLCLUSTER,true,S_TableRXCompute > + f_type; + f_type f(this,(NeighListKokkos*) list); + #ifdef KOKKOS_HAVE_CUDA + const int teamsize = Kokkos::Impl::is_same::value ? 
32 : 1; + #else + const int teamsize = 1; + #endif + const int nteams = (list->inum*+teamsize-1)/teamsize; + Kokkos::TeamPolicy config(nteams,teamsize,NeighClusterSize); + if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev); + else Kokkos::parallel_for(config,f); + } + } + + if (eflag) eng_vdwl += ev.evdwl; + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); +} + +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairTableRXKokkos:: +compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const { + (void) i; + (void) j; + union_int_float_t rsq_lookup; + double fpair; + const int tidx = d_table_const.tabindex(itype,jtype); + //const Table* const tb = &tables[tabindex[itype][jtype]]; + + //if (rsq < d_table_const.innersq(tidx)) + // error->one(FLERR,"Pair distance < table inner cutoff"); + + + if (Specialisation::TabStyle == LOOKUP) { + const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + //if (itable >= tlm1) + // error->one(FLERR,"Pair distance > table outer cutoff"); + fpair = d_table_const.f(tidx,itable); + } else if (Specialisation::TabStyle == LINEAR) { + const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + //if (itable >= tlm1) + // error->one(FLERR,"Pair distance > table outer cutoff"); + const double fraction = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); + fpair = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable); + } else if (Specialisation::TabStyle == SPLINE) { + const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + //if (itable >= tlm1) + // error->one(FLERR,"Pair distance > table outer cutoff"); + const double b = (rsq - 
d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); + const double a = 1.0 - b; + fpair = a * d_table_const.f(tidx,itable) + b * d_table_const.f(tidx,itable+1) + + ((a*a*a-a)*d_table_const.f2(tidx,itable) + (b*b*b-b)*d_table_const.f2(tidx,itable+1)) * + d_table_const.deltasq6(tidx); + } else { + rsq_lookup.f = rsq; + int itable = rsq_lookup.i & d_table_const.nmask(tidx); + itable >>= d_table_const.nshiftbits(tidx); + const double fraction = (rsq_lookup.f - d_table_const.rsq(tidx,itable)) * d_table_const.drsq(tidx,itable); + fpair = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable); + } + return fpair; +} + +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairTableRXKokkos:: +compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const { + (void) i; + (void) j; + double evdwl; + union_int_float_t rsq_lookup; + const int tidx = d_table_const.tabindex(itype,jtype); + //const Table* const tb = &tables[tabindex[itype][jtype]]; + + //if (rsq < d_table_const.innersq(tidx)) + // error->one(FLERR,"Pair distance < table inner cutoff"); + + if (Specialisation::TabStyle == LOOKUP) { + const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + //if (itable >= tlm1) + // error->one(FLERR,"Pair distance > table outer cutoff"); + evdwl = d_table_const.e(tidx,itable); + } else if (Specialisation::TabStyle == LINEAR) { + const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + //if (itable >= tlm1) + // error->one(FLERR,"Pair distance > table outer cutoff"); + const double fraction = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); + evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable); + } else if (Specialisation::TabStyle == SPLINE) { + const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + //if (itable >= tlm1) + // 
error->one(FLERR,"Pair distance > table outer cutoff"); + const double b = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); + const double a = 1.0 - b; + evdwl = a * d_table_const.e(tidx,itable) + b * d_table_const.e(tidx,itable+1) + + ((a*a*a-a)*d_table_const.e2(tidx,itable) + (b*b*b-b)*d_table_const.e2(tidx,itable+1)) * + d_table_const.deltasq6(tidx); + } else { + rsq_lookup.f = rsq; + int itable = rsq_lookup.i & d_table_const.nmask(tidx); + itable >>= d_table_const.nshiftbits(tidx); + const double fraction = (rsq_lookup.f - d_table_const.rsq(tidx,itable)) * d_table_const.drsq(tidx,itable); + evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable); + } + return evdwl; +} + +template +void PairTableRXKokkos::create_kokkos_tables() +{ + const int tlm1 = tablength-1; + + memory->create_kokkos(d_table->nshiftbits,h_table->nshiftbits,ntables,"Table::nshiftbits"); + memory->create_kokkos(d_table->nmask,h_table->nmask,ntables,"Table::nmask"); + memory->create_kokkos(d_table->innersq,h_table->innersq,ntables,"Table::innersq"); + memory->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta"); + memory->create_kokkos(d_table->deltasq6,h_table->deltasq6,ntables,"Table::deltasq6"); + + if(tabstyle == LOOKUP) { + memory->create_kokkos(d_table->e,h_table->e,ntables,tlm1,"Table::e"); + memory->create_kokkos(d_table->f,h_table->f,ntables,tlm1,"Table::f"); + } + + if(tabstyle == LINEAR) { + memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq"); + memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e"); + memory->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f"); + memory->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de"); + memory->create_kokkos(d_table->df,h_table->df,ntables,tlm1,"Table::df"); + } + + if(tabstyle == SPLINE) { + memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq"); + 
memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e"); + memory->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f"); + memory->create_kokkos(d_table->e2,h_table->e2,ntables,tablength,"Table::e2"); + memory->create_kokkos(d_table->f2,h_table->f2,ntables,tablength,"Table::f2"); + } + + if(tabstyle == BITMAP) { + int ntable = 1 << tablength; + memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,ntable,"Table::rsq"); + memory->create_kokkos(d_table->e,h_table->e,ntables,ntable,"Table::e"); + memory->create_kokkos(d_table->f,h_table->f,ntables,ntable,"Table::f"); + memory->create_kokkos(d_table->de,h_table->de,ntables,ntable,"Table::de"); + memory->create_kokkos(d_table->df,h_table->df,ntables,ntable,"Table::df"); + memory->create_kokkos(d_table->drsq,h_table->drsq,ntables,ntable,"Table::drsq"); + } + + for(int i=0; i < ntables; i++) { + Table* tb = &tables[i]; + + h_table->nshiftbits[i] = tb->nshiftbits; + h_table->nmask[i] = tb->nmask; + h_table->innersq[i] = tb->innersq; + h_table->invdelta[i] = tb->invdelta; + h_table->deltasq6[i] = tb->deltasq6; + + for(int j = 0; jrsq.dimension_1(); j++) + h_table->rsq(i,j) = tb->rsq[j]; + for(int j = 0; jdrsq.dimension_1(); j++) + h_table->drsq(i,j) = tb->drsq[j]; + for(int j = 0; je.dimension_1(); j++) + h_table->e(i,j) = tb->e[j]; + for(int j = 0; jde.dimension_1(); j++) + h_table->de(i,j) = tb->de[j]; + for(int j = 0; jf.dimension_1(); j++) + h_table->f(i,j) = tb->f[j]; + for(int j = 0; jdf.dimension_1(); j++) + h_table->df(i,j) = tb->df[j]; + for(int j = 0; je2.dimension_1(); j++) + h_table->e2(i,j) = tb->e2[j]; + for(int j = 0; jf2.dimension_1(); j++) + h_table->f2(i,j) = tb->f2[j]; + } + + + Kokkos::deep_copy(d_table->nshiftbits,h_table->nshiftbits); + Kokkos::deep_copy(d_table->nmask,h_table->nmask); + Kokkos::deep_copy(d_table->innersq,h_table->innersq); + Kokkos::deep_copy(d_table->invdelta,h_table->invdelta); + Kokkos::deep_copy(d_table->deltasq6,h_table->deltasq6); + 
Kokkos::deep_copy(d_table->rsq,h_table->rsq); + Kokkos::deep_copy(d_table->drsq,h_table->drsq); + Kokkos::deep_copy(d_table->e,h_table->e); + Kokkos::deep_copy(d_table->de,h_table->de); + Kokkos::deep_copy(d_table->f,h_table->f); + Kokkos::deep_copy(d_table->df,h_table->df); + Kokkos::deep_copy(d_table->e2,h_table->e2); + Kokkos::deep_copy(d_table->f2,h_table->f2); + Kokkos::deep_copy(d_table->tabindex,h_table->tabindex); + + d_table_const.nshiftbits = d_table->nshiftbits; + d_table_const.nmask = d_table->nmask; + d_table_const.innersq = d_table->innersq; + d_table_const.invdelta = d_table->invdelta; + d_table_const.deltasq6 = d_table->deltasq6; + d_table_const.rsq = d_table->rsq; + d_table_const.drsq = d_table->drsq; + d_table_const.e = d_table->e; + d_table_const.de = d_table->de; + d_table_const.f = d_table->f; + d_table_const.df = d_table->df; + d_table_const.e2 = d_table->e2; + d_table_const.f2 = d_table->f2; + + + Kokkos::deep_copy(d_table->cutsq,h_table->cutsq); + update_table = 0; +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template +void PairTableRXKokkos::allocate() +{ + allocated = 1; + const int nt = atom->ntypes + 1; + + memory->create(setflag,nt,nt,"pair:setflag"); + memory->create_kokkos(d_table->cutsq,h_table->cutsq,cutsq,nt,nt,"pair:cutsq"); + memory->create_kokkos(d_table->tabindex,h_table->tabindex,tabindex,nt,nt,"pair:tabindex"); + + d_table_const.cutsq = d_table->cutsq; + d_table_const.tabindex = d_table->tabindex; + memset(&setflag[0][0],0,nt*nt*sizeof(int)); + memset(&cutsq[0][0],0,nt*nt*sizeof(double)); + memset(&tabindex[0][0],0,nt*nt*sizeof(int)); +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +template +void PairTableRXKokkos::settings(int narg, char **arg) +{ + if (narg < 
2) error->all(FLERR,"Illegal pair_style command"); + + // new settings + + if (strcmp(arg[0],"lookup") == 0) tabstyle = LOOKUP; + else if (strcmp(arg[0],"linear") == 0) tabstyle = LINEAR; + else if (strcmp(arg[0],"spline") == 0) tabstyle = SPLINE; + else if (strcmp(arg[0],"bitmap") == 0) tabstyle = BITMAP; + else error->all(FLERR,"Unknown table style in pair_style command"); + + tablength = force->inumeric(FLERR,arg[1]); + if (tablength < 2) error->all(FLERR,"Illegal number of pair table entries"); + + // optional keywords + // assert the tabulation is compatible with a specific long-range solver + + int iarg = 2; + while (iarg < narg) { + if (strcmp(arg[iarg],"ewald") == 0) ewaldflag = 1; + else if (strcmp(arg[iarg],"pppm") == 0) pppmflag = 1; + else if (strcmp(arg[iarg],"msm") == 0) msmflag = 1; + else if (strcmp(arg[iarg],"dispersion") == 0) dispersionflag = 1; + else if (strcmp(arg[iarg],"tip4p") == 0) tip4pflag = 1; + else error->all(FLERR,"Illegal pair_style command"); + iarg++; + } + + // delete old tables, since cannot just change settings + + for (int m = 0; m < ntables; m++) free_table(&tables[m]); + memory->sfree(tables); + + if (allocated) { + memory->destroy(setflag); + + d_table_const.tabindex = d_table->tabindex = typename ArrayTypes::t_int_2d(); + h_table->tabindex = typename ArrayTypes::t_int_2d(); + + d_table_const.cutsq = d_table->cutsq = typename ArrayTypes::t_ffloat_2d(); + h_table->cutsq = typename ArrayTypes::t_ffloat_2d(); + } + allocated = 0; + + ntables = 0; + tables = NULL; +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +template +double PairTableRXKokkos::init_one(int i, int j) +{ + if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set"); + + tabindex[j][i] = tabindex[i][j]; + + if(i +void PairTableRXKokkos::init_style() +{ + 
neighbor->request(this,instance_me); + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same::value && + !Kokkos::Impl::is_same::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same::value; + + if (neighflag == FULL) { + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full_cluster = 0; + } else if (neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 1; + neighbor->requests[irequest]->full_cluster = 0; + } else if (neighflag == N2) { + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full_cluster = 0; + } else if (neighflag == FULLCLUSTER) { + neighbor->requests[irequest]->full_cluster = 1; + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk"); + } +} + +/* +template template +KOKKOS_INLINE_FUNCTION +void PairTableRXKokkos:: +ev_tally(EV_FLOAT &ev, const int &i, const int &j, const F_FLOAT &fpair, + const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const +{ + const int EFLAG = eflag; + const int NEWTON_PAIR = newton_pair; + const int VFLAG = vflag_either; + + if (EFLAG) { + if (eflag_atom) { + E_FLOAT epairhalf = 0.5 * (ev.evdwl + ev.ecoul); + if (NEWTON_PAIR || i < nlocal) eatom[i] += epairhalf; + if (NEWTON_PAIR || j < nlocal) eatom[j] += epairhalf; + } + } + + if (VFLAG) { + const E_FLOAT v0 = delx*delx*fpair; + const E_FLOAT v1 = dely*dely*fpair; + const E_FLOAT v2 = delz*delz*fpair; + const E_FLOAT v3 = delx*dely*fpair; + const E_FLOAT v4 = delx*delz*fpair; + const E_FLOAT v5 = dely*delz*fpair; + + if (vflag_global) { + if (NEIGHFLAG) { + if (NEWTON_PAIR) { + ev.v[0] += v0; + ev.v[1] += v1; + ev.v[2] += v2; + 
ev.v[3] += v3; + ev.v[4] += v4; + ev.v[5] += v5; + } else { + if (i < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + if (j < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + } else { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + + if (vflag_atom) { + if (NEWTON_PAIR || i < nlocal) { + d_vatom(i,0) += 0.5*v0; + d_vatom(i,1) += 0.5*v1; + d_vatom(i,2) += 0.5*v2; + d_vatom(i,3) += 0.5*v3; + d_vatom(i,4) += 0.5*v4; + d_vatom(i,5) += 0.5*v5; + } + if (NEWTON_PAIR || (NEIGHFLAG && j < nlocal)) { + d_vatom(j,0) += 0.5*v0; + d_vatom(j,1) += 0.5*v1; + d_vatom(j,2) += 0.5*v2; + d_vatom(j,3) += 0.5*v3; + d_vatom(j,4) += 0.5*v4; + d_vatom(j,5) += 0.5*v5; + } + } + } +} +*/ +template +void PairTableRXKokkos::cleanup_copy() { + // WHY needed: this prevents parent copy from deallocating any arrays + allocated = 0; + cutsq = NULL; + eatom = NULL; + vatom = NULL; + h_table=NULL; d_table=NULL; +} + +namespace LAMMPS_NS { +template class PairTableRXKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class PairTableRXKokkos; +#endif + +} + diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h new file mode 100644 index 0000000000..6f0616cc28 --- /dev/null +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -0,0 +1,269 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. 
+ + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(table/rx/kk,PairTableRXKokkos) +PairStyle(table/rx/kk/device,PairTableRXKokkos) +PairStyle(table/rx/kk/host,PairTableRXKokkos) + +#else + +#ifndef LMP_PAIR_TABLE_RX_KOKKOS_H +#define LMP_PAIR_TABLE_RX_KOKKOS_H + +#include "pair_table_rx.h" +#include "pair_kokkos.h" +#include "neigh_list_kokkos.h" +#include "atom_kokkos.h" + +namespace LAMMPS_NS { + +template +struct S_TableRXCompute { + enum {TabStyle = TABSTYLE}; +}; + +template +class PairTableRXComputeFunctor; + +template +class PairTableRXKokkos : public PairTableRX { + public: + + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER}; + enum {COUL_FLAG=0}; + typedef DeviceType device_type; + + PairTableRXKokkos(class LAMMPS *); + virtual ~PairTableRXKokkos(); + + virtual void compute(int, int); + + template + void compute_style(int, int); + + /*template + KOKKOS_FUNCTION + EV_FLOAT compute_item(const int& i, + const NeighListKokkos &list) const; +*/ + void settings(int, char **); + double init_one(int, int); + void init_style(); + + protected: + enum{LOOKUP,LINEAR,SPLINE,BITMAP}; + + int tabstyle,tablength; + /*struct TableDeviceConst { + typename ArrayTypes::t_ffloat_2d_randomread cutsq; + typename ArrayTypes::t_int_2d_randomread tabindex; + typename ArrayTypes::t_int_1d_randomread nshiftbits,nmask; + typename ArrayTypes::t_ffloat_1d_randomread innersq,invdelta,deltasq6; + typename ArrayTypes::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2; + };*/ + //Its faster not to use texture fetch if the number of tables is less than 32! 
+ struct TableDeviceConst { + typename ArrayTypes::t_ffloat_2d cutsq; + typename ArrayTypes::t_int_2d tabindex; + typename ArrayTypes::t_int_1d nshiftbits,nmask; + typename ArrayTypes::t_ffloat_1d innersq,invdelta,deltasq6; + typename ArrayTypes::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2; + }; + + struct TableDevice { + typename ArrayTypes::t_ffloat_2d cutsq; + typename ArrayTypes::t_int_2d tabindex; + typename ArrayTypes::t_int_1d nshiftbits,nmask; + typename ArrayTypes::t_ffloat_1d innersq,invdelta,deltasq6; + typename ArrayTypes::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2; + }; + + struct TableHost { + typename ArrayTypes::t_ffloat_2d cutsq; + typename ArrayTypes::t_int_2d tabindex; + typename ArrayTypes::t_int_1d nshiftbits,nmask; + typename ArrayTypes::t_ffloat_1d innersq,invdelta,deltasq6; + typename ArrayTypes::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2; + }; + + TableDeviceConst d_table_const; + TableDevice* d_table; + TableHost* h_table; + + int **tabindex; + F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + + typename ArrayTypes::t_ffloat_2d d_cutsq; + + void allocate(); + + typename ArrayTypes::t_x_array_randomread x; + typename ArrayTypes::t_x_array_const c_x; + typename ArrayTypes::t_f_array f; + typename ArrayTypes::t_int_1d_randomread type; + typename ArrayTypes::t_efloat_1d d_eatom; + typename ArrayTypes::t_virial_array d_vatom; + + protected: + int nlocal,nall,eflag,vflag,neighflag,newton_pair; + + int update_table; + void create_kokkos_tables(); + void cleanup_copy(); + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const { + return 0; + } + 
+ friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + + friend void pair_virial_fdotr_compute(PairTableRXKokkos*); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Pair distance < table inner cutoff + +Two atoms are closer together than the pairwise table allows. + +E: Pair distance > table outer cutoff + +Two atoms are further apart than the pairwise table allows. + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. 
You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Unknown table style in pair_style command + +Style of table is invalid for use with pair_style table command. + +E: Illegal number of pair table entries + +There must be at least 2 table entries. + +E: Invalid pair table length + +Length of read-in pair table is invalid + +E: Invalid pair table cutoff + +Cutoffs in pair_coeff command are not valid with read-in pair table. + +E: Bitmapped table in file does not match requested table + +Setting for bitmapped table in pair_coeff command must match table +in file exactly. + +E: All pair coeffs are not set + +All pair coefficients must be set in the data file or by the +pair_coeff command before running a simulation. + +E: Cannot open file %s + +The specified file cannot be opened. Check that the path and name are +correct. If the file is a compressed file, also check that the gzip +executable can be found and run. + +E: Did not find keyword in table file + +Keyword used in pair_coeff command was not found in table file. + +E: Bitmapped table is incorrect length in table file + +Number of table entries is not a correct power of 2. + +E: Invalid keyword in pair table parameters + +Keyword used in list of table parameters is not recognized. + +E: Pair table parameters did not set N + +List of pair table parameters must include N setting. + +E: Pair table cutoffs must all be equal to use with KSpace + +When using pair style table with a long-range KSpace solver, the +cutoffs for all atom type pairs must all be the same, since the +long-range solver starts at that cutoff. + +E: Cannot use chosen neighbor list style with lj/cut/kk + +That style is not supported by Kokkos. 
+ + + + +*/ \ No newline at end of file diff --git a/src/USER-DPD/pair_multi_lucy.h b/src/USER-DPD/pair_multi_lucy.h index f3c67e4fa4..0a2d2f9885 100644 --- a/src/USER-DPD/pair_multi_lucy.h +++ b/src/USER-DPD/pair_multi_lucy.h @@ -18,7 +18,7 @@ PairStyle(multi/lucy,PairMultiLucy) #else #ifndef LMP_PAIR_MULTI_LUCY_H -#define LMP_PAIR_MUTLI_LUCY_H +#define LMP_PAIR_MULTI_LUCY_H #include "pair.h" diff --git a/src/USER-DPD/pair_multi_lucy_rx.cpp b/src/USER-DPD/pair_multi_lucy_rx.cpp index cd107f1519..431293e823 100644 --- a/src/USER-DPD/pair_multi_lucy_rx.cpp +++ b/src/USER-DPD/pair_multi_lucy_rx.cpp @@ -59,8 +59,7 @@ static const char cite_pair_multi_lucy_rx[] = /* ---------------------------------------------------------------------- */ -PairMultiLucyRX::PairMultiLucyRX(LAMMPS *lmp) : Pair(lmp), - ntables(0), tables(NULL), tabindex(NULL), site1(NULL), site2(NULL) +PairMultiLucyRX::PairMultiLucyRX(LAMMPS *lmp) : Pair(lmp) { if (lmp->citeme) lmp->citeme->add(cite_pair_multi_lucy_rx); @@ -69,6 +68,9 @@ PairMultiLucyRX::PairMultiLucyRX(LAMMPS *lmp) : Pair(lmp), ntables = 0; tables = NULL; + tabindex = NULL; + site1 = site2 = NULL; + comm_forward = 1; comm_reverse = 1; diff --git a/src/USER-DPD/pair_multi_lucy_rx.h b/src/USER-DPD/pair_multi_lucy_rx.h index 596a6c684d..2913716c5a 100644 --- a/src/USER-DPD/pair_multi_lucy_rx.h +++ b/src/USER-DPD/pair_multi_lucy_rx.h @@ -18,7 +18,7 @@ PairStyle(multi/lucy/rx,PairMultiLucyRX) #else #ifndef LMP_PAIR_MULTI_LUCY_RX_H -#define LMP_PAIR_MUTLI_LUCY_RX_H +#define LMP_PAIR_MULTI_LUCY_RX_H #include "pair.h" diff --git a/src/pair_table.h b/src/pair_table.h index 6cfd9df832..358491f7cf 100644 --- a/src/pair_table.h +++ b/src/pair_table.h @@ -30,9 +30,9 @@ class PairTable : public Pair { virtual ~PairTable(); virtual void compute(int, int); - void settings(int, char **); + virtual void settings(int, char **); void coeff(int, char **); - double init_one(int, int); + virtual double init_one(int, int); void write_restart(FILE *); void 
read_restart(FILE *); void write_restart_settings(FILE *); @@ -58,7 +58,7 @@ class PairTable : public Pair { int **tabindex; - void allocate(); + virtual void allocate(); void read_table(Table *, char *, char *); void param_extract(Table *, char *); void bcast_table(Table *); From f93c62d3e20301ea53e41a5a6f50b8aa9957d942 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 13 Dec 2016 16:54:00 -0700 Subject: [PATCH 008/267] Reverting accidental change --- src/USER-DPD/pair_multi_lucy_rx.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/USER-DPD/pair_multi_lucy_rx.cpp b/src/USER-DPD/pair_multi_lucy_rx.cpp index 431293e823..cd107f1519 100644 --- a/src/USER-DPD/pair_multi_lucy_rx.cpp +++ b/src/USER-DPD/pair_multi_lucy_rx.cpp @@ -59,7 +59,8 @@ static const char cite_pair_multi_lucy_rx[] = /* ---------------------------------------------------------------------- */ -PairMultiLucyRX::PairMultiLucyRX(LAMMPS *lmp) : Pair(lmp) +PairMultiLucyRX::PairMultiLucyRX(LAMMPS *lmp) : Pair(lmp), + ntables(0), tables(NULL), tabindex(NULL), site1(NULL), site2(NULL) { if (lmp->citeme) lmp->citeme->add(cite_pair_multi_lucy_rx); @@ -68,9 +69,6 @@ PairMultiLucyRX::PairMultiLucyRX(LAMMPS *lmp) : Pair(lmp) ntables = 0; tables = NULL; - tabindex = NULL; - site1 = site2 = NULL; - comm_forward = 1; comm_reverse = 1; From 3e2cd6d265db7bbbe97cc2dc00977cead964e67c Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 14 Dec 2016 11:46:04 -0700 Subject: [PATCH 009/267] Merging from master to 13Dec16 version --- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 1 - src/KOKKOS/pair_exp6_rx_kokkos.cpp | 2 -- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 2 -- src/KOKKOS/pair_table_rx_kokkos.cpp | 35 +---------------------- src/KOKKOS/pair_table_rx_kokkos.h | 10 +------ src/KOKKOS/pair_vashishta_kokkos.cpp | 1 - src/neigh_request.cpp | 1 - src/neigh_request.h | 1 - 8 files changed, 2 insertions(+), 51 deletions(-) diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp 
b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index f7e1fecc09..45da5bf165 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -317,7 +317,6 @@ void PairDPDfdtEnergyKokkos::init_style() if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with dpd/fdt/energy/kk"); } diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index a7d5569537..569d131af7 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -87,12 +87,10 @@ void PairExp6rxKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; neighbor->requests[irequest]->ghost = 1; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; neighbor->requests[irequest]->ghost = 1; } else { error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk"); diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index de70ae86f5..d1a13b12fd 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -101,12 +101,10 @@ void PairMultiLucyRXKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; neighbor->requests[irequest]->ghost = 1; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; neighbor->requests[irequest]->ghost = 1; } 
else { error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk"); diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 4c809d98bd..bf32d1c14f 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -96,7 +96,7 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); @@ -143,19 +143,6 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev); else Kokkos::parallel_for(nlocal,f); - } else if (neighflag == FULLCLUSTER) { - typedef PairComputeFunctor,FULLCLUSTER,false,S_TableRXCompute > - f_type; - f_type f(this,(NeighListKokkos*) list); - #ifdef KOKKOS_HAVE_CUDA - const int teamsize = Kokkos::Impl::is_same::value ? 32 : 1; - #else - const int teamsize = 1; - #endif - const int nteams = (list->inum*+teamsize-1)/teamsize; - Kokkos::TeamPolicy config(nteams,teamsize,NeighClusterSize); - if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev); - else Kokkos::parallel_for(config,f); } } else { if (neighflag == FULL) { @@ -178,19 +165,6 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev); else Kokkos::parallel_for(nlocal,f); - } else if (neighflag == FULLCLUSTER) { - typedef PairComputeFunctor,FULLCLUSTER,true,S_TableRXCompute > - f_type; - f_type f(this,(NeighListKokkos*) list); - #ifdef KOKKOS_HAVE_CUDA - const int teamsize = Kokkos::Impl::is_same::value ? 
32 : 1; - #else - const int teamsize = 1; - #endif - const int nteams = (list->inum*+teamsize-1)/teamsize; - Kokkos::TeamPolicy config(nteams,teamsize,NeighClusterSize); - if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev); - else Kokkos::parallel_for(config,f); } } @@ -511,19 +485,12 @@ void PairTableRXKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->full_cluster = 0; } else if (neighflag == N2) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; - } else if (neighflag == FULLCLUSTER) { - neighbor->requests[irequest]->full_cluster = 1; - neighbor->requests[irequest]->full = 1; - neighbor->requests[irequest]->half = 0; } else { error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk"); } diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index 6f0616cc28..b379901201 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -41,7 +41,7 @@ template class PairTableRXKokkos : public PairTableRX { public: - enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER}; + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2}; enum {COUL_FLAG=0}; typedef DeviceType device_type; @@ -141,45 +141,37 @@ class PairTableRXKokkos : public PairTableRX { friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; - friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; - friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor 
>; friend class PairComputeFunctor >; friend class PairComputeFunctor >; - friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; - friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; - friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; - friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; - friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; friend class PairComputeFunctor >; - friend class PairComputeFunctor >; friend void pair_virial_fdotr_compute(PairTableRXKokkos*); }; diff --git a/src/KOKKOS/pair_vashishta_kokkos.cpp b/src/KOKKOS/pair_vashishta_kokkos.cpp index 73e4e04f98..bf3b5bae85 100644 --- a/src/KOKKOS/pair_vashishta_kokkos.cpp +++ b/src/KOKKOS/pair_vashishta_kokkos.cpp @@ -603,7 +603,6 @@ void PairVashishtaKokkos::init_style() if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full_cluster = 0; if (neighflag == FULL) neighbor->requests[irequest]->ghost = 1; else diff --git a/src/neigh_request.cpp b/src/neigh_request.cpp index 4a3eb14933..a8ba8496cd 100644 --- a/src/neigh_request.cpp +++ b/src/neigh_request.cpp @@ -39,7 +39,6 @@ NeighRequest::NeighRequest(LAMMPS *lmp) : Pointers(lmp) gran = granhistory = 0; respainner = respamiddle = respaouter = 0; half_from_full = 0; - full_cluster = 0; // only set when command = 1; diff --git a/src/neigh_request.h 
b/src/neigh_request.h index 0b561710e7..62cb11f830 100644 --- a/src/neigh_request.h +++ b/src/neigh_request.h @@ -47,7 +47,6 @@ class NeighRequest : protected Pointers { int respainner; // 1 if a rRESPA inner list int respamiddle; // 1 if a rRESPA middle list int respaouter; // 1 if a rRESPA outer list - int full_cluster; // only used by Kokkos pair styles // command_style only set if command = 1 // allows print_pair_info() to access command name From a9d26b3f4aadbf8e7a7aa91190d513b6a02217d2 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 14 Dec 2016 12:58:02 -0700 Subject: [PATCH 010/267] Updates to Kokkos files --- src/KOKKOS/fix_eos_table_rx_kokkos.cpp | 143 ++++++++++++++--------- src/KOKKOS/fix_eos_table_rx_kokkos.h | 21 ++++ src/KOKKOS/pair_exp6_rx_kokkos.cpp | 16 ++- src/KOKKOS/pair_exp6_rx_kokkos.h | 2 + src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 33 +++--- src/KOKKOS/pair_multi_lucy_rx_kokkos.h | 2 + 6 files changed, 145 insertions(+), 72 deletions(-) diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp index a1e0b1a07d..faf490fcc0 100644 --- a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp @@ -42,6 +42,9 @@ FixEOStableRXKokkos::FixEOStableRXKokkos(LAMMPS *lmp, int narg, char execution_space = ExecutionSpaceFromDevice::space; datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + + k_error_flag = DAT::tdual_int_scalar("fix:error_flag"); + k_warning_flag = DAT::tdual_int_scalar("fix:warning_flag"); } /* ---------------------------------------------------------------------- */ @@ -65,22 +68,33 @@ void FixEOStableRXKokkos::setup(int vflag) dpdTheta= atomKK->k_dpdTheta.view(); uCG = atomKK->k_uCG.view(); uCGnew = atomKK->k_uCGnew.view(); - double duChem; - for (int i = 0; i < nlocal; i++) // parallel_for - if (mask[i] & groupbit){ - duChem = uCG[i] - uCGnew[i]; - uChem[i] += duChem; - uCG[i] = 0.0; - 
uCGnew[i] = 0.0; - } + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); // Communicate the updated momenta and velocities to all nodes comm->forward_comm_fix(this); - for (int i = 0; i < nlocal; i++) // parallel_for - if (mask[i] & groupbit) - temperature_lookup(i,uCond[i]+uMech[i]+uChem[i],dpdTheta[i]); + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + + error_check(); +} + +template +KOKKOS_INLINE_FUNCTION +void FixEOStableRXKokkos::operator()(TagFixEOStableRXSetup, const int &i) const { + if (mask[i] & groupbit) { + const double duChem = uCG[i] - uCGnew[i]; + uChem[i] += duChem; + uCG[i] = 0.0; + uCGnew[i] = 0.0; + } +} + +template +KOKKOS_INLINE_FUNCTION +void FixEOStableRXKokkos::operator()(TagFixEOStableRXTemperatureLookup, const int &i) const { + if (mask[i] & groupbit) + temperature_lookup(i,uCond[i]+uMech[i]+uChem[i],dpdTheta[i]); } /* ---------------------------------------------------------------------- */ @@ -94,25 +108,28 @@ void FixEOStableRXKokkos::init() uMech = atomKK->k_uMech.view(); uChem = atomKK->k_uChem.view(); dpdTheta= atomKK->k_dpdTheta.view(); - double tmp; - if(this->restart_reset){ - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) - temperature_lookup(i,uCond[i]+uMech[i]+uChem[i],dpdTheta[i]); - } else { - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { - if(dpdTheta[i] <= 0.0) - error->one(FLERR,"Internal temperature <= zero"); - energy_lookup(i,dpdTheta[i],tmp); - uCond[i] = tmp / 2.0; - uMech[i] = tmp / 2.0; - uChem[i] = 0.0; - } - } + if (this->restart_reset) + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + else + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + + error_check(); } +template +KOKKOS_INLINE_FUNCTION +void FixEOStableRXKokkos::operator()(TagFixEOStableRXInit, const int &i) const { + double tmp; + if (mask[i] & groupbit) { + if(dpdTheta[i] <= 0.0) + k_error_flag.d_view() = 1; + energy_lookup(i,dpdTheta[i],tmp); + uCond[i] = tmp / 2.0; + uMech[i] = 
tmp / 2.0; + uChem[i] = 0.0; + } +} /* ---------------------------------------------------------------------- */ @@ -126,12 +143,19 @@ void FixEOStableRXKokkos::post_integrate() uChem = atomKK->k_uChem.view(); dpdTheta= atomKK->k_dpdTheta.view(); - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit){ - temperature_lookup(i,uCond[i]+uMech[i]+uChem[i],dpdTheta[i]); - if(dpdTheta[i] <= 0.0) - error->one(FLERR,"Internal temperature <= zero"); - } + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + + error_check(); +} + +template +KOKKOS_INLINE_FUNCTION +void FixEOStableRXKokkos::operator()(TagFixEOStableRXTemperatureLookup2, const int &i) const { + if (mask[i] & groupbit){ + temperature_lookup(i,uCond[i]+uMech[i]+uChem[i],dpdTheta[i]); + if (dpdTheta[i] <= 0.0) + k_error_flag.d_view() = 1; + } } /* ---------------------------------------------------------------------- */ @@ -152,23 +176,14 @@ void FixEOStableRXKokkos::end_of_step() // Communicate the ghost uCGnew comm->reverse_comm_fix(this); - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit){ - duChem = uCG[i] - uCGnew[i]; - uChem[i] += duChem; - uCG[i] = 0.0; - uCGnew[i] = 0.0; - } + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); // Communicate the updated momenta and velocities to all nodes comm->forward_comm_fix(this); - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit){ - temperature_lookup(i,uCond[i]+uMech[i]+uChem[i],dpdTheta[i]); - if(dpdTheta[i] <= 0.0) - error->one(FLERR,"Internal temperature <= zero"); - } + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + + error_check(); } /* ---------------------------------------------------------------------- @@ -242,13 +257,11 @@ void FixEOStableRXKokkos::temperature_lookup(int id, double ui, doub // Apply the Secant Method for(it=0; itone(FLERR,"NaN detected in secant solver."); + if(isnan(f1) || isnan(f2)) k_error_flag.d_view() = 2; temp = t1; temp = MAX(temp,tb->lo); temp = MIN(temp,tb->hi); - char 
str[256]; - sprintf(str,"Secant solver did not converge because table bounds were exceeded: it=%d id=%d ui=%lf thetai=%lf t1=%lf t2=%lf f1=%lf f2=%lf dpdTheta=%lf\n",it,id,ui,thetai,t1,t2,f1,f2,temp); - error->warning(FLERR,str); + k_warning_flag.d_view() = 1; break; } temp = t2 - f2*(t2-t1)/(f2-f1); @@ -260,11 +273,9 @@ void FixEOStableRXKokkos::temperature_lookup(int id, double ui, doub f2 = u2 - ui; } if(it==maxit){ - char str[256]; - sprintf(str,"Maxit exceeded in secant solver: id=%d ui=%lf thetai=%lf t1=%lf t2=%lf f1=%lf f2=%lf\n",id,ui,thetai,t1,t2,f1,f2); if(isnan(f1) || isnan(f2) || isnan(ui) || isnan(thetai) || isnan(t1) || isnan(t2)) - error->one(FLERR,"NaN detected in secant solver."); - error->one(FLERR,str); + k_error_flag.d_view() = 2; + k_error_flag.d_view() = 3; } thetai = temp; } @@ -346,6 +357,30 @@ void FixEOStableRXKokkos::unpack_reverse_comm(int n, int *list, doub /* ---------------------------------------------------------------------- */ +template +void FixEOStableRXKokkos::error_check() +{ + k_error_flag.template modify(); + k_error_flag.template sync(); + if (k_error_flag.h_view() == 1) + error->one(FLERR,"Internal temperature <= zero"); + else if (k_error_flag.h_view() == 2) + error->one(FLERR,"NaN detected in secant solver."); + else if (k_error_flag.h_view() == 3) + error->one(FLERR,"Maxit exceeded in secant solver."); + + k_warning_flag.template modify(); + k_warning_flag.template sync(); + if (k_warning_flag.h_view()) { + error->warning(FLERR,"Secant solver did not converge because table bounds were exceeded."); + k_warning_flag.h_view() = 0; + k_warning_flag.template modify(); + k_warning_flag.template sync(); + } +} + +/* ---------------------------------------------------------------------- */ + namespace LAMMPS_NS { template class FixEOStableRXKokkos; #ifdef KOKKOS_HAVE_CUDA diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.h b/src/KOKKOS/fix_eos_table_rx_kokkos.h index 9eccd67c54..9b0ca366a0 100644 --- 
a/src/KOKKOS/fix_eos_table_rx_kokkos.h +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.h @@ -27,6 +27,11 @@ FixStyle(eos/table/rx/kk/host,FixEOStableRXKokkos) namespace LAMMPS_NS { +struct TagFixEOStableRXInit{}; +struct TagFixEOStableRXSetup{}; +struct TagFixEOStableRXTemperatureLookup{}; +struct TagFixEOStableRXTemperatureLookup2{}; + template class FixEOStableRXKokkos : public FixEOStableRX { public: @@ -41,6 +46,18 @@ class FixEOStableRXKokkos : public FixEOStableRX { void post_integrate(); void end_of_step(); + KOKKOS_INLINE_FUNCTION + void operator()(TagFixEOStableRXInit, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagFixEOStableRXSetup, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagFixEOStableRXTemperatureLookup, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagFixEOStableRXTemperatureLookup2, const int&) const; + KOKKOS_INLINE_FUNCTION void energy_lookup(int, double, double &) const; @@ -59,12 +76,16 @@ class FixEOStableRXKokkos : public FixEOStableRX { //Table *tables, *tables2; void allocate(); + void error_check(); //double *dHf; typename AT::t_int_1d mask; typename AT::t_efloat_1d uCond,uMech,uChem,uCG,uCGnew,rho,dpdTheta,duChem; + DAT::tdual_int_scalar k_error_flag; + DAT::tdual_int_scalar k_warning_flag; + int pack_reverse_comm(int, int, double *); void unpack_reverse_comm(int, int *, double *); int pack_forward_comm(int , int *, double *, int, int *); diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 569d131af7..c46f3d037d 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -56,6 +56,8 @@ PairExp6rxKokkos::PairExp6rxKokkos(LAMMPS *lmp) : PairExp6rx(lmp) execution_space = ExecutionSpaceFromDevice::space; datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + + k_error_flag = DAT::tdual_int_scalar("pair:error_flag"); } /* 
---------------------------------------------------------------------- */ @@ -168,6 +170,11 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy(0,np_total),*this); } + k_error_flag.template modify(); + k_error_flag.template sync(); + if (k_error_flag.h_view()) + error->all(FLERR,"The number of molecules in CG particle is less than 1e-8."); + int inum = list->inum; NeighListKokkos* k_list = static_cast*>(list); d_numneigh = k_list->d_numneigh; @@ -184,6 +191,11 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); } + k_error_flag.template modify(); + k_error_flag.template sync(); + if (k_error_flag.h_view()) + error->all(FLERR,"alpha_ij is 6.0 in pair exp6"); + if (eflag_global) eng_vdwl += ev.evdwl; if (vflag_global) { virial[0] += ev.v[0]; @@ -358,7 +370,7 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxComputeall(FLERR,"alpha_ij is 6.0 in pair exp6"); + k_error_flag.d_view() = 1; // A3. 
Compute some convenient quantities for evaluating the force rminv = 1.0/rmOld12_ij; @@ -774,7 +786,7 @@ void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double } } if(nTotal < 1e-8 || nTotal_old < 1e-8) - error->all(FLERR,"The number of molecules in CG particle is less than 1e-8."); + k_error_flag.d_view() = 1; // Compute the mole fraction of molecules within the fluid portion of the particle (One Fluid Approximation) fractionOFA_old = nTotalOFA_old / nTotal_old; diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.h b/src/KOKKOS/pair_exp6_rx_kokkos.h index b0fbd3d9e5..366cf99d75 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.h +++ b/src/KOKKOS/pair_exp6_rx_kokkos.h @@ -103,6 +103,8 @@ class PairExp6rxKokkos : public PairExp6rx { DAT::t_efloat_1d d_eatom; DAT::t_virial_array d_vatom; + DAT::tdual_int_scalar k_error_flag; + typename AT::t_neighbors_2d d_neighbors; typename AT::t_int_1d_randomread d_ilist; typename AT::t_int_1d_randomread d_numneigh; diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index d1a13b12fd..bea7cb6b0b 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -70,6 +70,8 @@ PairMultiLucyRXKokkos::PairMultiLucyRXKokkos(LAMMPS *lmp) : PairMult execution_space = ExecutionSpaceFromDevice::space; datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + + k_error_flag = DAT::tdual_int_scalar("pair:error_flag"); } /* ---------------------------------------------------------------------- */ @@ -180,6 +182,15 @@ void PairMultiLucyRXKokkos::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); } + k_error_flag.template modify(); + k_error_flag.template sync(); + if (k_error_flag.h_view() == 1) + error->one(FLERR,"Density < table inner cutoff"); + else if (k_error_flag.h_view() == 2) + error->one(FLERR,"Density > table outer cutoff"); + else if 
(k_error_flag.h_view() == 3) + error->one(FLERR,"Only LOOKUP and LINEAR table styles have been implemented for pair multi/lucy/rx"); + if (eflag_global) eng_vdwl += ev.evdwl; if (vflag_global) { virial[0] += ev.v[0]; @@ -265,19 +276,13 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeinnersq || rho[j]*rho[j] < tb->innersq){ - //printf("Table inner cutoff = %lf\n",sqrt(tb->innersq)); - //printf("rho[%d]=%lf\n",i,rho[i]); - //printf("rho[%d]=%lf\n",j,rho[j]); - error->one(FLERR,"Density < table inner cutoff"); + k_error_flag.d_view() = 1; } if (tabstyle == LOOKUP) { itable = static_cast (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta); jtable = static_cast (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta); if (itable >= tlm1 || jtable >= tlm1){ - //printf("Table outer index = %d\n",tlm1); - //printf("itableIndex=%d rho[%d]=%lf\n",itable,i,rho[i]); - //printf("jtableIndex=%d rho[%d]=%lf\n",jtable,j,rho[j]); - error->one(FLERR,"Density > table outer cutoff"); + k_error_flag.d_view() = 2; } A_i = tb->f[itable]; A_j = tb->f[jtable]; @@ -290,10 +295,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute ((rho[i]*rho[i] - tb->innersq) * tb->invdelta); jtable = static_cast (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta); if (itable >= tlm1 || jtable >= tlm1){ - //printf("Table outer index = %d\n",tlm1); - //printf("itableIndex=%d rho[%d]=%lf\n",itable,i,rho[i]); - //printf("jtableIndex=%d rho[%d]=%lf\n",jtable,j,rho[j]); - error->one(FLERR,"Density > table outer cutoff"); + k_error_flag.d_view() = 2; } if(itable<0) itable=0; if(itable>=tlm1) itable=tlm1; @@ -314,7 +316,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeone(FLERR,"Only LOOKUP and LINEAR table styles have been implemented for pair multi/lucy/rx"); + } else k_error_flag.d_view() = 3; if (isite1 == isite2) fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpair; else fpair = (sqrt(fractionOld1_i*fractionOld2_j) + sqrt(fractionOld2_i*fractionOld1_j))*fpair; @@ -341,13 
+343,12 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputee[itable]; else if (tabstyle == LINEAR){ if (itable >= tlm1){ - //printf("itableIndex=%d rho[%d]=%lf\n",itable,i,rho[i]); - error->one(FLERR,"Density > table outer cutoff"); + k_error_flag.d_view() = 2; } if(itable==0) fraction_i=0.0; else fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta); evdwl = tb->e[itable] + fraction_i*tb->de[itable]; - } else error->one(FLERR,"Only LOOKUP and LINEAR table styles have been implemented for pair multi/lucy/rx"); + } else k_error_flag.d_view() = 3; evdwl *=(pi*d_cutsq(itype,itype)*d_cutsq(itype,itype))/84.0; evdwlOld = fractionOld1_i*evdwl; diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h index 74a10ddee1..ff22516eb1 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h @@ -130,6 +130,8 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { typename AT::t_int_1d_randomread d_ilist; typename AT::t_int_1d_randomread d_numneigh; + DAT::tdual_int_scalar k_error_flag; + typename AT::tdual_ffloat_2d k_cutsq; typename AT::t_ffloat_2d d_cutsq; From c0d6cbbdd3f135578b584525c92e447c663e2e1b Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 15 Dec 2016 11:18:50 -0700 Subject: [PATCH 011/267] Updates to Kokkos files --- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 72 ++++ src/KOKKOS/fix_eos_table_rx_kokkos.cpp | 2 +- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 3 + src/KOKKOS/pair_exp6_rx_kokkos.cpp | 68 ++- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 490 ++++++++++++++-------- src/KOKKOS/pair_multi_lucy_rx_kokkos.h | 79 +++- src/USER-DPD/fix_eos_table_rx.cpp | 2 + src/USER-DPD/pair_multi_lucy_rx.cpp | 2 + src/USER-DPD/pair_table_rx.cpp | 2 + src/atom_masks.h | 12 + 10 files changed, 533 insertions(+), 199 deletions(-) diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index c79559172f..58fc9c46c3 100644 --- 
a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -1801,6 +1801,15 @@ void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask) if (mask & TYPE_MASK) atomKK->k_type.sync(); if (mask & MASK_MASK) atomKK->k_mask.sync(); if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & DPDRHO_MASK) atomKK->k_rho.sync(); + if (mask & DPDTHETA_MASK) atomKK->k_dpdTheta.sync(); + if (mask & UCOND_MASK) atomKK->k_uCond.sync(); + if (mask & UMECH_MASK) atomKK->k_uMech.sync(); + if (mask & UCHEM_MASK) atomKK->k_uChem.sync(); + if (mask & UCG_MASK) atomKK->k_uCG.sync(); + if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync(); + if (mask & DUCHEM_MASK) atomKK->k_duChem.sync(); + if (mask & DVECTOR_MASK) atomKK->k_dvector.sync(); } else { if (mask & X_MASK) atomKK->k_x.sync(); if (mask & V_MASK) atomKK->k_v.sync(); @@ -1809,6 +1818,15 @@ void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask) if (mask & TYPE_MASK) atomKK->k_type.sync(); if (mask & MASK_MASK) atomKK->k_mask.sync(); if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & DPDRHO_MASK) atomKK->k_rho.sync(); + if (mask & DPDTHETA_MASK) atomKK->k_dpdTheta.sync(); + if (mask & UCOND_MASK) atomKK->k_uCond.sync(); + if (mask & UMECH_MASK) atomKK->k_uMech.sync(); + if (mask & UCHEM_MASK) atomKK->k_uChem.sync(); + if (mask & UCG_MASK) atomKK->k_uCG.sync(); + if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync(); + if (mask & DUCHEM_MASK) atomKK->k_duChem.sync(); + if (mask & DVECTOR_MASK) atomKK->k_dvector.sync(); } } @@ -1831,6 +1849,24 @@ void AtomVecDPDKokkos::sync_overlapping_device(ExecutionSpace space, unsigned in perform_async_copy(atomKK->k_mask,space); if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync()) perform_async_copy(atomKK->k_image,space); + if ((mask & DPDRHO_MASK) && atomKK->k_rho.need_sync()) + perform_async_copy(atomKK->k_rho,space); + if ((mask & DPDTHETA_MASK) && atomKK->k_dpdTheta.need_sync()) + perform_async_copy(atomKK->k_dpdTheta,space); + if ((mask & 
UCOND_MASK) && atomKK->k_uCond.need_sync()) + perform_async_copy(atomKK->k_uCond,space); + if ((mask & UMECH_MASK) && atomKK->k_uMech.need_sync()) + perform_async_copy(atomKK->k_uMech,space); + if ((mask & UCHEM_MASK) && atomKK->k_uChem.need_sync()) + perform_async_copy(atomKK->k_uChem,space); + if ((mask & UCG_MASK) && atomKK->k_uCG.need_sync()) + perform_async_copy(atomKK->k_uCG,space); + if ((mask & UCGNEW_MASK) && atomKK->k_uCGnew.need_sync()) + perform_async_copy(atomKK->k_uCGnew,space); + if ((mask & DUCHEM_MASK) && atomKK->k_duChem.need_sync()) + perform_async_copy(atomKK->k_duChem,space); + if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync()) + perform_async_copy(atomKK->k_dvector,space); } else { if ((mask & X_MASK) && atomKK->k_x.need_sync()) perform_async_copy(atomKK->k_x,space); @@ -1846,6 +1882,24 @@ void AtomVecDPDKokkos::sync_overlapping_device(ExecutionSpace space, unsigned in perform_async_copy(atomKK->k_mask,space); if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync()) perform_async_copy(atomKK->k_image,space); + if ((mask & DPDRHO_MASK) && atomKK->k_rho.need_sync()) + perform_async_copy(atomKK->k_rho,space); + if ((mask & DPDTHETA_MASK) && atomKK->k_dpdTheta.need_sync()) + perform_async_copy(atomKK->k_dpdTheta,space); + if ((mask & UCOND_MASK) && atomKK->k_uCond.need_sync()) + perform_async_copy(atomKK->k_uCond,space); + if ((mask & UMECH_MASK) && atomKK->k_uMech.need_sync()) + perform_async_copy(atomKK->k_uMech,space); + if ((mask & UCHEM_MASK) && atomKK->k_uChem.need_sync()) + perform_async_copy(atomKK->k_uChem,space); + if ((mask & UCG_MASK) && atomKK->k_uCG.need_sync()) + perform_async_copy(atomKK->k_uCG,space); + if ((mask & UCGNEW_MASK) && atomKK->k_uCGnew.need_sync()) + perform_async_copy(atomKK->k_uCGnew,space); + if ((mask & DUCHEM_MASK) && atomKK->k_duChem.need_sync()) + perform_async_copy(atomKK->k_duChem,space); + if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync()) + perform_async_copy(atomKK->k_dvector,space); } } @@ 
-1861,6 +1915,15 @@ void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask) if (mask & TYPE_MASK) atomKK->k_type.modify(); if (mask & MASK_MASK) atomKK->k_mask.modify(); if (mask & IMAGE_MASK) atomKK->k_image.modify(); + if (mask & DPDRHO_MASK) atomKK->k_rho.modify(); + if (mask & DPDTHETA_MASK) atomKK->k_dpdTheta.modify(); + if (mask & UCOND_MASK) atomKK->k_uCond.modify(); + if (mask & UMECH_MASK) atomKK->k_uMech.modify(); + if (mask & UCHEM_MASK) atomKK->k_uChem.modify(); + if (mask & UCG_MASK) atomKK->k_uCG.modify(); + if (mask & UCGNEW_MASK) atomKK->k_uCGnew.modify(); + if (mask & DUCHEM_MASK) atomKK->k_duChem.modify(); + if (mask & DVECTOR_MASK) atomKK->k_dvector.modify(); } else { if (mask & X_MASK) atomKK->k_x.modify(); if (mask & V_MASK) atomKK->k_v.modify(); @@ -1869,6 +1932,15 @@ void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask) if (mask & TYPE_MASK) atomKK->k_type.modify(); if (mask & MASK_MASK) atomKK->k_mask.modify(); if (mask & IMAGE_MASK) atomKK->k_image.modify(); + if (mask & DPDRHO_MASK) atomKK->k_rho.modify(); + if (mask & DPDTHETA_MASK) atomKK->k_dpdTheta.modify(); + if (mask & UCOND_MASK) atomKK->k_uCond.modify(); + if (mask & UMECH_MASK) atomKK->k_uMech.modify(); + if (mask & UCHEM_MASK) atomKK->k_uChem.modify(); + if (mask & UCG_MASK) atomKK->k_uCG.modify(); + if (mask & UCGNEW_MASK) atomKK->k_uCGnew.modify(); + if (mask & DUCHEM_MASK) atomKK->k_duChem.modify(); + if (mask & DVECTOR_MASK) atomKK->k_dvector.modify(); } } diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp index faf490fcc0..75e9b292f9 100644 --- a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp @@ -52,7 +52,7 @@ FixEOStableRXKokkos::FixEOStableRXKokkos(LAMMPS *lmp, int narg, char template FixEOStableRXKokkos::~FixEOStableRXKokkos() { - + if (copymode) return; } /* ---------------------------------------------------------------------- */ diff --git 
a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index 45da5bf165..0bfbb9491e 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -52,6 +52,8 @@ PairDPDfdtEnergyKokkos::PairDPDfdtEnergyKokkos(LAMMPS *lmp) : PairDP { atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | TAG_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; cutsq = NULL; } @@ -357,6 +359,7 @@ double PairDPDfdtEnergyKokkos::init_one(int i, int j) m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone; } k_cutsq.h_view(i,j) = cutone*cutone; + k_cutsq.h_view(j,i) = k_cutsq.h_view(i,j); k_cutsq.template modify(); return cutone; diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index c46f3d037d..7e74f39ef0 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -54,8 +54,8 @@ PairExp6rxKokkos::PairExp6rxKokkos(LAMMPS *lmp) : PairExp6rx(lmp) { atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; - datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; - datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; k_error_flag = DAT::tdual_int_scalar("pair:error_flag"); } @@ -104,6 +104,8 @@ void PairExp6rxKokkos::init_style() template void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) { + copymode = 1; + eflag = eflag_in; vflag = vflag_in; @@ -141,7 +143,9 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) special_coul[3] = force->special_coul[3]; newton_pair = force->newton_pair; - copymode = 1; + atomKK->sync(execution_space,X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); + if (evflag) atomKK->modified(execution_space,F_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | 
UCGNEW_MASK); + else atomKK->modified(execution_space,F_MASK | UCG_MASK | UCGNEW_MASK); // Initialize the Exp6 parameter data for both the local // and ghost atoms. Make the parameter data persistent @@ -185,10 +189,22 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) EV_FLOAT ev; - if (evflag) { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); - } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + if (neighflag == HALF) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } } k_error_flag.template modify(); @@ -246,6 +262,12 @@ template template KOKKOS_INLINE_FUNCTION void PairExp6rxKokkos::operator()(TagPairExp6rxCompute, const int &ii, EV_FLOAT& ev) const { + + // These arrays are atomic for Half/Thread neighbor style + Kokkos::View::value> > a_f = f; + Kokkos::View::value> > a_uCG = uCG; + Kokkos::View::value> > a_uCGnew = uCGnew; + int i,j,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair; double rsq,r2inv,r6inv,forceExp6,factor_lj; @@ -287,6 +309,12 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxCompute::operator()(TagPairExp6rxCompute::operator()(TagPairExp6rxComputetemplate ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); } } } + + a_f(i,0) += fx_i; + a_f(i,1) += fy_i; + a_f(i,2) += fz_i; + a_uCG[i] += uCG_i; + a_uCGnew[i] += uCGnew_i; } template diff 
--git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index bea7cb6b0b..03bbaf9907 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -68,8 +68,14 @@ PairMultiLucyRXKokkos::PairMultiLucyRXKokkos(LAMMPS *lmp) : PairMult atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; - datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; - datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; + + update_table = 0; + ntables = 0; + tables = NULL; + h_table = new TableHost(); + d_table = new TableDevice(); k_error_flag = DAT::tdual_int_scalar("pair:error_flag"); } @@ -79,7 +85,10 @@ PairMultiLucyRXKokkos::PairMultiLucyRXKokkos(LAMMPS *lmp) : PairMult template PairMultiLucyRXKokkos::~PairMultiLucyRXKokkos() { + if (copymode) return; + delete h_table; + delete d_table; } /* ---------------------------------------------------------------------- */ @@ -109,7 +118,7 @@ void PairMultiLucyRXKokkos::init_style() neighbor->requests[irequest]->half = 1; neighbor->requests[irequest]->ghost = 1; } else { - error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk"); + error->all(FLERR,"Cannot use chosen neighbor list style with multi/lucy/rx/kk"); } } @@ -118,6 +127,23 @@ void PairMultiLucyRXKokkos::init_style() template void PairMultiLucyRXKokkos::compute(int eflag_in, int vflag_in) { + if (update_table) + create_kokkos_tables(); + + if (tabstyle == LOOKUP) + compute_style(eflag_in,vflag_in); + else if(tabstyle == LINEAR) + compute_style(eflag_in,vflag_in); +} + +/* ---------------------------------------------------------------------- */ + +template +template +void PairMultiLucyRXKokkos::compute_style(int eflag_in, int vflag_in) +{ + copymode = 1; + eflag = eflag_in; vflag = vflag_in; @@ -145,10 +171,14 @@ void PairMultiLucyRXKokkos::compute(int eflag_in, int vflag_in) x = 
atomKK->k_x.view(); f = atomKK->k_f.view(); type = atomKK->k_type.view(); + rho = atomKK->k_rho.view(); uCG = atomKK->k_uCG.view(); uCGnew = atomKK->k_uCGnew.view(); dvector = atomKK->k_dvector.view(); - rho = atomKK->k_rho.view(); + + atomKK->sync(execution_space,X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | DPDRHO_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); + if (evflag) atomKK->modified(execution_space,F_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK); + else atomKK->modified(execution_space,F_MASK | UCG_MASK | UCGNEW_MASK); nlocal = atom->nlocal; int nghost = atom->nghost; @@ -176,10 +206,22 @@ void PairMultiLucyRXKokkos::compute(int eflag_in, int vflag_in) EV_FLOAT ev; - if (evflag) { - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); - } else { - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + if (neighflag == HALF) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } } k_error_flag.template modify(); @@ -223,9 +265,13 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXgetParams, } template -template +template KOKKOS_INLINE_FUNCTION -void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute, const int &ii, EV_FLOAT& ev) const { +void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute, const int &ii, EV_FLOAT& ev) const { + + // The f array is atomic 
for Half/Thread neighbor style + Kokkos::View::value> > a_f = f; + int i,j,jj,inum,jnum,itype,jtype,itable; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair; double rsq; @@ -239,8 +285,6 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute::operator()(TagPairMultiLucyRXComputeinnersq || rho[j]*rho[j] < tb->innersq){ + //tb = &tables[tabindex[itype][jtype]]; + const int tidx = d_table_const.tabindex(itype,jtype); + //if (rho[i]*rho[i] < tb->innersq || rho[j]*rho[j] < tb->innersq){ + if (rho[i]*rho[i] < d_table_const.innersq(tidx) || rho[j]*rho[j] < d_table_const.innersq(tidx)){ k_error_flag.d_view() = 1; } - if (tabstyle == LOOKUP) { - itable = static_cast (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta); - jtable = static_cast (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta); + if (TABSTYLE == LOOKUP) { + //itable = static_cast (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta); + itable = static_cast (((rho[i]*rho[i]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + //jtable = static_cast (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta); + jtable = static_cast (((rho[j]*rho[j]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); if (itable >= tlm1 || jtable >= tlm1){ k_error_flag.d_view() = 2; } - A_i = tb->f[itable]; - A_j = tb->f[jtable]; + //A_i = tb->f[itable]; + A_i = d_table_const.f(tidx,itable); + //A_j = tb->f[jtable]; + A_j = d_table_const.f(tidx,jtable); const double rfactor = 1.0-sqrt(rsq/d_cutsq(itype,jtype)); fpair = 0.5*(A_i + A_j)*(4.0-3.0*rfactor)*rfactor*rfactor*rfactor; fpair /= sqrt(rsq); - } else if (tabstyle == LINEAR) { - itable = static_cast ((rho[i]*rho[i] - tb->innersq) * tb->invdelta); - jtable = static_cast (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta); + } else if (TABSTYLE == LINEAR) { + //itable = static_cast ((rho[i]*rho[i] - tb->innersq) * tb->invdelta); + itable = static_cast ((rho[i]*rho[i] - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + //jtable = static_cast 
(((rho[j]*rho[j]) - tb->innersq) * tb->invdelta); + jtable = static_cast ((rho[j]*rho[j] - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); if (itable >= tlm1 || jtable >= tlm1){ k_error_flag.d_view() = 2; } @@ -302,15 +354,19 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute=tlm1)jtable=tlm1; - fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta); - fraction_j = (((rho[j]*rho[j]) - tb->rsq[jtable]) * tb->invdelta); + //fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta); + fraction_i = (((rho[i]*rho[i]) - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx)); + //fraction_j = (((rho[j]*rho[j]) - tb->rsq[jtable]) * tb->invdelta); + fraction_j = (((rho[j]*rho[j]) - d_table_const.rsq(tidx,jtable)) * d_table_const.invdelta(tidx)); if(itable==0) fraction_i=0.0; if(itable==tlm1) fraction_i=0.0; if(jtable==0) fraction_j=0.0; if(jtable==tlm1) fraction_j=0.0; - A_i = tb->f[itable] + fraction_i*tb->df[itable]; - A_j = tb->f[jtable] + fraction_j*tb->df[jtable]; + //A_i = tb->f[itable] + fraction_i*tb->df[itable]; + A_i = d_table_const.f(tidx,itable) + fraction_i*d_table_const.df(tidx,itable); + //A_j = tb->f[jtable] + fraction_j*tb->df[jtable]; + A_j = d_table_const.f(tidx,jtable) + fraction_j*d_table_const.df(tidx,jtable); const double rfactor = 1.0-sqrt(rsq/d_cutsq(itype,jtype)); fpair = 0.5*(A_i + A_j)*(4.0-3.0*rfactor)*rfactor*rfactor*rfactor; @@ -325,29 +381,34 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputetemplate ev_tally(ev,i,j,0.0,fpair,delx,dely,delz); } } - f(i,0) += fx_i; - f(i,1) += fy_i; - f(i,2) += fz_i; + a_f(i,0) += fx_i; + a_f(i,1) += fy_i; + a_f(i,2) += fz_i; - tb = &tables[tabindex[itype][itype]]; - itable = static_cast (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta); - if (tabstyle == LOOKUP) evdwl = tb->e[itable]; - else if (tabstyle == LINEAR){ + //tb = &tables[tabindex[itype][itype]]; + const int tidx = d_table_const.tabindex(itype,itype); + //itable = 
static_cast (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta); + itable = static_cast (((rho[i]*rho[i]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + //if (TABSTYLE == LOOKUP) evdwl = tb->e[itable]; + if (TABSTYLE == LOOKUP) evdwl = d_table_const.e(tidx,itable); + else if (TABSTYLE == LINEAR){ if (itable >= tlm1){ k_error_flag.d_view() = 2; } if(itable==0) fraction_i=0.0; - else fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta); - evdwl = tb->e[itable] + fraction_i*tb->de[itable]; + //else fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta); + else fraction_i = (((rho[i]*rho[i]) - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx)); + //evdwl = tb->e[itable] + fraction_i*tb->de[itable]; + evdwl = d_table_const.e(tidx,itable); + fraction_i*d_table_const.de(tidx,itable); } else k_error_flag.d_view() = 3; evdwl *=(pi*d_cutsq(itype,itype)*d_cutsq(itype,itype))/84.0; @@ -364,121 +425,11 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute -template +template KOKKOS_INLINE_FUNCTION -void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute, const int &ii) const { +void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute, const int &ii) const { EV_FLOAT ev; - this->template operator()(TagPairMultiLucyRXCompute(), ii, ev); -} - -/* ---------------------------------------------------------------------- - set coeffs for one or more type pairs -------------------------------------------------------------------------- */ - -template -void PairMultiLucyRXKokkos::coeff(int narg, char **arg) -{ - if (narg != 6 && narg != 7) error->all(FLERR,"Illegal pair_coeff command"); - - bool rx_flag = false; - for (int i = 0; i < modify->nfix; i++) - if (strncmp(modify->fix[i]->style,"rx",2) == 0) rx_flag = true; - if (!rx_flag) error->all(FLERR,"PairMultiLucyRXKokkos requires a fix rx command."); - - if (!allocated) allocate(); - - int ilo,ihi,jlo,jhi; - 
force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi); - force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi); - - int me; - MPI_Comm_rank(world,&me); - tables = (Table *) - memory->srealloc(tables,(ntables+1)*sizeof(Table),"pair:tables"); - Table *tb = &tables[ntables]; - null_table(tb); - if (me == 0) read_table(tb,arg[2],arg[3]); - bcast_table(tb); - - nspecies = atom->nspecies_dpd; - int n; - n = strlen(arg[3]) + 1; - site1 = new char[n]; - strcpy(site1,arg[4]); - - n = strlen(arg[4]) + 1; - site2 = new char[n]; - strcpy(site2,arg[5]); - - // set table cutoff - - if (narg == 7) tb->cut = force->numeric(FLERR,arg[6]); - else if (tb->rflag) tb->cut = tb->rhi; - else tb->cut = tb->rfile[tb->ninput-1]; - - // error check on table parameters - // insure cutoff is within table - - if (tb->ninput <= 1) error->one(FLERR,"Invalid pair table length"); - if (tb->rflag == 0) { - rho_0 = tb->rfile[0]; - } else { - rho_0 = tb->rlo; - } - - tb->match = 0; - if (tabstyle == LINEAR && tb->ninput == tablength && - tb->rflag == RSQ) tb->match = 1; - - // spline read-in values and compute r,e,f vectors within table - - if (tb->match == 0) spline_table(tb); - compute_table(tb); - - // store ptr to table in tabindex - - int count = 0; - for (int i = ilo; i <= ihi; i++) { - for (int j = MAX(jlo,i); j <= jhi; j++) { - tabindex[i][j] = ntables; - setflag[i][j] = 1; - count++; - } - } - - if (count == 0) error->all(FLERR,"Illegal pair_coeff command"); - ntables++; - - // Match site* to isite values. 
- - if (strcmp(site1, "1fluid") == 0) - isite1 = oneFluidParameter; - else { - isite1 = nspecies; - for (int ispecies = 0; ispecies < nspecies; ++ispecies) - if (strcmp(site1, atom->dname[ispecies]) == 0){ - isite1 = ispecies; - break; - } - - if (isite1 == nspecies) - error->all(FLERR,"Pair_multi_lucy_rx site1 is invalid."); - } - - if (strcmp(site2, "1fluid") == 0) - isite2 = oneFluidParameter; - else { - isite2 = nspecies; - for (int ispecies = 0; ispecies < nspecies; ++ispecies) - if (strcmp(site2, atom->dname[ispecies]) == 0){ - isite2 = ispecies; - break; - } - - if (isite2 == nspecies) - error->all(FLERR,"Pair_multi_lucy_rx site2 is invalid."); - } - + this->template operator()(TagPairMultiLucyRXCompute(), ii, ev); } /* ---------------------------------------------------------------------- */ @@ -486,12 +437,16 @@ void PairMultiLucyRXKokkos::coeff(int narg, char **arg) template void PairMultiLucyRXKokkos::computeLocalDensity() { + copymode = 1; + x = atomKK->k_x.view(); type = atomKK->k_type.view(); rho = atomKK->k_rho.view(); + h_rho = atomKK->k_rho.h_view; nlocal = atom->nlocal; - //sync + atomKK->sync(execution_space,X_MASK | TYPE_MASK | DPDRHO_MASK); + atomKK->modified(execution_space,DPDRHO_MASK); const int inum = list->inum; NeighListKokkos* k_list = static_cast*>(list); @@ -514,16 +469,34 @@ void PairMultiLucyRXKokkos::computeLocalDensity() if (newton_pair) m += atom->nghost; Kokkos::parallel_for(Kokkos::RangePolicy(0,m),*this); -// rho = density at each atom -// loop over neighbors of my atoms - if (newton_pair) - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); - else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + // rho = density at each atom + // loop over neighbors of my atoms - if (newton_pair) comm->reverse_comm_pair(this); + if (neighflag == HALF) { + if (newton_pair) + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else if (neighflag 
== HALFTHREAD) { + if (newton_pair) + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + + // communicate and sum densities (on the host) + + if (newton_pair) { + atomKK->modified(execution_space,DPDRHO_MASK); + atomKK->sync(Host,DPDRHO_MASK); + comm->reverse_comm_pair(this); + atomKK->modified(Host,DPDRHO_MASK); + atomKK->sync(execution_space,DPDRHO_MASK); + } comm->forward_comm_pair(this); + + copymode = 0; } template @@ -536,6 +509,10 @@ template template KOKKOS_INLINE_FUNCTION void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeLocalDensity, const int &ii) const { + + // The rho array is atomic for Half/Thread neighbor style + Kokkos::View::value> > a_rho = rho; + const int i = d_ilist[ii]; const double xtmp = x(i,0); @@ -567,7 +544,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeLoca const double factor = factor_type11*(1.0 + 1.5*r_over_rcut)*tmpFactor4; rho_i += factor; if (NEWTON_PAIR || j < nlocal) - rho[j] += factor; + a_rho[j] += factor; } else if (rsq < d_cutsq(itype,jtype)) { const double rcut = sqrt(d_cutsq(itype,jtype)); const double tmpFactor = 1.0-sqrt(rsq)/rcut; @@ -575,12 +552,12 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeLoca const double factor = (84.0/(5.0*pi*rcut*rcut*rcut))*(1.0+3.0*sqrt(rsq)/(2.0*rcut))*tmpFactor4; rho_i += factor; if (NEWTON_PAIR || j < nlocal) - rho[j] += factor; + a_rho[j] += factor; } } } - rho[i] = rho_i; + a_rho[i] = rho_i; } /* ---------------------------------------------------------------------- */ @@ -630,16 +607,53 @@ void PairMultiLucyRXKokkos::getParams(int id, double &fractionOld1, /* ---------------------------------------------------------------------- */ +template +int PairMultiLucyRXKokkos::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf, + int pbc_flag, int *pbc) +{ + d_sendlist = k_sendlist.view(); + iswap = 
iswap_in; + v_buf = buf.view(); + Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); + DeviceType::fence(); + return n; +} + +template +KOKKOS_INLINE_FUNCTION +void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXPackForwardComm, const int &i) const { + int j = d_sendlist(iswap, i); + v_buf[i] = rho[j]; +} + +/* ---------------------------------------------------------------------- */ + +template +void PairMultiLucyRXKokkos::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf) +{ + first = first_in; + v_buf = buf.view(); + Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); + DeviceType::fence(); +} + +template +KOKKOS_INLINE_FUNCTION +void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXUnpackForwardComm, const int &i) const { + rho[i + first] = v_buf[i]; +} + +/* ---------------------------------------------------------------------- */ + template int PairMultiLucyRXKokkos::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) { int i,j,m; - rho = atomKK->k_rho.view(); m = 0; for (i = 0; i < n; i++) { j = list[i]; - buf[m++] = rho[j]; + buf[m++] = h_rho[j]; } return m; } @@ -650,11 +664,10 @@ template void PairMultiLucyRXKokkos::unpack_forward_comm(int n, int first, double *buf) { int i,m,last; - rho = atomKK->k_rho.view(); m = 0; last = first + n; - for (i = first; i < last; i++) rho[i] = buf[m++]; + for (i = first; i < last; i++) h_rho[i] = buf[m++]; } /* ---------------------------------------------------------------------- */ @@ -663,11 +676,10 @@ template int PairMultiLucyRXKokkos::pack_reverse_comm(int n, int first, double *buf) { int i,m,last; - rho = atomKK->k_rho.view(); m = 0; last = first + n; - for (i = first; i < last; i++) buf[m++] = rho[i]; + for (i = first; i < last; i++) buf[m++] = h_rho[i]; return m; } @@ -677,12 +689,11 @@ template void PairMultiLucyRXKokkos::unpack_reverse_comm(int n, int *list, double *buf) { int i,j,m; - rho = atomKK->k_rho.view(); m = 0; for (i = 0; i < n; i++) { j = 
list[i]; - rho[j] += buf[m++]; + h_rho[j] += buf[m++]; } } @@ -782,6 +793,145 @@ void PairMultiLucyRXKokkos::ev_tally(EV_FLOAT &ev, const int &i, con /* ---------------------------------------------------------------------- */ +template +void PairMultiLucyRXKokkos::create_kokkos_tables() +{ + const int tlm1 = tablength-1; + + memory->create_kokkos(d_table->innersq,h_table->innersq,ntables,"Table::innersq"); + memory->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta"); + memory->create_kokkos(d_table->deltasq6,h_table->deltasq6,ntables,"Table::deltasq6"); + + if(tabstyle == LOOKUP) { + memory->create_kokkos(d_table->e,h_table->e,ntables,tlm1,"Table::e"); + memory->create_kokkos(d_table->f,h_table->f,ntables,tlm1,"Table::f"); + } + + if(tabstyle == LINEAR) { + memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq"); + memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e"); + memory->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f"); + memory->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de"); + memory->create_kokkos(d_table->df,h_table->df,ntables,tlm1,"Table::df"); + } + + for(int i=0; i < ntables; i++) { + Table* tb = &tables[i]; + + h_table->innersq[i] = tb->innersq; + h_table->invdelta[i] = tb->invdelta; + h_table->deltasq6[i] = tb->deltasq6; + + for(int j = 0; jrsq.dimension_1(); j++) + h_table->rsq(i,j) = tb->rsq[j]; + for(int j = 0; jdrsq.dimension_1(); j++) + h_table->drsq(i,j) = tb->drsq[j]; + for(int j = 0; je.dimension_1(); j++) + h_table->e(i,j) = tb->e[j]; + for(int j = 0; jde.dimension_1(); j++) + h_table->de(i,j) = tb->de[j]; + for(int j = 0; jf.dimension_1(); j++) + h_table->f(i,j) = tb->f[j]; + for(int j = 0; jdf.dimension_1(); j++) + h_table->df(i,j) = tb->df[j]; + for(int j = 0; je2.dimension_1(); j++) + h_table->e2(i,j) = tb->e2[j]; + for(int j = 0; jf2.dimension_1(); j++) + h_table->f2(i,j) = tb->f2[j]; + } + + + 
Kokkos::deep_copy(d_table->innersq,h_table->innersq); + Kokkos::deep_copy(d_table->invdelta,h_table->invdelta); + Kokkos::deep_copy(d_table->deltasq6,h_table->deltasq6); + Kokkos::deep_copy(d_table->rsq,h_table->rsq); + Kokkos::deep_copy(d_table->drsq,h_table->drsq); + Kokkos::deep_copy(d_table->e,h_table->e); + Kokkos::deep_copy(d_table->de,h_table->de); + Kokkos::deep_copy(d_table->f,h_table->f); + Kokkos::deep_copy(d_table->df,h_table->df); + Kokkos::deep_copy(d_table->e2,h_table->e2); + Kokkos::deep_copy(d_table->f2,h_table->f2); + Kokkos::deep_copy(d_table->tabindex,h_table->tabindex); + + d_table_const.innersq = d_table->innersq; + d_table_const.invdelta = d_table->invdelta; + d_table_const.deltasq6 = d_table->deltasq6; + d_table_const.rsq = d_table->rsq; + d_table_const.drsq = d_table->drsq; + d_table_const.e = d_table->e; + d_table_const.de = d_table->de; + d_table_const.f = d_table->f; + d_table_const.df = d_table->df; + d_table_const.e2 = d_table->e2; + d_table_const.f2 = d_table->f2; + + + Kokkos::deep_copy(d_table->cutsq,h_table->cutsq); + update_table = 0; +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template +void PairMultiLucyRXKokkos::allocate() +{ + allocated = 1; + const int nt = atom->ntypes + 1; + + memory->create(setflag,nt,nt,"pair:setflag"); + memory->create_kokkos(d_table->cutsq,h_table->cutsq,cutsq,nt,nt,"pair:cutsq"); + memory->create_kokkos(d_table->tabindex,h_table->tabindex,tabindex,nt,nt,"pair:tabindex"); + + d_table_const.cutsq = d_table->cutsq; + d_table_const.tabindex = d_table->tabindex; + memset(&setflag[0][0],0,nt*nt*sizeof(int)); + memset(&cutsq[0][0],0,nt*nt*sizeof(double)); + memset(&tabindex[0][0],0,nt*nt*sizeof(int)); +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- 
*/ + +template +void PairMultiLucyRXKokkos::settings(int narg, char **arg) +{ + if (narg < 2) error->all(FLERR,"Illegal pair_style command"); + + // new settings + + if (strcmp(arg[0],"lookup") == 0) tabstyle = LOOKUP; + else if (strcmp(arg[0],"linear") == 0) tabstyle = LINEAR; + else error->all(FLERR,"Unknown table style in pair_style command"); + + tablength = force->inumeric(FLERR,arg[1]); + if (tablength < 2) error->all(FLERR,"Illegal number of pair table entries"); + + // delete old tables, since cannot just change settings + + for (int m = 0; m < ntables; m++) free_table(&tables[m]); + memory->sfree(tables); + + if (allocated) { + memory->destroy(setflag); + + d_table_const.tabindex = d_table->tabindex = typename ArrayTypes::t_int_2d(); + h_table->tabindex = typename ArrayTypes::t_int_2d(); + + d_table_const.cutsq = d_table->cutsq = typename ArrayTypes::t_ffloat_2d(); + h_table->cutsq = typename ArrayTypes::t_ffloat_2d(); + } + allocated = 0; + + ntables = 0; + tables = NULL; +} + +/* ---------------------------------------------------------------------- */ + namespace LAMMPS_NS { template class PairMultiLucyRXKokkos; #ifdef KOKKOS_HAVE_CUDA diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h index ff22516eb1..a259588f78 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h @@ -29,9 +29,12 @@ PairStyle(multi/lucy/rx/kk/host,PairMultiLucyRXKokkos) namespace LAMMPS_NS { +struct TagPairMultiLucyRXPackForwardComm{}; +struct TagPairMultiLucyRXUnpackForwardComm{}; + struct TagPairMultiLucyRXgetParams{}; -template +template struct TagPairMultiLucyRXCompute{}; struct TagPairMultiLucyRXZero{}; @@ -50,24 +53,37 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { virtual ~PairMultiLucyRXKokkos(); void compute(int, int); + void settings(int, char **); + + template + void compute_style(int, int); + void init_style(); - void coeff(int, char **); + int pack_forward_comm_kokkos(int, 
DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&, + int, int *); + void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&); int pack_forward_comm(int, int *, double *, int, int *); void unpack_forward_comm(int, int, double *); int pack_reverse_comm(int, int, double *); void unpack_reverse_comm(int, int *, double *); void computeLocalDensity(); + KOKKOS_INLINE_FUNCTION + void operator()(TagPairMultiLucyRXPackForwardComm, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagPairMultiLucyRXUnpackForwardComm, const int&) const; + KOKKOS_INLINE_FUNCTION void operator()(TagPairMultiLucyRXgetParams, const int&) const; - template + template KOKKOS_INLINE_FUNCTION - void operator()(TagPairMultiLucyRXCompute, const int&, EV_FLOAT&) const; + void operator()(TagPairMultiLucyRXCompute, const int&, EV_FLOAT&) const; - template + template KOKKOS_INLINE_FUNCTION - void operator()(TagPairMultiLucyRXCompute, const int&) const; + void operator()(TagPairMultiLucyRXCompute, const int&) const; KOKKOS_INLINE_FUNCTION void operator()(TagPairMultiLucyRXZero, const int&) const; @@ -92,6 +108,8 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { double rcut_type11; double factor_type11; + enum{LOOKUP,LINEAR,SPLINE,BITMAP}; + //struct Table { // int ninput,rflag,fpflag,match; // double rlo,rhi,fplo,fphi,cut; @@ -100,14 +118,47 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { // double innersq,delta,invdelta,deltasq6; // double *rsq,*drsq,*e,*de,*f,*df,*e2,*f2; //}; - //Table *tables; + + int tabstyle,tablength; + /*struct TableDeviceConst { + typename ArrayTypes::t_ffloat_2d_randomread cutsq; + typename ArrayTypes::t_int_2d_randomread tabindex; + typename ArrayTypes::t_ffloat_1d_randomread innersq,invdelta,deltasq6; + typename ArrayTypes::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2; + };*/ + //Its faster not to use texture fetch if the number of tables is less than 32! 
+ struct TableDeviceConst { + typename ArrayTypes::t_ffloat_2d cutsq; + typename ArrayTypes::t_int_2d tabindex; + typename ArrayTypes::t_ffloat_1d innersq,invdelta,deltasq6; + typename ArrayTypes::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2; + }; + + struct TableDevice { + typename ArrayTypes::t_ffloat_2d cutsq; + typename ArrayTypes::t_int_2d tabindex; + typename ArrayTypes::t_ffloat_1d innersq,invdelta,deltasq6; + typename ArrayTypes::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2; + }; + + struct TableHost { + typename ArrayTypes::t_ffloat_2d cutsq; + typename ArrayTypes::t_int_2d tabindex; + typename ArrayTypes::t_ffloat_1d innersq,invdelta,deltasq6; + typename ArrayTypes::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2; + }; + + TableDeviceConst d_table_const; + TableDevice* d_table; + TableHost* h_table; int **tabindex; + F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; - //void read_table(Table *, char *, char *); - //void param_extract(Table *, char *); - - char *site1, *site2; + void allocate(); + int update_table; + void create_kokkos_tables(); + void cleanup_copy(); KOKKOS_INLINE_FUNCTION void getParams(int, double &, double &, double &, double &) const; @@ -118,6 +169,7 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { typename AT::t_f_array f; typename AT::t_int_1d_randomread type; typename AT::t_efloat_1d rho; + typename HAT::t_efloat_1d h_rho; typename AT::t_efloat_1d uCG, uCGnew; typename AT::t_float_2d dvector; @@ -135,6 +187,11 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { typename AT::tdual_ffloat_2d k_cutsq; typename AT::t_ffloat_2d d_cutsq; + int iswap; + int first; + typename AT::t_int_2d d_sendlist; + typename AT::t_xfloat_1d_um v_buf; + friend void pair_virial_fdotr_compute(PairMultiLucyRXKokkos*); }; diff --git a/src/USER-DPD/fix_eos_table_rx.cpp b/src/USER-DPD/fix_eos_table_rx.cpp index e10ce96089..8871bdd176 100644 --- a/src/USER-DPD/fix_eos_table_rx.cpp +++ b/src/USER-DPD/fix_eos_table_rx.cpp @@ -127,6 +127,8 @@ 
FixEOStableRX::FixEOStableRX(LAMMPS *lmp, int narg, char **arg) : FixEOStableRX::~FixEOStableRX() { + if (copymode) return; + for (int m = 0; m < ntables; m++) { free_table(&tables[m]); free_table(&tables2[m]); diff --git a/src/USER-DPD/pair_multi_lucy_rx.cpp b/src/USER-DPD/pair_multi_lucy_rx.cpp index cd107f1519..6b5c7cf40a 100644 --- a/src/USER-DPD/pair_multi_lucy_rx.cpp +++ b/src/USER-DPD/pair_multi_lucy_rx.cpp @@ -78,6 +78,8 @@ PairMultiLucyRX::PairMultiLucyRX(LAMMPS *lmp) : Pair(lmp), PairMultiLucyRX::~PairMultiLucyRX() { + if (copymode) return; + for (int m = 0; m < ntables; m++) free_table(&tables[m]); memory->sfree(tables); diff --git a/src/USER-DPD/pair_table_rx.cpp b/src/USER-DPD/pair_table_rx.cpp index 902d0e5bb4..463e1838c6 100644 --- a/src/USER-DPD/pair_table_rx.cpp +++ b/src/USER-DPD/pair_table_rx.cpp @@ -50,6 +50,8 @@ PairTableRX::PairTableRX(LAMMPS *lmp) : Pair(lmp) PairTableRX::~PairTableRX() { + if (copymode) return; + for (int m = 0; m < ntables; m++) free_table(&tables[m]); memory->sfree(tables); diff --git a/src/atom_masks.h b/src/atom_masks.h index 119f09f273..8e29448488 100644 --- a/src/atom_masks.h +++ b/src/atom_masks.h @@ -42,6 +42,18 @@ #define ENERGY_MASK 0x00010000 #define VIRIAL_MASK 0x00020000 +// DPD + +#define DPDRHO_MASK 0x00040000 +#define DPDTHETA_MASK 0x00080000 +#define UCOND_MASK 0x00100000 +#define UMECH_MASK 0x00200000 +#define UCHEM_MASK 0x00400000 +#define UCG_MASK 0x00800000 +#define UCGNEW_MASK 0x01000000 +#define DUCHEM_MASK 0x02000000 +#define DVECTOR_MASK 0x04000000 + // granular #define RADIUS_MASK 0x00100000 From d5f8f36442bfc14ba49c4090f465f87afc65a24e Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 15 Dec 2016 15:48:09 -0700 Subject: [PATCH 012/267] Change to fix_property_atom to allow virtual override of grow_arrays() function --- src/fix_property_atom.cpp | 3 ++- src/fix_property_atom.h | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/fix_property_atom.cpp 
b/src/fix_property_atom.cpp index b83aadc95d..002260d8f0 100644 --- a/src/fix_property_atom.cpp +++ b/src/fix_property_atom.cpp @@ -134,7 +134,6 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) : // register with Atom class nmax_old = 0; - grow_arrays(atom->nmax); atom->add_callback(0); atom->add_callback(1); if (border) atom->add_callback(2); @@ -190,6 +189,8 @@ int FixPropertyAtom::setmask() void FixPropertyAtom::init() { + grow_arrays(atom->nmax); + // error if atom style has changed since fix was defined // don't allow this b/c user could change to style that defines molecule,q diff --git a/src/fix_property_atom.h b/src/fix_property_atom.h index 77a41f393a..d923d76cac 100644 --- a/src/fix_property_atom.h +++ b/src/fix_property_atom.h @@ -27,7 +27,7 @@ namespace LAMMPS_NS { class FixPropertyAtom : public Fix { public: FixPropertyAtom(class LAMMPS *, int, char **); - ~FixPropertyAtom(); + virtual ~FixPropertyAtom(); int setmask(); void init(); @@ -38,7 +38,7 @@ class FixPropertyAtom : public Fix { void write_data_section_keyword(int, FILE *); void write_data_section(int, FILE *, int, double **, int); - void grow_arrays(int); + virtual void grow_arrays(int); void copy_arrays(int, int, int); int pack_border(int, int *, double *); int unpack_border(int, int, double *); @@ -50,7 +50,7 @@ class FixPropertyAtom : public Fix { int maxsize_restart(); double memory_usage(); - private: + protected: int nvalue,border; int molecule_flag,q_flag,rmass_flag; int *style,*index; From a3c1d385e84a68721433eaaf318513962c489657 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 15 Dec 2016 15:50:30 -0700 Subject: [PATCH 013/267] Adding Kokkos version of fix_property_atom --- src/KOKKOS/Install.sh | 2 + src/KOKKOS/fix_property_atom_kokkos.cpp | 72 ++++++++++++++++++++ src/KOKKOS/fix_property_atom_kokkos.h | 90 +++++++++++++++++++++++++ 3 files changed, 164 insertions(+) create mode 100644 src/KOKKOS/fix_property_atom_kokkos.cpp create mode 100644 
src/KOKKOS/fix_property_atom_kokkos.h diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 707ea1e986..a1830163bd 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -87,6 +87,8 @@ action fix_nve_kokkos.cpp action fix_nve_kokkos.h action fix_nvt_kokkos.cpp action fix_nvt_kokkos.h +action fix_property_atom_kokkos.cpp +action fix_property_atom_kokkos.h action fix_qeq_reax_kokkos.cpp fix_qeq_reax.cpp action fix_qeq_reax_kokkos.h fix_qeq_reax.h action fix_reaxc_bonds_kokkos.cpp fix_reaxc_bonds.cpp diff --git a/src/KOKKOS/fix_property_atom_kokkos.cpp b/src/KOKKOS/fix_property_atom_kokkos.cpp new file mode 100644 index 0000000000..327563efbd --- /dev/null +++ b/src/KOKKOS/fix_property_atom_kokkos.cpp @@ -0,0 +1,72 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include +#include +#include "fix_property_atom_kokkos.h" +#include "atom_kokkos.h" +#include "comm.h" +#include "memory.h" +#include "error.h" +#include "update.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +enum{MOLECULE,CHARGE,RMASS,INTEGER,DOUBLE}; + +/* ---------------------------------------------------------------------- */ + +FixPropertyAtomKokkos::FixPropertyAtomKokkos(LAMMPS *lmp, int narg, char **arg) : + FixPropertyAtom(lmp, narg, arg) +{ + atomKK = (AtomKokkos *) atom; +} + +/* ---------------------------------------------------------------------- + allocate atom-based arrays + initialize new values to 0, + since AtomVec class won't do it as atoms are added, + e.g. in create_atom() or data_atom() +------------------------------------------------------------------------- */ + +void FixPropertyAtomKokkos::grow_arrays(int nmax) +{ + for (int m = 0; m < nvalue; m++) { + if (style[m] == MOLECULE) { + memory->grow(atom->molecule,nmax,"atom:molecule"); + size_t nbytes = (nmax-nmax_old) * sizeof(tagint); + memset(&atom->molecule[nmax_old],0,nbytes); + } else if (style[m] == CHARGE) { + memory->grow(atom->q,nmax,"atom:q"); + size_t nbytes = (nmax-nmax_old) * sizeof(double); + memset(&atom->q[nmax_old],0,nbytes); + } else if (style[m] == RMASS) { + memory->grow(atom->rmass,nmax,"atom:rmass"); + size_t nbytes = (nmax-nmax_old) * sizeof(double); + memset(&atom->rmass[nmax_old],0,nbytes); + } else if (style[m] == INTEGER) { + memory->grow(atom->ivector[index[m]],nmax,"atom:ivector"); + size_t nbytes = (nmax-nmax_old) * sizeof(int); + memset(&atom->ivector[index[m]][nmax_old],0,nbytes); + } else if (style[m] == DOUBLE) { + memory->grow_kokkos(atomKK->k_dvector,atomKK->dvector,nvalue,nmax, + "atom:dvector"); + //memory->grow(atom->dvector[index[m]],nmax,"atom:dvector"); + //size_t nbytes = (nmax-nmax_old) * sizeof(double); + 
//memset(&atom->dvector[index[m]][nmax_old],0,nbytes); + } + } + + nmax_old = nmax; +} diff --git a/src/KOKKOS/fix_property_atom_kokkos.h b/src/KOKKOS/fix_property_atom_kokkos.h new file mode 100644 index 0000000000..ed1e4d7cfb --- /dev/null +++ b/src/KOKKOS/fix_property_atom_kokkos.h @@ -0,0 +1,90 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(property/atom/kk,FixPropertyAtomKokkos) + +#else + +#ifndef LMP_FIX_PROPERTY_ATOM_KOKKOS_H +#define LMP_FIX_PROPERTY_ATOM_KOKKOS_H + +#include "fix_property_atom.h" + +namespace LAMMPS_NS { + +class FixPropertyAtomKokkos : public FixPropertyAtom { + public: + FixPropertyAtomKokkos(class LAMMPS *, int, char **); + virtual ~FixPropertyAtomKokkos() {} + + void grow_arrays(int); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Fix property/atom mol when atom_style already has molecule attribute + +Self-explanatory. + +E: Fix property/atom cannot specify mol twice + +Self-explanatory. + +E: Fix property/atom q when atom_style already has charge attribute + +Self-explanatory. + +E: Fix property/atom cannot specify q twice + +Self-explanatory. 
+ +E: Fix property/atom vector name already exists + +The name for an integer or floating-point vector must be unique. + +W: Fix property/atom mol or charge w/out ghost communication + +A model typically needs these properties defined for ghost atoms. + +E: Atom style was redefined after using fix property/atom + +This is not allowed. + +E: Incorrect %s format in data file + +A section of the data file being read by fix property/atom does +not have the correct number of values per line. + +E: Too few lines in %s section of data file + +Self-explanatory. + +E: Invalid atom ID in %s section of data file + +An atom in a section of the data file being read by fix property/atom +has an invalid atom ID that is <= 0 or > the maximum existing atom ID. + +*/ From f47a40b2e4ead7248c601129c9e8a5d82deac91b Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 16 Dec 2016 10:02:01 -0700 Subject: [PATCH 014/267] Fixing Kokkos memory deallocation issue --- src/KOKKOS/atom_kokkos.cpp | 13 +++++++++++++ src/atom.cpp | 8 +++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index 4a7250e6ab..97b76ba67c 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -73,6 +73,19 @@ AtomKokkos::~AtomKokkos() memory->destroy_kokkos(k_improper_atom2, improper_atom2); memory->destroy_kokkos(k_improper_atom3, improper_atom3); memory->destroy_kokkos(k_improper_atom4, improper_atom4); + + // USER-DPD package + memory->destroy_kokkos(k_uCond,uCond); + memory->destroy_kokkos(k_uMech,uMech); + memory->destroy_kokkos(k_uChem,uChem); + memory->destroy_kokkos(k_uCG,uCG); + memory->destroy_kokkos(k_uCGnew,uCGnew); + memory->destroy_kokkos(k_rho,rho); + memory->destroy_kokkos(k_dpdTheta,dpdTheta); + memory->destroy_kokkos(k_duChem,duChem); + + memory->destroy_kokkos(k_dvector,dvector); + dvector = NULL; } /* ---------------------------------------------------------------------- */ diff --git a/src/atom.cpp 
b/src/atom.cpp index 053a18430b..c7f8345898 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -331,9 +331,11 @@ Atom::~Atom() delete [] iname[i]; memory->destroy(ivector[i]); } - for (int i = 0; i < ndvector; i++) { - delete [] dname[i]; - memory->destroy(dvector[i]); + if (dvector != NULL) { + for (int i = 0; i < ndvector; i++) { + delete [] dname[i]; + memory->destroy(dvector[i]); + } } memory->sfree(iname); From d93e3d1cee93983df5e1c0707b4957d2bc138e9a Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 16 Dec 2016 10:06:12 -0700 Subject: [PATCH 015/267] Fixing runtime issues with pair_exp6_rx_kokkos --- src/KOKKOS/fix_property_atom_kokkos.cpp | 2 +- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 17 ++++++++++++++--- src/USER-DPD/pair_exp6_rx.cpp | 10 ++++++---- src/USER-DPD/pair_exp6_rx.h | 4 ++-- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/KOKKOS/fix_property_atom_kokkos.cpp b/src/KOKKOS/fix_property_atom_kokkos.cpp index 327563efbd..cb52988c31 100644 --- a/src/KOKKOS/fix_property_atom_kokkos.cpp +++ b/src/KOKKOS/fix_property_atom_kokkos.cpp @@ -60,7 +60,7 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax) size_t nbytes = (nmax-nmax_old) * sizeof(int); memset(&atom->ivector[index[m]][nmax_old],0,nbytes); } else if (style[m] == DOUBLE) { - memory->grow_kokkos(atomKK->k_dvector,atomKK->dvector,nvalue,nmax, + memory->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.dimension_0(),nmax, "atom:dvector"); //memory->grow(atom->dvector[index[m]],nmax,"atom:dvector"); //size_t nbytes = (nmax-nmax_old) * sizeof(double); diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 7e74f39ef0..e7934cfa0b 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -65,7 +65,20 @@ PairExp6rxKokkos::PairExp6rxKokkos(LAMMPS *lmp) : PairExp6rx(lmp) template PairExp6rxKokkos::~PairExp6rxKokkos() { + if (copymode) return; + memory->destroy_kokkos(k_eatom,eatom); + 
memory->destroy_kokkos(k_vatom,vatom); + + memory->destroy_kokkos(k_cutsq,cutsq); + + for (int i=0; i < nparams; ++i) { + delete[] params[i].name; + delete[] params[i].potential; + } + memory->destroy_kokkos(k_params,params); + + memory->destroy_kokkos(k_mol2param,mol2param); } /* ---------------------------------------------------------------------- */ @@ -73,7 +86,7 @@ PairExp6rxKokkos::~PairExp6rxKokkos() template void PairExp6rxKokkos::init_style() { - PairExp6rxKokkos::init_style(); + PairExp6rx::init_style(); // irequest = neigh request made by parent class @@ -89,11 +102,9 @@ void PairExp6rxKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->ghost = 1; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->ghost = 1; } else { error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk"); } diff --git a/src/USER-DPD/pair_exp6_rx.cpp b/src/USER-DPD/pair_exp6_rx.cpp index 2643c9ec04..dd8ac4bbe7 100644 --- a/src/USER-DPD/pair_exp6_rx.cpp +++ b/src/USER-DPD/pair_exp6_rx.cpp @@ -79,11 +79,13 @@ PairExp6rx::~PairExp6rx() { if (copymode) return; - for (int i=0; i < nparams; ++i) { - delete[] params[i].name; - delete[] params[i].potential; + if (params != NULL) { + for (int i=0; i < nparams; ++i) { + delete[] params[i].name; + delete[] params[i].potential; + } + memory->destroy(params); } - memory->destroy(params); memory->destroy(mol2param); if (allocated) { diff --git a/src/USER-DPD/pair_exp6_rx.h b/src/USER-DPD/pair_exp6_rx.h index dd9fa22a48..f9654e4086 100644 --- a/src/USER-DPD/pair_exp6_rx.h +++ b/src/USER-DPD/pair_exp6_rx.h @@ -44,7 +44,7 @@ class PairExp6rx : public Pair { double **epsilon,**rm,**alpha; double **rminv,**buck1,**buck2,**offset; - void allocate(); + virtual void allocate(); int *mol2param; // mapping from molecule 
to parameters int nparams; // # of stored parameter sets int maxparam; // max # of parameter sets @@ -58,7 +58,7 @@ class PairExp6rx : public Pair { Param *params; // parameter set for an I-J-K interaction int nspecies; - void read_file(char *); + virtual void read_file(char *); void setup(); int isite1, isite2; From cfa61b98aec5951824affe4057f81e022868d470 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 16 Dec 2016 12:37:41 -0700 Subject: [PATCH 016/267] Fixing runtime issues in fix_eos_table_rx_kokkos --- src/KOKKOS/fix_eos_table_rx_kokkos.cpp | 152 ++++++++++++++++++++----- src/KOKKOS/fix_eos_table_rx_kokkos.h | 33 ++++++ 2 files changed, 157 insertions(+), 28 deletions(-) diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp index 75e9b292f9..6cb5c0611a 100644 --- a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp @@ -40,8 +40,12 @@ FixEOStableRXKokkos::FixEOStableRXKokkos(LAMMPS *lmp, int narg, char { atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; - datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; - datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; + + update_table = 1; + h_table = new TableHost(); + d_table = new TableDevice(); k_error_flag = DAT::tdual_int_scalar("fix:error_flag"); k_warning_flag = DAT::tdual_int_scalar("fix:warning_flag"); @@ -53,6 +57,9 @@ template FixEOStableRXKokkos::~FixEOStableRXKokkos() { if (copymode) return; + + delete h_table; + delete d_table; } /* ---------------------------------------------------------------------- */ @@ -60,6 +67,11 @@ FixEOStableRXKokkos::~FixEOStableRXKokkos() template void FixEOStableRXKokkos::setup(int vflag) { + if (update_table) + create_kokkos_tables(); + + copymode = 1; + int nlocal = atom->nlocal; mask = atomKK->k_mask.view(); uCond = atomKK->k_uCond.view(); @@ -68,6 +80,10 @@ void 
FixEOStableRXKokkos::setup(int vflag) dpdTheta= atomKK->k_dpdTheta.view(); uCG = atomKK->k_uCG.view(); uCGnew = atomKK->k_uCGnew.view(); + dvector = atomKK->k_dvector.view(); + + atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); + atomKK->modified(execution_space,UCHEM_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK); Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); @@ -77,6 +93,8 @@ void FixEOStableRXKokkos::setup(int vflag) Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); error_check(); + + copymode = 0; } template @@ -102,12 +120,21 @@ void FixEOStableRXKokkos::operator()(TagFixEOStableRXTemperatureLook template void FixEOStableRXKokkos::init() { + if (update_table) + create_kokkos_tables(); + + copymode = 1; + int nlocal = atom->nlocal; mask = atomKK->k_mask.view(); uCond = atomKK->k_uCond.view(); uMech = atomKK->k_uMech.view(); uChem = atomKK->k_uChem.view(); dpdTheta= atomKK->k_dpdTheta.view(); + dvector = atomKK->k_dvector.view(); + + atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); + atomKK->modified(execution_space,UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK); if (this->restart_reset) Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); @@ -115,6 +142,8 @@ void FixEOStableRXKokkos::init() Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); error_check(); + + copymode = 0; } template @@ -136,16 +165,27 @@ void FixEOStableRXKokkos::operator()(TagFixEOStableRXInit, const int template void FixEOStableRXKokkos::post_integrate() { + if (update_table) + create_kokkos_tables(); + + copymode = 1; + int nlocal = atom->nlocal; mask = atomKK->k_mask.view(); uCond = atomKK->k_uCond.view(); uMech = atomKK->k_uMech.view(); uChem = atomKK->k_uChem.view(); dpdTheta= atomKK->k_dpdTheta.view(); + dvector = atomKK->k_dvector.view(); + + 
atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | DVECTOR_MASK); + atomKK->modified(execution_space,DPDTHETA_MASK); Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); error_check(); + + copymode = 0; } template @@ -163,6 +203,11 @@ void FixEOStableRXKokkos::operator()(TagFixEOStableRXTemperatureLook template void FixEOStableRXKokkos::end_of_step() { + if (update_table) + create_kokkos_tables(); + + copymode = 1; + int nlocal = atom->nlocal; mask = atomKK->k_mask.view(); uCond = atomKK->k_uCond.view(); @@ -171,7 +216,10 @@ void FixEOStableRXKokkos::end_of_step() dpdTheta= atomKK->k_dpdTheta.view(); uCG = atomKK->k_uCG.view(); uCGnew = atomKK->k_uCGnew.view(); - double duChem; + dvector = atomKK->k_dvector.view(); + + atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); + atomKK->modified(execution_space,UCHEM_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK); // Communicate the ghost uCGnew comm->reverse_comm_fix(this); @@ -184,6 +232,8 @@ void FixEOStableRXKokkos::end_of_step() Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); error_check(); + + copymode = 0; } /* ---------------------------------------------------------------------- @@ -200,22 +250,27 @@ void FixEOStableRXKokkos::energy_lookup(int id, double thetai, doubl ui = 0.0; nTotal = 0.0; for(int ispecies=0;ispecieslo); - thetai = MIN(thetai,tb->hi); + //Table *tb = &tables[ispecies]; + //thetai = MAX(thetai,tb->lo); + thetai = MAX(thetai,d_table_const.lo(ispecies)); + //thetai = MIN(thetai,tb->hi); + thetai = MIN(thetai,d_table_const.hi(ispecies)); if (tabstyle == LINEAR) { - itable = static_cast ((thetai - tb->lo) * tb->invdelta); - fraction = (thetai - tb->r[itable]) * tb->invdelta; - uTmp = tb->e[itable] + fraction*tb->de[itable]; + //itable = static_cast ((thetai - tb->lo) * tb->invdelta); + itable = static_cast ((thetai - d_table_const.lo(ispecies)) * 
d_table_const.invdelta(ispecies)); + //fraction = (thetai - tb->r[itable]) * tb->invdelta; + fraction = (thetai - d_table_const.r(ispecies,itable)) * d_table_const.invdelta(ispecies); + //uTmp = tb->e[itable] + fraction*tb->de[itable]; + uTmp = d_table_const.e(ispecies,itable) + fraction*d_table_const.de(ispecies,itable); uTmp += dHf[ispecies]; // mol fraction form: - ui += atom->dvector[ispecies][id]*uTmp; - nTotal += atom->dvector[ispecies][id]; + ui += dvector(ispecies,id)*uTmp; + nTotal += dvector(ispecies,id); } } - ui = ui - double(nTotal+1.5)*force->boltz*thetai; + ui = ui - double(nTotal+1.5)*force->boltz*thetai; // need class variable } /* ---------------------------------------------------------------------- @@ -226,18 +281,20 @@ template KOKKOS_INLINE_FUNCTION void FixEOStableRXKokkos::temperature_lookup(int id, double ui, double &thetai) const { - Table *tb = &tables[0]; + //Table *tb = &tables[0]; int it; double t1,t2,u1,u2,f1,f2; double maxit = 100; double temp; double delta = 0.001; + int lo = d_table_const.lo(0); + int hi = d_table_const.hi(0); // Store the current thetai in t1 - t1 = MAX(thetai,tb->lo); - t1 = MIN(t1,tb->hi); - if(t1==tb->hi) delta = -delta; + t1 = MAX(thetai,lo); + t1 = MIN(t1,hi); + if(t1==hi) delta = -delta; // Compute u1 at thetai energy_lookup(id,t1,u1); @@ -259,8 +316,8 @@ void FixEOStableRXKokkos::temperature_lookup(int id, double ui, doub if(fabs(f2-f1)<1e-15){ if(isnan(f1) || isnan(f2)) k_error_flag.d_view() = 2; temp = t1; - temp = MAX(temp,tb->lo); - temp = MIN(temp,tb->hi); + temp = MAX(temp,lo); + temp = MIN(temp,hi); k_warning_flag.d_view() = 1; break; } @@ -286,9 +343,6 @@ template int FixEOStableRXKokkos::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) { int ii,jj,m; - uChem = atomKK->k_uChem.view(); - uCG = atomKK->k_uCG.view(); - uCGnew = atomKK->k_uCGnew.view(); m = 0; for (ii = 0; ii < n; ii++) { @@ -306,9 +360,6 @@ template void FixEOStableRXKokkos::unpack_forward_comm(int n, int first, 
double *buf) { int ii,m,last; - uChem = atomKK->k_uChem.view(); - uCG = atomKK->k_uCG.view(); - uCGnew = atomKK->k_uCGnew.view(); m = 0; last = first + n ; @@ -325,8 +376,6 @@ template int FixEOStableRXKokkos::pack_reverse_comm(int n, int first, double *buf) { int i,m,last; - uCG = atomKK->k_uCG.view(); - uCGnew = atomKK->k_uCGnew.view(); m = 0; last = first + n; @@ -343,8 +392,6 @@ template void FixEOStableRXKokkos::unpack_reverse_comm(int n, int *list, double *buf) { int i,j,m; - uCG = atomKK->k_uCG.view(); - uCGnew = atomKK->k_uCGnew.view(); m = 0; for (i = 0; i < n; i++) { @@ -381,6 +428,55 @@ void FixEOStableRXKokkos::error_check() /* ---------------------------------------------------------------------- */ +template +void FixEOStableRXKokkos::create_kokkos_tables() +{ + const int tlm1 = tablength-1; + + memory->create_kokkos(d_table->lo,h_table->lo,ntables,"Table::lo"); + memory->create_kokkos(d_table->hi,h_table->hi,ntables,"Table::hi"); + memory->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta"); + + if(tabstyle == LINEAR) { + memory->create_kokkos(d_table->r,h_table->r,ntables,tablength,"Table::r"); + memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e"); + memory->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de"); + } + + for(int i=0; i < ntables; i++) { + Table* tb = &tables[i]; + + h_table->lo[i] = tb->lo; + h_table->hi[i] = tb->hi; + h_table->invdelta[i] = tb->invdelta; + + for(int j = 0; jr.dimension_1(); j++) + h_table->r(i,j) = tb->r[j]; + for(int j = 0; je.dimension_1(); j++) + h_table->e(i,j) = tb->e[j]; + for(int j = 0; jde.dimension_1(); j++) + h_table->de(i,j) = tb->de[j]; + } + + Kokkos::deep_copy(d_table->lo,h_table->lo); + Kokkos::deep_copy(d_table->hi,h_table->hi); + Kokkos::deep_copy(d_table->invdelta,h_table->invdelta); + Kokkos::deep_copy(d_table->r,h_table->r); + Kokkos::deep_copy(d_table->e,h_table->e); + Kokkos::deep_copy(d_table->de,h_table->de); + + d_table_const.lo = 
d_table->lo; + d_table_const.hi = d_table->hi; + d_table_const.invdelta = d_table->invdelta; + d_table_const.r = d_table->r; + d_table_const.e = d_table->e; + d_table_const.de = d_table->de; + + update_table = 0; +} + +/* ---------------------------------------------------------------------- */ + namespace LAMMPS_NS { template class FixEOStableRXKokkos; #ifdef KOKKOS_HAVE_CUDA diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.h b/src/KOKKOS/fix_eos_table_rx_kokkos.h index 9b0ca366a0..7de8f4dbc4 100644 --- a/src/KOKKOS/fix_eos_table_rx_kokkos.h +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.h @@ -75,13 +75,46 @@ class FixEOStableRXKokkos : public FixEOStableRX { //}; //Table *tables, *tables2; + /*struct TableDeviceConst { + typename ArrayTypes::t_int_1d_randomread lo,hi; + typename ArrayTypes::t_ffloat_1d_randomread invdelta; + typename ArrayTypes::t_ffloat_2d_randomread r,e,de; + };*/ + //Its faster not to use texture fetch if the number of tables is less than 32! + struct TableDeviceConst { + typename ArrayTypes::t_int_1d lo,hi; + typename ArrayTypes::t_ffloat_1d invdelta; + typename ArrayTypes::t_ffloat_2d_randomread r,e,de; + }; + + struct TableDevice { + typename ArrayTypes::t_int_1d lo,hi; + typename ArrayTypes::t_ffloat_1d invdelta; + typename ArrayTypes::t_ffloat_2d r,e,de; + }; + + struct TableHost { + typename ArrayTypes::t_int_1d lo,hi; + typename ArrayTypes::t_ffloat_1d invdelta; + typename ArrayTypes::t_ffloat_2d r,e,de; + }; + + TableDeviceConst d_table_const; + TableDevice* d_table; + TableHost* h_table; + + int **tabindex; + void allocate(); void error_check(); + int update_table; + void create_kokkos_tables(); //double *dHf; typename AT::t_int_1d mask; typename AT::t_efloat_1d uCond,uMech,uChem,uCG,uCGnew,rho,dpdTheta,duChem; + typename AT::t_float_2d dvector; DAT::tdual_int_scalar k_error_flag; DAT::tdual_int_scalar k_warning_flag; From 5cae3eca8c43fc9712a28776e1d196b638d8216c Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 16 Dec 2016 17:09:19 -0700 
Subject: [PATCH 017/267] Whitespace cleanup to pair_dpd_fdt_energy, should be cherry-picked to Master --- src/USER-DPD/pair_dpd_fdt_energy.cpp | 80 ++++++++++++++-------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/src/USER-DPD/pair_dpd_fdt_energy.cpp b/src/USER-DPD/pair_dpd_fdt_energy.cpp index 2041405467..0f6141d015 100644 --- a/src/USER-DPD/pair_dpd_fdt_energy.cpp +++ b/src/USER-DPD/pair_dpd_fdt_energy.cpp @@ -206,7 +206,7 @@ void PairDPDfdtEnergy::compute(int eflag, int vflag) if (r < EPSILON) continue; // r can be 0.0 in DPD systems rinv = 1.0/r; wr = 1.0 - r/cut[itype][jtype]; - wd = wr*wr; + wd = wr*wr; delvx = vxtmp - v[j][0]; delvy = vytmp - v[j][1]; @@ -214,11 +214,11 @@ void PairDPDfdtEnergy::compute(int eflag, int vflag) dot = delx*delvx + dely*delvy + delz*delvz; randnum = random->gaussian(); - // Compute the current temperature - theta_ij = 0.5*(1.0/dpdTheta[i] + 1.0/dpdTheta[j]); - theta_ij = 1.0/theta_ij; - - gamma_ij = sigma[itype][jtype]*sigma[itype][jtype] + // Compute the current temperature + theta_ij = 0.5*(1.0/dpdTheta[i] + 1.0/dpdTheta[j]); + theta_ij = 1.0/theta_ij; + + gamma_ij = sigma[itype][jtype]*sigma[itype][jtype] / (2.0*force->boltz*theta_ij); // conservative force = a0 * wr @@ -239,44 +239,44 @@ void PairDPDfdtEnergy::compute(int eflag, int vflag) f[j][2] -= delz*fpair; } - if (rmass) { - mass_i = rmass[i]; - mass_j = rmass[j]; - } else { - mass_i = mass[itype]; - mass_j = mass[jtype]; - } - massinv_i = 1.0 / mass_i; - massinv_j = 1.0 / mass_j; + if (rmass) { + mass_i = rmass[i]; + mass_j = rmass[j]; + } else { + mass_i = mass[itype]; + mass_j = mass[jtype]; + } + massinv_i = 1.0 / mass_i; + massinv_j = 1.0 / mass_j; - // Compute the mechanical and conductive energy, uMech and uCond - mu_ij = massinv_i + massinv_j; - mu_ij *= force->ftm2v; + // Compute the mechanical and conductive energy, uMech and uCond + mu_ij = massinv_i + massinv_j; + mu_ij *= force->ftm2v; - uTmp = gamma_ij*wd*rinv*rinv*dot*dot - - 
0.5*sigma[itype][jtype]*sigma[itype][jtype]*mu_ij*wd; - uTmp -= sigma[itype][jtype]*wr*rinv*dot*randnum*dtinvsqrt; - uTmp *= 0.5; + uTmp = gamma_ij*wd*rinv*rinv*dot*dot + - 0.5*sigma[itype][jtype]*sigma[itype][jtype]*mu_ij*wd; + uTmp -= sigma[itype][jtype]*wr*rinv*dot*randnum*dtinvsqrt; + uTmp *= 0.5; - duMech[i] += uTmp; - if (newton_pair || j < nlocal) { - duMech[j] += uTmp; - } - - // Compute uCond - randnum = random->gaussian(); - kappa_ij = kappa[itype][jtype]; - alpha_ij = sqrt(2.0*force->boltz*kappa_ij); - randPair = alpha_ij*wr*randnum*dtinvsqrt; + duMech[i] += uTmp; + if (newton_pair || j < nlocal) { + duMech[j] += uTmp; + } + + // Compute uCond + randnum = random->gaussian(); + kappa_ij = kappa[itype][jtype]; + alpha_ij = sqrt(2.0*force->boltz*kappa_ij); + randPair = alpha_ij*wr*randnum*dtinvsqrt; + + uTmp = kappa_ij*(1.0/dpdTheta[i] - 1.0/dpdTheta[j])*wd; + uTmp += randPair; + + duCond[i] += uTmp; + if (newton_pair || j < nlocal) { + duCond[j] -= uTmp; + } - uTmp = kappa_ij*(1.0/dpdTheta[i] - 1.0/dpdTheta[j])*wd; - uTmp += randPair; - - duCond[i] += uTmp; - if (newton_pair || j < nlocal) { - duCond[j] -= uTmp; - } - if (eflag) { // unshifted eng of conservative term: // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]); From ac57f4721cea7db41b6b964d6fb4a772fa5c5202 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 16 Dec 2016 17:14:27 -0700 Subject: [PATCH 018/267] Small whitespace tweak to pair_dpd_fdt_energy --- src/USER-DPD/pair_dpd_fdt_energy.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/USER-DPD/pair_dpd_fdt_energy.cpp b/src/USER-DPD/pair_dpd_fdt_energy.cpp index 0f6141d015..558994d35e 100644 --- a/src/USER-DPD/pair_dpd_fdt_energy.cpp +++ b/src/USER-DPD/pair_dpd_fdt_energy.cpp @@ -254,7 +254,7 @@ void PairDPDfdtEnergy::compute(int eflag, int vflag) mu_ij *= force->ftm2v; uTmp = gamma_ij*wd*rinv*rinv*dot*dot - - 0.5*sigma[itype][jtype]*sigma[itype][jtype]*mu_ij*wd; + - 
0.5*sigma[itype][jtype]*sigma[itype][jtype]*mu_ij*wd; uTmp -= sigma[itype][jtype]*wr*rinv*dot*randnum*dtinvsqrt; uTmp *= 0.5; From 21bb603b93181e4551ebc69fc0e225bf055cd21d Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 19 Dec 2016 13:30:00 -0700 Subject: [PATCH 019/267] Porting recent changes from USER-DPD package to KOKKOS package --- src/KOKKOS/fix_eos_table_rx_kokkos.cpp | 7 ++-- src/KOKKOS/fix_eos_table_rx_kokkos.h | 4 +++ src/KOKKOS/pair_exp6_rx_kokkos.cpp | 50 ++++++++++++++++++++------ 3 files changed, 47 insertions(+), 14 deletions(-) diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp index 6cb5c0611a..3b22f61e66 100644 --- a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp @@ -85,7 +85,8 @@ void FixEOStableRXKokkos::setup(int vflag) atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); atomKK->modified(execution_space,UCHEM_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK); - Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + if (!this->restart_reset) + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); // Communicate the updated momenta and velocities to all nodes comm->forward_comm_fix(this); @@ -154,8 +155,8 @@ void FixEOStableRXKokkos::operator()(TagFixEOStableRXInit, const int if(dpdTheta[i] <= 0.0) k_error_flag.d_view() = 1; energy_lookup(i,dpdTheta[i],tmp); - uCond[i] = tmp / 2.0; - uMech[i] = tmp / 2.0; + uCond[i] = 0.0; + uMech[i] = tmp; uChem[i] = 0.0; } } diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.h b/src/KOKKOS/fix_eos_table_rx_kokkos.h index 7de8f4dbc4..3b9a00afe2 100644 --- a/src/KOKKOS/fix_eos_table_rx_kokkos.h +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.h @@ -155,6 +155,10 @@ E: eos/table/rx values are not increasing The equation-of-state must an increasing function +E: FixEOStableRX requires atom_style with internal temperature and energies (e.g. 
dpd) + +Self-explanatory. + E: Internal temperature <= zero. Self-explanatory. diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index e7934cfa0b..e6b8a80f44 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -187,8 +187,10 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) k_error_flag.template modify(); k_error_flag.template sync(); - if (k_error_flag.h_view()) + if (k_error_flag.h_view() == 1) error->all(FLERR,"The number of molecules in CG particle is less than 1e-8."); + else if (k_error_flag.h_view() == 2) + error->all(FLERR,"Computed fraction less than -1.0e-10"); int inum = list->inum; NeighListKokkos* k_list = static_cast*>(list); @@ -432,13 +434,13 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxCompute::operator()(TagPairExp6rxCompute::operator()(TagPairExp6rxCompute::operator()(TagPairExp6rxCompute::getParamsEXP6(int id,double &epsilon1,double rm2_old *= pow(nTotalOFA_old,fuchslinR); } } + + // Check that no fractions are less than zero + if(fraction1 < 0.0){ + if(fraction1 < -1.0e-10){ + k_error_flag.d_view() = 2; + } + fraction1 = 0.0; + } + if(fraction2 < 0.0){ + if(fraction2 < -1.0e-10){ + k_error_flag.d_view() = 2; + } + fraction2 = 0.0; + } + if(fraction1_old < 0.0){ + if(fraction1_old < -1.0e-10){ + k_error_flag.d_view() = 2; + } + fraction1_old = 0.0; + } + if(fraction2_old < 0.0){ + if(fraction2_old < -1.0e-10){ + k_error_flag.d_view() = 2; + } + fraction2_old = 0.0; + } } /* ---------------------------------------------------------------------- */ From 3f1f51c1c7551165d023f8269663e45a0ccb08c3 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 19 Dec 2016 13:31:09 -0700 Subject: [PATCH 020/267] Changes necessary for runtime testing of Kokkos styles --- src/USER-DPD/fix_rx.cpp | 4 ++-- src/pair_hybrid.cpp | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/USER-DPD/fix_rx.cpp b/src/USER-DPD/fix_rx.cpp index 47194cd7bc..1c2313c694 
100644 --- a/src/USER-DPD/fix_rx.cpp +++ b/src/USER-DPD/fix_rx.cpp @@ -363,11 +363,11 @@ void FixRX::post_constructor() newarg2[nspecies+3] = (char *) "ghost"; newarg2[nspecies+4] = (char *) "yes"; - modify->add_fix(nspecies+5,newarg); + modify->add_fix(nspecies+5,newarg,1); fix_species = (FixPropertyAtom *) modify->fix[modify->nfix-1]; restartFlag = modify->fix[modify->nfix-1]->restart_reset; - modify->add_fix(nspecies+5,newarg2); + modify->add_fix(nspecies+5,newarg2,1); fix_species_old = (FixPropertyAtom *) modify->fix[modify->nfix-1]; if(nspecies==0) error->all(FLERR,"There are no rx species specified."); diff --git a/src/pair_hybrid.cpp b/src/pair_hybrid.cpp index 620ceadfd9..d756b9be98 100644 --- a/src/pair_hybrid.cpp +++ b/src/pair_hybrid.cpp @@ -39,9 +39,6 @@ PairHybrid::PairHybrid(LAMMPS *lmp) : Pair(lmp), outerflag = 0; respaflag = 0; - - if (lmp->kokkos) - error->all(FLERR,"Cannot yet use pair hybrid with Kokkos"); } /* ---------------------------------------------------------------------- */ From 000df6e1cf3ebf6cfccc69268aafd60ed1fba1b0 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 19 Dec 2016 15:20:10 -0700 Subject: [PATCH 021/267] Fixing what seems to be a Kokkos bug, I will submit to Kokkos lib developers too --- lib/kokkos/algorithms/src/Kokkos_Random.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index d7c06dc14b..d54abeceb0 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -670,8 +670,8 @@ namespace Kokkos { double S = 2.0; double U; while(S>=1.0) { - U = drand(); - const double V = drand(); + U = 2.0*drand() - 1.0; + const double V = 2.0*drand() - 1.0; S = U*U+V*V; } return U*sqrt(-2.0*log(S)/S); From 99910fc43241df916648ad24253f8c757a1b711c Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 19 Dec 2016 15:27:16 -0700 Subject: [PATCH 022/267] Adding CPU runtime 
tested version of pair_dpd_fdt_energy_kokkos --- src/KOKKOS/Install.sh | 2 + src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 694 ++++++++++++++++------ src/KOKKOS/pair_dpd_fdt_energy_kokkos.h | 104 +++- src/KOKKOS/rand_pool_wrap.cpp | 72 +++ src/KOKKOS/rand_pool_wrap.h | 84 +++ src/USER-DPD/pair_dpd_fdt_energy.cpp | 2 + src/USER-DPD/pair_dpd_fdt_energy.h | 6 +- 7 files changed, 737 insertions(+), 227 deletions(-) create mode 100644 src/KOKKOS/rand_pool_wrap.cpp create mode 100644 src/KOKKOS/rand_pool_wrap.h diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index a1830163bd..94be32cc32 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -200,6 +200,8 @@ action pair_tersoff_zbl_kokkos.cpp pair_tersoff_zbl.cpp action pair_tersoff_zbl_kokkos.h pair_tersoff_zbl.h action pppm_kokkos.cpp pppm.cpp action pppm_kokkos.h pppm.h +action rand_pool_wrap_kokkos.cpp +action rand_pool_wrap_kokkos.h action region_block_kokkos.cpp action region_block_kokkos.h action verlet_kokkos.cpp diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index 0bfbb9491e..3b49f43246 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -12,15 +12,13 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: James Larentzos (U.S. 
Army Research Laboratory) + Contributing author: Stan Moore (Sandia) ------------------------------------------------------------------------- */ #include #include #include #include -#include "pair_dpd_fdt_energy_kokkos.h" -#include "kokkos.h" #include "atom_kokkos.h" #include "atom_vec.h" #include "comm.h" @@ -31,30 +29,26 @@ #include "neigh_list.h" #include "neigh_request.h" #include "random_mars.h" -#include "math_const.h" #include "memory.h" #include "modify.h" +#include "pair_dpd_fdt_energy_kokkos.h" #include "error.h" #include "atom_masks.h" using namespace LAMMPS_NS; -using namespace MathConst; - -#define KOKKOS_CUDA_MAX_THREADS 256 -#define KOKKOS_CUDA_MIN_BLOCKS 8 #define EPSILON 1.0e-10 /* ---------------------------------------------------------------------- */ template -PairDPDfdtEnergyKokkos::PairDPDfdtEnergyKokkos(LAMMPS *lmp) : PairDPDfdtEnergy(lmp) +PairDPDfdtEnergyKokkos::PairDPDfdtEnergyKokkos(LAMMPS *lmp) : + PairDPDfdtEnergy(lmp),rand_pool(seed + comm->me /** , lmp/**/) { atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; - datamask_read = X_MASK | F_MASK | TYPE_MASK | TAG_MASK | ENERGY_MASK | VIRIAL_MASK; - datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; - cutsq = NULL; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; } /* ---------------------------------------------------------------------- */ @@ -62,26 +56,49 @@ PairDPDfdtEnergyKokkos::PairDPDfdtEnergyKokkos(LAMMPS *lmp) : PairDP template PairDPDfdtEnergyKokkos::~PairDPDfdtEnergyKokkos() { + if (copymode) return; + if (allocated) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); - k_cutsq = DAT::tdual_ffloat_2d(); - memory->sfree(cutsq); - eatom = NULL; - vatom = NULL; - cutsq = NULL; + memory->destroy_kokkos(k_duCond,duCond); + memory->destroy_kokkos(k_duMech,duMech); } + + memory->destroy_kokkos(k_cutsq,cutsq); + + /** rand_pool.destroy();/**/ } -/* 
---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ template -void PairDPDfdtEnergyKokkos::cleanup_copy() { - // WHY needed: this prevents parent copy from deallocating any arrays - allocated = 0; - cutsq = NULL; - eatom = NULL; - vatom = NULL; +void PairDPDfdtEnergyKokkos::init_style() +{ + PairDPDfdtEnergy::init_style(); + + // irequest = neigh request made by parent class + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same::value && + !Kokkos::Impl::is_same::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same::value; + + if (neighflag == FULL) { + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + } else if (neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 1; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk"); + } + + /** rand_pool.init(random,seed);/**/ } /* ---------------------------------------------------------------------- */ @@ -89,9 +106,12 @@ void PairDPDfdtEnergyKokkos::cleanup_copy() { template void PairDPDfdtEnergyKokkos::compute(int eflag_in, int vflag_in) { + copymode = 1; + eflag = eflag_in; vflag = vflag_in; + if (neighflag == FULL) no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -100,35 +120,115 @@ void PairDPDfdtEnergyKokkos::compute(int eflag_in, int vflag_in) if (eflag_atom) { memory->destroy_kokkos(k_eatom,eatom); memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); - d_eatom = k_eatom.view(); + d_eatom = k_eatom.d_view; } if (vflag_atom) { memory->destroy_kokkos(k_vatom,vatom); 
memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); - d_vatom = k_vatom.view(); + d_vatom = k_vatom.d_view; } - atomKK->sync(execution_space,datamask_read); - k_cutsq.template sync(); - if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); - else atomKK->modified(execution_space,F_MASK); - x = atomKK->k_x.view(); - c_x = atomKK->k_x.view(); + v = atomKK->k_v.view(); f = atomKK->k_f.view(); type = atomKK->k_type.view(); - tag = atomKK->k_tag.view(); + mass = atomKK->k_mass.view(); + rmass = atomKK->rmass; + dpdTheta = atomKK->k_dpdTheta.view(); + + k_cutsq.template sync(); + k_params.template sync(); + atomKK->sync(execution_space,X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | DPDTHETA_MASK | RMASS_MASK); + if (evflag) atomKK->modified(execution_space,F_MASK | ENERGY_MASK | VIRIAL_MASK); + else atomKK->modified(execution_space,F_MASK | UCG_MASK | UCGNEW_MASK); + atomKK->k_mass.sync(); + nlocal = atom->nlocal; - nall = atom->nlocal + atom->nghost; - newton_pair = force->newton_pair; - special_lj[0] = force->special_lj[0]; - special_lj[1] = force->special_lj[1]; - special_lj[2] = force->special_lj[2]; - special_lj[3] = force->special_lj[3]; + int nghost = atom->nghost; + int newton_pair = force->newton_pair; + dtinvsqrt = 1.0/sqrt(update->dt); + + int inum = list->inum; + NeighListKokkos* k_list = static_cast*>(list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + + boltz = force->boltz; + + int STACKPARAMS = 0; // optimize // loop over neighbors of my atoms - EV_FLOAT ev = pair_compute,void >(this,(NeighListKokkos*)list); + EV_FLOAT ev; + + if (splitFDT_flag) { + if (neighflag == HALF) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else 
Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } + } else { + + // Allocate memory for duCond and duMech + if (allocated) { + memory->destroy_kokkos(k_duCond,duCond); + memory->destroy_kokkos(k_duMech,duMech); + } + memory->create_kokkos(k_duCond,duCond,nlocal+nghost,"pair:duCond"); + memory->create_kokkos(k_duMech,duMech,nlocal+nghost,"pair:duMech"); + d_duCond = k_duCond.view(); + d_duMech = k_duMech.view(); + h_duCond = k_duCond.h_view; + h_duMech = k_duMech.h_view; + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal+nghost),*this); + + atomKK->sync(execution_space,V_MASK); + + // loop over neighbors of my atoms + + if (neighflag == HALF) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } + + // Communicate the ghost delta energies to the locally owned atoms + + k_duCond.template modify(); + k_duCond.template sync(); + k_duMech.template modify(); + k_duMech.template sync(); + comm->reverse_comm_pair(this); + //k_duCond.template modify(); + 
//k_duCond.template sync(); + //k_duMech.template modify(); + //k_duMech.template sync(); + } if (eflag_global) eng_vdwl += ev.evdwl; if (vflag_global) { @@ -151,125 +251,262 @@ void PairDPDfdtEnergyKokkos::compute(int eflag_in, int vflag_in) k_vatom.template modify(); k_vatom.template sync(); } + + copymode = 0; } template -template KOKKOS_INLINE_FUNCTION -F_FLOAT PairDPDfdtEnergyKokkos:: -compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const { - (void) i; - (void) j; - const F_FLOAT r = sqrt(rsq); - if (r < EPSILON) return 0; // r can be 0.0 in DPD systems - const F_FLOAT rinv = 1.0/r; - const F_FLOAT wr = 1.0 - r/cut[itype][jtype]; - const F_FLOAT wd = wr*wr; - - // conservative force = a0 * wr - return a0[itype][jtype]*wr*rinv; +void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyZero, const int &ii) const { + d_duCond[ii] = 0.0; + d_duMech[ii] = 0.0; } template -template +template KOKKOS_INLINE_FUNCTION -F_FLOAT PairDPDfdtEnergyKokkos:: -compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const { - (void) i; - (void) j; - const F_FLOAT r = sqrt(rsq); - if (r < EPSILON) return 0; // r can be 0.0 in DPD systems - const F_FLOAT rinv = 1.0/r; - const F_FLOAT wr = 1.0 - r/cut[itype][jtype]; - const F_FLOAT wd = wr*wr; - // unshifted eng of conservative term: - // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]); - // eng shifted to 0.0 at cutoff - return 0.5*a0[itype][jtype]*cut[itype][jtype] * wd; -} +void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeSplit, const int &ii, EV_FLOAT& ev) const { + // The f array is atomic for Half/Thread neighbor style + Kokkos::View::value> > a_f = f; -/* - int i,j,ii,jj,inum,jnum,itype,jtype; + int i,j,jj,inum,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r,rinv,wd,wr,factor_dpd; - int *ilist,*jlist,*numneigh,**firstneigh; + double vxtmp,vytmp,vztmp,delvx,delvy,delvz; + 
double rsq,r,rinv,wd,wr,factor_dpd,uTmp; + double dot,randnum; - evdwl = 0.0; - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = vflag_fdotr = 0; + double kappa_ij, alpha_ij, theta_ij, gamma_ij; + double mass_i, mass_j; + double massinv_i, massinv_j; + double randPair, mu_ij; - double **x = atom->x; - double **f = atom->f; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; - int newton_pair = force->newton_pair; + i = d_ilist[ii]; + xtmp = x(i,0); + ytmp = x(i,1); + ztmp = x(i,2); + itype = type[i]; + jnum = d_numneigh[i]; - inum = list->inum; - ilist = list->ilist; - numneigh = list->numneigh; - firstneigh = list->firstneigh; + double fx_i = 0.0; + double fy_i = 0.0; + double fz_i = 0.0; - // loop over neighbors of my atoms + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + factor_dpd = special_lj[sbmask(j)]; + j &= NEIGHMASK; - for (ii = 0; ii < inum; ii++) { - i = ilist[ii]; - xtmp = x[i][0]; - ytmp = x[i][1]; - ztmp = x[i][2]; - itype = type[i]; - jlist = firstneigh[i]; - jnum = numneigh[i]; + delx = xtmp - x(j,0); + dely = ytmp - x(j,1); + delz = ztmp - x(j,2); + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - factor_dpd = special_lj[sbmask(j)]; - j &= NEIGHMASK; + double cutsq_ij = STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype); + if (rsq < cutsq_ij) { + r = sqrt(rsq); + if (r < EPSILON) continue; // r can be 0.0 in DPD systems + rinv = 1.0/r; + double cut_ij = STACKPARAMS?m_params[itype][jtype].cut:params(itype,jtype).cut; + wr = 1.0 - r/cut_ij; + wd = wr*wr; - delx = xtmp - x[j][0]; - dely = ytmp - x[j][1]; - delz = ztmp - x[j][2]; - rsq = delx*delx + dely*dely + delz*delz; - jtype = type[j]; + // conservative force = a0 * wr + double a0_ij = STACKPARAMS?m_params[itype][jtype].a0:params(itype,jtype).a0; + fpair = a0_ij*wr; + fpair *= factor_dpd*rinv; - if (rsq < cutsq[itype][jtype]) { - r = sqrt(rsq); - if (r < EPSILON) 
continue; // r can be 0.0 in DPD systems - rinv = 1.0/r; - wr = 1.0 - r/cut[itype][jtype]; - wd = wr*wr; - - // conservative force = a0 * wr - fpair = a0[itype][jtype]*wr; - fpair *= factor_dpd*rinv; - - f[i][0] += delx*fpair; - f[i][1] += dely*fpair; - f[i][2] += delz*fpair; - if (newton_pair || j < nlocal) { - f[j][0] -= delx*fpair; - f[j][1] -= dely*fpair; - f[j][2] -= delz*fpair; - } - - if (eflag) { - // unshifted eng of conservative term: - // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]); - // eng shifted to 0.0 at cutoff - evdwl = 0.5*a0[itype][jtype]*cut[itype][jtype] * wd; - evdwl *= factor_dpd; - } - - if (evflag) ev_tally(i,j,nlocal,newton_pair, - evdwl,0.0,fpair,delx,dely,delz); + fx_i += delx*fpair; + fy_i += dely*fpair; + fz_i += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + a_f(j,0) -= delx*fpair; + a_f(j,1) -= dely*fpair; + a_f(j,2) -= delz*fpair; } + + if (eflag) { + // unshifted eng of conservative term: + // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/d_cut(itype,jtype)); + // eng shifted to 0.0 at cutoff + evdwl = 0.5*a0_ij*cut_ij * wd; + evdwl *= factor_dpd; + ev.evdwl += evdwl; + } + + if (EVFLAG) this->template ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); } } - if (vflag_fdotr) virial_fdotr_compute(); + a_f(i,0) += fx_i; + a_f(i,1) += fy_i; + a_f(i,2) += fz_i; +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeSplit, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairDPDfdtEnergyComputeSplit(), ii, ev); +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeNoSplit, const int &ii, EV_FLOAT& ev) const { + + // These array are atomic for Half/Thread neighbor style + Kokkos::View::value> > a_f = f; + Kokkos::View::value> > a_duCond = d_duCond; + Kokkos::View::value> > a_duMech = d_duMech; + + int i,j,jj,inum,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double 
vxtmp,vytmp,vztmp,delvx,delvy,delvz; + double rsq,r,rinv,wd,wr,factor_dpd,uTmp; + double dot,randnum; + + double kappa_ij, alpha_ij, theta_ij, gamma_ij; + double mass_i, mass_j; + double massinv_i, massinv_j; + double randPair, mu_ij; + + rand_type rand_gen = rand_pool.get_state(); + + i = d_ilist[ii]; + xtmp = x(i,0); + ytmp = x(i,1); + ztmp = x(i,2); + vxtmp = v(i,0); + vytmp = v(i,1); + vztmp = v(i,2); + itype = type[i]; + jnum = d_numneigh[i]; + + double fx_i = 0.0; + double fy_i = 0.0; + double fz_i = 0.0; + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + factor_dpd = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x(j,0); + dely = ytmp - x(j,1); + delz = ztmp - x(j,2); + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + double cutsq_ij = STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype); + if (rsq < cutsq_ij) { + r = sqrt(rsq); + if (r < EPSILON) continue; // r can be 0.0 in DPD systems + rinv = 1.0/r; + double cut_ij = STACKPARAMS?m_params[itype][jtype].cut:params(itype,jtype).cut; + wr = 1.0 - r/cut_ij; + wd = wr*wr; + + delvx = vxtmp - v(j,0); + delvy = vytmp - v(j,1); + delvz = vztmp - v(j,2); + dot = delx*delvx + dely*delvy + delz*delvz; + randnum = rand_gen.normal(); + + // Compute the current temperature + theta_ij = 0.5*(1.0/dpdTheta[i] + 1.0/dpdTheta[j]); + theta_ij = 1.0/theta_ij; + + double sigma_ij = STACKPARAMS?m_params[itype][jtype].sigma:params(itype,jtype).sigma; + gamma_ij = sigma_ij*sigma_ij + / (2.0*boltz*theta_ij); + + // conservative force = a0 * wr + // drag force = -gamma * wr^2 * (delx dot delv) / r + // random force = sigma * wr * rnd * dtinvsqrt; + + double a0_ij = STACKPARAMS?m_params[itype][jtype].a0:params(itype,jtype).a0; + fpair = a0_ij*wr; + fpair -= gamma_ij*wd*dot*rinv; + fpair += sigma_ij*wr*randnum*dtinvsqrt; + fpair *= factor_dpd*rinv; + + fx_i += delx*fpair; + fy_i += dely*fpair; + fz_i += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f(j,0) -= delx*fpair; + f(j,1) -= 
dely*fpair; + f(j,2) -= delz*fpair; + } + + if (rmass) { + mass_i = rmass[i]; + mass_j = rmass[j]; + } else { + mass_i = mass[itype]; + mass_j = mass[jtype]; + } + massinv_i = 1.0 / mass_i; + massinv_j = 1.0 / mass_j; + + // Compute the mechanical and conductive energy, uMech and uCond + mu_ij = massinv_i + massinv_j; + mu_ij *= force->ftm2v; + + uTmp = gamma_ij*wd*rinv*rinv*dot*dot + - 0.5*sigma_ij*sigma_ij*mu_ij*wd; + uTmp -= sigma_ij*wr*rinv*dot*randnum*dtinvsqrt; + uTmp *= 0.5; + + a_duMech[i] += uTmp; + if (NEWTON_PAIR || j < nlocal) { + a_duMech[j] += uTmp; + } + + // Compute uCond + randnum = rand_gen.normal(); + kappa_ij = STACKPARAMS?m_params[itype][jtype].kappa:params(itype,jtype).kappa; + alpha_ij = sqrt(2.0*boltz*kappa_ij); + randPair = alpha_ij*wr*randnum*dtinvsqrt; + + uTmp = kappa_ij*(1.0/dpdTheta[i] - 1.0/dpdTheta[j])*wd; + uTmp += randPair; + + a_duCond[i] += uTmp; + if (NEWTON_PAIR || j < nlocal) { + a_duCond[j] -= uTmp; + } + + if (eflag) { + // unshifted eng of conservative term: + // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/d_cut(itype,jtype)); + // eng shifted to 0.0 at cutoff + evdwl = 0.5*a0_ij*cut_ij * wd; + evdwl *= factor_dpd; + ev.evdwl += evdwl; + } + + if (EVFLAG) this->template ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); + } + } + + a_f(i,0) += fx_i; + a_f(i,1) += fy_i; + a_f(i,2) += fz_i; + + rand_pool.free_state(rand_gen); +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeNoSplit, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairDPDfdtEnergyComputeNoSplit(), ii, ev); } -*/ /* ---------------------------------------------------------------------- allocate all arrays @@ -281,69 +518,26 @@ void PairDPDfdtEnergyKokkos::allocate() PairDPDfdtEnergy::allocate(); int n = atom->ntypes; + int nlocal = atom->nlocal; + int nghost = atom->nghost; + memory->destroy(cutsq); memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template 
view(); -} -/* ---------------------------------------------------------------------- - global settings -------------------------------------------------------------------------- */ + k_params = Kokkos::DualView("PairDPDfdtEnergy::params",n+1,n+1); + params = k_params.d_view; -template -void PairDPDfdtEnergyKokkos::settings(int narg, char **arg) -{ - if (narg != 2) error->all(FLERR,"Illegal pair_style command"); - - PairDPDfdtEnergy::settings(2,arg); -} - -/* ---------------------------------------------------------------------- - init specific to this pair style -------------------------------------------------------------------------- */ - -template -void PairDPDfdtEnergyKokkos::init_style() -{ - PairDPDfdtEnergy::init_style(); - - neighflag = lmp->kokkos->neighflag; - int irequest = neighbor->nrequest - 1; - - neighbor->requests[irequest]-> - kokkos_host = Kokkos::Impl::is_same::value && - !Kokkos::Impl::is_same::value; - neighbor->requests[irequest]-> - kokkos_device = Kokkos::Impl::is_same::value; - - if (neighflag == HALF || neighflag == HALFTHREAD) { - neighbor->requests[irequest]->full = 0; - neighbor->requests[irequest]->half = 1; - } else { - error->all(FLERR,"Cannot use chosen neighbor list style with dpd/fdt/energy/kk"); + if (!splitFDT_flag) { + memory->destroy(duCond); + memory->destroy(duMech); + memory->create_kokkos(k_duCond,duCond,nlocal+nghost+1,"pair:duCond"); + memory->create_kokkos(k_duMech,duMech,nlocal+nghost+1,"pair:duMech"); + d_duCond = k_duCond.view(); + d_duMech = k_duMech.view(); + h_duCond = k_duCond.h_view; + h_duMech = k_duMech.h_view; } - -/* - if (comm->ghost_velocity == 0) - error->all(FLERR,"Pair dpd/fdt/energy requires ghost atoms store velocity"); - - // if newton off, forces between atoms ij will be double computed - // using different random numbers - - if (force->newton_pair == 0 && comm->me == 0) error->warning(FLERR, - "Pair dpd/fdt/energy requires newton pair on"); - - int irequest = neighbor->request(this,instance_me); 
- neighbor->requests[irequest]->ssa = 0; - for (int i = 0; i < modify->nfix; i++) - if (strcmp(modify->fix[i]->style,"shardlow") == 0) - neighbor->requests[irequest]->ssa = 1; - - bool eos_flag = false; - for (int i = 0; i < modify->nfix; i++) - if (strncmp(modify->fix[i]->style,"eos",3) == 0) eos_flag = true; - if(!eos_flag) error->all(FLERR,"pair_style dpd/fdt/energy requires an EOS to be specified"); -*/ } /* ---------------------------------------------------------------------- @@ -355,21 +549,129 @@ double PairDPDfdtEnergyKokkos::init_one(int i, int j) { double cutone = PairDPDfdtEnergy::init_one(i,j); + k_params.h_view(i,j).cut = cut[i][j]; + k_params.h_view(i,j).a0 = a0[i][j]; + k_params.h_view(i,j).sigma = sigma[i][j]; + k_params.h_view(i,j).kappa = kappa[i][j]; + k_params.h_view(j,i) = k_params.h_view(i,j); if(i(); + k_params.template modify(); return cutone; } +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairDPDfdtEnergyKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const +{ + const int EFLAG = eflag; + const int VFLAG = vflag_either; + + // The eatom and vatom arrays are atomic for Half/Thread neighbor style + Kokkos::View::value> > v_eatom = k_eatom.view(); + Kokkos::View::value> > v_vatom = k_vatom.view(); + + if (EFLAG) { + if (eflag_atom) { + const E_FLOAT epairhalf = 0.5 * epair; + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR || i < nlocal) v_eatom[i] += epairhalf; + if (NEWTON_PAIR || j < nlocal) v_eatom[j] += epairhalf; + } else { + v_eatom[i] += epairhalf; + } + } + } + + if (VFLAG) { + const E_FLOAT v0 = delx*delx*fpair; + const E_FLOAT v1 = dely*dely*fpair; + const E_FLOAT v2 = delz*delz*fpair; + const E_FLOAT v3 = delx*dely*fpair; + const E_FLOAT v4 = delx*delz*fpair; + const E_FLOAT v5 = dely*delz*fpair; + + if (vflag_global) { + if 
(NEIGHFLAG!=FULL) { + if (NEWTON_PAIR || i < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + if (NEWTON_PAIR || j < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } else { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + + if (vflag_atom) { + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR || i < nlocal) { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + } + if (NEWTON_PAIR || j < nlocal) { + v_vatom(j,0) += 0.5*v0; + v_vatom(j,1) += 0.5*v1; + v_vatom(j,2) += 0.5*v2; + v_vatom(j,3) += 0.5*v3; + v_vatom(j,4) += 0.5*v4; + v_vatom(j,5) += 0.5*v5; + } + } else { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int PairDPDfdtEnergyKokkos::sbmask(const int& j) const { + return j >> SBBITS & 3; +} namespace LAMMPS_NS { template class PairDPDfdtEnergyKokkos; #ifdef KOKKOS_HAVE_CUDA template class PairDPDfdtEnergyKokkos; #endif -} - +} \ No newline at end of file diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h index a8a5f25801..b8d22eff34 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h @@ -22,67 +22,115 @@ PairStyle(dpd/fdt/energy/kk/host,PairDPDfdtEnergyKokkos) #ifndef LMP_PAIR_DPD_FDT_ENERGY_KOKKOS_H #define LMP_PAIR_DPD_FDT_ENERGY_KOKKOS_H -#include "pair_kokkos.h" #include "pair_dpd_fdt_energy.h" -#include "neigh_list_kokkos.h" +#include "pair_kokkos.h" +#include 
"kokkos_type.h" +#include "Kokkos_Random.hpp" +#include "rand_pool_wrap.h" namespace LAMMPS_NS { +struct TagPairDPDfdtEnergyZero{}; + +template +struct TagPairDPDfdtEnergyComputeSplit{}; + +template +struct TagPairDPDfdtEnergyComputeNoSplit{}; + template class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { public: - enum {EnabledNeighFlags=HALFTHREAD|HALF}; - enum {COUL_FLAG=0}; typedef DeviceType device_type; + typedef ArrayTypes AT; + typedef EV_FLOAT value_type; + PairDPDfdtEnergyKokkos(class LAMMPS *); virtual ~PairDPDfdtEnergyKokkos(); virtual void compute(int, int); - virtual void settings(int, char **); void init_style(); double init_one(int, int); + void operator()(TagPairDPDfdtEnergyZero, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairDPDfdtEnergyComputeSplit, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairDPDfdtEnergyComputeSplit, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairDPDfdtEnergyComputeNoSplit, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairDPDfdtEnergyComputeNoSplit, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const; + + KOKKOS_INLINE_FUNCTION + int sbmask(const int& j) const; + + struct params_dpd { + params_dpd(){cut=0;a0=0;sigma=0;kappa=0;}; + params_dpd(int i){cut=0;a0=0;sigma=0;kappa=0;}; + F_FLOAT cut,a0,sigma,kappa; + }; + protected: - void cleanup_copy(); + int eflag,vflag; + int nlocal,neighflag; + int STACKPARAMS; + double dtinvsqrt; + double boltz; - template - KOKKOS_INLINE_FUNCTION - F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const; + virtual void allocate(); - template - KOKKOS_INLINE_FUNCTION - F_FLOAT compute_evdwl(const 
F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const; + Kokkos::DualView k_params; + typename Kokkos::DualView::t_dev_const_um params; + // hardwired to space for 15 atom types + params_dpd m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cut[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; typename ArrayTypes::t_x_array_randomread x; typename ArrayTypes::t_x_array c_x; + typename ArrayTypes::t_v_array_randomread v; typename ArrayTypes::t_f_array f; typename ArrayTypes::t_int_1d_randomread type; + typename ArrayTypes::t_float_1d_randomread mass; + double *rmass; + typename AT::t_efloat_1d dpdTheta; + DAT::tdual_efloat_1d k_duCond,k_duMech; + typename AT::t_efloat_1d d_duCond,d_duMech; + HAT::t_efloat_1d h_duCond,h_duMech; DAT::tdual_efloat_1d k_eatom; DAT::tdual_virial_array k_vatom; - typename ArrayTypes::t_efloat_1d d_eatom; - typename ArrayTypes::t_virial_array d_vatom; - typename ArrayTypes::t_tagint_1d tag; + DAT::t_efloat_1d d_eatom; + DAT::t_virial_array d_vatom; - int newton_pair; - double special_lj[4]; + typename AT::t_neighbors_2d d_neighbors; + typename AT::t_int_1d_randomread d_ilist; + typename AT::t_int_1d_randomread d_numneigh; typename ArrayTypes::tdual_ffloat_2d k_cutsq; typename ArrayTypes::t_ffloat_2d d_cutsq; + /**/Kokkos::Random_XorShift64_Pool rand_pool; + typedef typename Kokkos::Random_XorShift64_Pool::generator_type rand_type;/**/ - int neighflag; - int nlocal,nall,eflag,vflag; + /**RandPoolWrap rand_pool; + typedef RandWrap rand_type;/**/ - void allocate(); - - friend class PairComputeFunctor; - friend class PairComputeFunctor; - friend class PairComputeFunctor; - friend class PairComputeFunctor; - friend EV_FLOAT pair_compute_neighlist(PairDPDfdtEnergyKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairDPDfdtEnergyKokkos*,NeighListKokkos*); - friend EV_FLOAT 
pair_compute(PairDPDfdtEnergyKokkos*,NeighListKokkos*); friend void pair_virial_fdotr_compute(PairDPDfdtEnergyKokkos*); }; diff --git a/src/KOKKOS/rand_pool_wrap.cpp b/src/KOKKOS/rand_pool_wrap.cpp new file mode 100644 index 0000000000..b6fd0dbc55 --- /dev/null +++ b/src/KOKKOS/rand_pool_wrap.cpp @@ -0,0 +1,72 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "comm.h" +#include "rand_pool_wrap.h" +#include "lammps.h" +#include "kokkos.h" +#include "random_mars.h" +#include "update.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +RandPoolWrap::RandPoolWrap(int, LAMMPS *lmp) : Pointers(lmp) +{ + random_thr = NULL; + nthreads = lmp->kokkos->num_threads; +} + +/* ---------------------------------------------------------------------- */ + +RandPoolWrap::~RandPoolWrap() +{ + +} + +void RandPoolWrap::destroy() +{ + if (random_thr) { + for (int i=1; i < nthreads; ++i) + delete random_thr[i]; + + delete[] random_thr; + random_thr = NULL; + } +} + +void RandPoolWrap::init(RanMars* random, int seed) +{ + // deallocate pool of RNGs + if (random_thr) { + for (int i=1; i < this->nthreads; ++i) + delete random_thr[i]; + + delete[] random_thr; + } + + // allocate pool of RNGs + // generate a random number generator instance for + // all threads != 0. make sure we use unique seeds. 
+ nthreads = lmp->kokkos->num_threads; + random_thr = new RanMars*[nthreads]; + for (int tid = 1; tid < nthreads; ++tid) { + random_thr[tid] = new RanMars(lmp, seed + comm->me + + comm->nprocs*tid); + } + + // to ensure full compatibility with the serial style + // we use the serial random number generator instance for thread 0 + random_thr[0] = random; +} \ No newline at end of file diff --git a/src/KOKKOS/rand_pool_wrap.h b/src/KOKKOS/rand_pool_wrap.h new file mode 100644 index 0000000000..349896ee9a --- /dev/null +++ b/src/KOKKOS/rand_pool_wrap.h @@ -0,0 +1,84 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifndef RAND_POOL_WRAP_H +#define RAND_POOL_WRAP_H + +#include "pointers.h" +#include "kokkos_type.h" +#include "random_mars.h" +#include "error.h" + +namespace LAMMPS_NS { + +struct RandWrap { + class RanMars* rng; + + RandWrap() { + rng = NULL; + } + + KOKKOS_INLINE_FUNCTION + double drand() { + return rng->uniform(); + } + + KOKKOS_INLINE_FUNCTION + double normal() { + return rng->gaussian(); + } +}; + +class RandPoolWrap : protected Pointers { + public: + RandPoolWrap(int, class LAMMPS *); + ~RandPoolWrap(); + void destroy(); + void init(RanMars*, int); + + KOKKOS_INLINE_FUNCTION + RandWrap get_state() const + { +#ifdef KOKKOS_HAVE_CUDA + error->all(FLERR,"Cannot use Marsaglia RNG with GPUs"); +#endif + + RandWrap rand_wrap; + int tid = 0; +#ifndef KOKKOS_HAVE_CUDA + tid = LMPDeviceType::hardware_thread_id(); +#endif + rand_wrap.rng = random_thr[tid]; + return rand_wrap; + } + + KOKKOS_INLINE_FUNCTION + void free_state(RandWrap) const + { + + } + + void clean_copy() { random_thr = NULL; } + + private: + class RanMars **random_thr; + int nthreads; +}; + +} + +#endif + +/* ERROR/WARNING messages: + +*/ diff --git a/src/USER-DPD/pair_dpd_fdt_energy.cpp b/src/USER-DPD/pair_dpd_fdt_energy.cpp index 558994d35e..19994acfa1 100644 --- a/src/USER-DPD/pair_dpd_fdt_energy.cpp +++ b/src/USER-DPD/pair_dpd_fdt_energy.cpp @@ -54,6 +54,8 @@ PairDPDfdtEnergy::PairDPDfdtEnergy(LAMMPS *lmp) : Pair(lmp) PairDPDfdtEnergy::~PairDPDfdtEnergy() { + if (copymode) return; + if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); diff --git a/src/USER-DPD/pair_dpd_fdt_energy.h b/src/USER-DPD/pair_dpd_fdt_energy.h index 335beea7e3..ff29667682 100644 --- a/src/USER-DPD/pair_dpd_fdt_energy.h +++ b/src/USER-DPD/pair_dpd_fdt_energy.h @@ -31,8 +31,8 @@ class PairDPDfdtEnergy : public Pair { virtual void compute(int, int); virtual void settings(int, char **); virtual void coeff(int, char **); - void 
init_style(); - double init_one(int, int); + virtual void init_style(); + virtual double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); virtual void write_restart_settings(FILE *); @@ -53,7 +53,7 @@ class PairDPDfdtEnergy : public Pair { int seed; bool splitFDT_flag; - void allocate(); + virtual void allocate(); }; From 6f51c3b75c49e4777304e6f926ad53f4792e2ce1 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 19 Dec 2016 16:25:31 -0700 Subject: [PATCH 023/267] Fixing issues in pair_multi_lucy_rx_kokkos --- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 4 ---- src/KOKKOS/pair_multi_lucy_rx_kokkos.h | 1 - 2 files changed, 5 deletions(-) diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index 03bbaf9907..76337b5219 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -72,8 +72,6 @@ PairMultiLucyRXKokkos::PairMultiLucyRXKokkos(LAMMPS *lmp) : PairMult datamask_modify = EMPTY_MASK; update_table = 0; - ntables = 0; - tables = NULL; h_table = new TableHost(); d_table = new TableDevice(); @@ -112,11 +110,9 @@ void PairMultiLucyRXKokkos::init_style() if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->ghost = 1; } else if (neighflag == HALF || neighflag == HALFTHREAD) { neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; - neighbor->requests[irequest]->ghost = 1; } else { error->all(FLERR,"Cannot use chosen neighbor list style with multi/lucy/rx/kk"); } diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h index a259588f78..b205f00796 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h @@ -119,7 +119,6 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { // double *rsq,*drsq,*e,*de,*f,*df,*e2,*f2; //}; - int tabstyle,tablength; /*struct TableDeviceConst 
{ typename ArrayTypes::t_ffloat_2d_randomread cutsq; typename ArrayTypes::t_int_2d_randomread tabindex; From f62a6fe5a55d17cb95d0a3088d2c5d7f7f10b0ee Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 19 Dec 2016 16:50:22 -0700 Subject: [PATCH 024/267] Renaming rand_pool_wrap to rand_pool_wrap_kokkos --- src/KOKKOS/pair_dpd_fdt_energy_kokkos.h | 2 +- src/KOKKOS/{rand_pool_wrap.cpp => rand_pool_wrap_kokkos.cpp} | 4 ++-- src/KOKKOS/{rand_pool_wrap.h => rand_pool_wrap_kokkos.h} | 0 3 files changed, 3 insertions(+), 3 deletions(-) rename src/KOKKOS/{rand_pool_wrap.cpp => rand_pool_wrap_kokkos.cpp} (98%) rename src/KOKKOS/{rand_pool_wrap.h => rand_pool_wrap_kokkos.h} (100%) diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h index b8d22eff34..67fa315721 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h @@ -26,7 +26,7 @@ PairStyle(dpd/fdt/energy/kk/host,PairDPDfdtEnergyKokkos) #include "pair_kokkos.h" #include "kokkos_type.h" #include "Kokkos_Random.hpp" -#include "rand_pool_wrap.h" +#include "rand_pool_wrap_kokkos.h" namespace LAMMPS_NS { diff --git a/src/KOKKOS/rand_pool_wrap.cpp b/src/KOKKOS/rand_pool_wrap_kokkos.cpp similarity index 98% rename from src/KOKKOS/rand_pool_wrap.cpp rename to src/KOKKOS/rand_pool_wrap_kokkos.cpp index b6fd0dbc55..c11764640b 100644 --- a/src/KOKKOS/rand_pool_wrap.cpp +++ b/src/KOKKOS/rand_pool_wrap_kokkos.cpp @@ -12,7 +12,7 @@ ------------------------------------------------------------------------- */ #include "comm.h" -#include "rand_pool_wrap.h" +#include "rand_pool_wrap_kokkos.h" #include "lammps.h" #include "kokkos.h" #include "random_mars.h" @@ -69,4 +69,4 @@ void RandPoolWrap::init(RanMars* random, int seed) // to ensure full compatibility with the serial style // we use the serial random number generator instance for thread 0 random_thr[0] = random; -} \ No newline at end of file +} diff --git a/src/KOKKOS/rand_pool_wrap.h 
b/src/KOKKOS/rand_pool_wrap_kokkos.h similarity index 100% rename from src/KOKKOS/rand_pool_wrap.h rename to src/KOKKOS/rand_pool_wrap_kokkos.h From 889ee78f8ba0587020cffe61d5a6cb6515f2e4c6 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 19 Dec 2016 17:15:02 -0700 Subject: [PATCH 025/267] Change necessary for pair_exp6_rx_kokkos to compile on GPU --- src/USER-DPD/pair_exp6_rx.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/USER-DPD/pair_exp6_rx.h b/src/USER-DPD/pair_exp6_rx.h index f9654e4086..2dfc1c1a2e 100644 --- a/src/USER-DPD/pair_exp6_rx.h +++ b/src/USER-DPD/pair_exp6_rx.h @@ -37,6 +37,14 @@ class PairExp6rx : public Pair { void write_restart_settings(FILE *); void read_restart_settings(FILE *); + struct Param { + double epsilon,rm,alpha; + int ispecies; + char *name, *potential; // names of unique molecules and interaction type + char *tablename; // name of interaction table + int potentialType; // enumerated interaction potential type. + }; + protected: enum{LINEAR}; double cut_global; @@ -48,13 +56,6 @@ class PairExp6rx : public Pair { int *mol2param; // mapping from molecule to parameters int nparams; // # of stored parameter sets int maxparam; // max # of parameter sets - struct Param { - double epsilon,rm,alpha; - int ispecies; - char *name, *potential; // names of unique molecules and interaction type - char *tablename; // name of interaction table - int potentialType; // enumerated interaction potential type. 
- }; Param *params; // parameter set for an I-J-K interaction int nspecies; From 35803c75c970c4f978a8623a1364fbce8e337126 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 20 Dec 2016 17:03:46 -0700 Subject: [PATCH 026/267] Fixing issues found during GPU runtime testing --- src/KOKKOS/fix_eos_table_rx_kokkos.cpp | 79 ++++++++++++++++++-------- src/KOKKOS/fix_eos_table_rx_kokkos.h | 6 +- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 9 +-- 3 files changed, 65 insertions(+), 29 deletions(-) diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp index 3b22f61e66..cf77e25ff4 100644 --- a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp @@ -49,6 +49,13 @@ FixEOStableRXKokkos::FixEOStableRXKokkos(LAMMPS *lmp, int narg, char k_error_flag = DAT::tdual_int_scalar("fix:error_flag"); k_warning_flag = DAT::tdual_int_scalar("fix:warning_flag"); + + k_dHf = DAT::tdual_float_1d("fix:dHf",nspecies); + for (int n = 0; n < nspecies; n++) + k_dHf.h_view(n) = dHf[n]; + k_dHf.modify(); + k_dHf.sync(); + d_dHf = k_dHf.view(); } /* ---------------------------------------------------------------------- */ @@ -73,6 +80,7 @@ void FixEOStableRXKokkos::setup(int vflag) copymode = 1; int nlocal = atom->nlocal; + double boltz = force->boltz; mask = atomKK->k_mask.view(); uCond = atomKK->k_uCond.view(); uMech = atomKK->k_uMech.view(); @@ -82,16 +90,20 @@ void FixEOStableRXKokkos::setup(int vflag) uCGnew = atomKK->k_uCGnew.view(); dvector = atomKK->k_dvector.view(); - atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); - atomKK->modified(execution_space,UCHEM_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK); - - if (!this->restart_reset) + if (!this->restart_reset) { + atomKK->sync(execution_space,MASK_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK); Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + atomKK->modified(execution_space,UCHEM_MASK | 
UCG_MASK | UCGNEW_MASK); + } // Communicate the updated momenta and velocities to all nodes + atomKK->sync(Host,UCHEM_MASK | UCG_MASK | UCGNEW_MASK); comm->forward_comm_fix(this); + atomKK->modified(Host,UCHEM_MASK | UCG_MASK | UCGNEW_MASK); + atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | DVECTOR_MASK); Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + atomKK->modified(execution_space,DPDTHETA_MASK); error_check(); @@ -127,6 +139,7 @@ void FixEOStableRXKokkos::init() copymode = 1; int nlocal = atom->nlocal; + double boltz = force->boltz; mask = atomKK->k_mask.view(); uCond = atomKK->k_uCond.view(); uMech = atomKK->k_uMech.view(); @@ -134,13 +147,15 @@ void FixEOStableRXKokkos::init() dpdTheta= atomKK->k_dpdTheta.view(); dvector = atomKK->k_dvector.view(); - atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); - atomKK->modified(execution_space,UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK); - - if (this->restart_reset) + if (this->restart_reset) { + atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | DVECTOR_MASK); Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); - else + atomKK->modified(execution_space,DPDTHETA_MASK); + } else { + atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | DVECTOR_MASK); Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + atomKK->modified(execution_space,UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK); + } error_check(); @@ -172,6 +187,7 @@ void FixEOStableRXKokkos::post_integrate() copymode = 1; int nlocal = atom->nlocal; + double boltz = force->boltz; mask = atomKK->k_mask.view(); uCond = atomKK->k_uCond.view(); uMech = atomKK->k_uMech.view(); @@ -210,6 +226,7 @@ void FixEOStableRXKokkos::end_of_step() copymode = 1; int nlocal = atom->nlocal; + double boltz = force->boltz; 
mask = atomKK->k_mask.view(); uCond = atomKK->k_uCond.view(); uMech = atomKK->k_uMech.view(); @@ -219,18 +236,24 @@ void FixEOStableRXKokkos::end_of_step() uCGnew = atomKK->k_uCGnew.view(); dvector = atomKK->k_dvector.view(); - atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); - atomKK->modified(execution_space,UCHEM_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK); // Communicate the ghost uCGnew + atomKK->sync(Host,UCG_MASK | UCGNEW_MASK); comm->reverse_comm_fix(this); + atomKK->modified(Host,UCG_MASK | UCGNEW_MASK); + atomKK->sync(execution_space,MASK_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK); Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + atomKK->modified(execution_space,UCHEM_MASK | UCG_MASK | UCGNEW_MASK); // Communicate the updated momenta and velocities to all nodes + atomKK->sync(Host,UCHEM_MASK | UCG_MASK | UCGNEW_MASK); comm->forward_comm_fix(this); + atomKK->modified(Host,UCHEM_MASK | UCG_MASK | UCGNEW_MASK); + atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | DVECTOR_MASK); Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + atomKK->modified(execution_space,DPDTHETA_MASK); error_check(); @@ -265,13 +288,13 @@ void FixEOStableRXKokkos::energy_lookup(int id, double thetai, doubl //uTmp = tb->e[itable] + fraction*tb->de[itable]; uTmp = d_table_const.e(ispecies,itable) + fraction*d_table_const.de(ispecies,itable); - uTmp += dHf[ispecies]; + uTmp += d_dHf[ispecies]; // mol fraction form: ui += dvector(ispecies,id)*uTmp; nTotal += dvector(ispecies,id); } } - ui = ui - double(nTotal+1.5)*force->boltz*thetai; // need class variable + ui = ui - double(nTotal+1.5)*boltz*thetai; } /* ---------------------------------------------------------------------- @@ -344,13 +367,16 @@ template int FixEOStableRXKokkos::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) { int ii,jj,m; + 
HAT::t_efloat_1d h_uChem = atomKK->k_uChem.h_view; + HAT::t_efloat_1d h_uCG = atomKK->k_uCG.h_view; + HAT::t_efloat_1d h_uCGnew = atomKK->k_uCGnew.h_view; m = 0; for (ii = 0; ii < n; ii++) { jj = list[ii]; - buf[m++] = uChem[jj]; - buf[m++] = uCG[jj]; - buf[m++] = uCGnew[jj]; + buf[m++] = h_uChem[jj]; + buf[m++] = h_uCG[jj]; + buf[m++] = h_uCGnew[jj]; } return m; } @@ -361,13 +387,16 @@ template void FixEOStableRXKokkos::unpack_forward_comm(int n, int first, double *buf) { int ii,m,last; + HAT::t_efloat_1d h_uChem = atomKK->k_uChem.h_view; + HAT::t_efloat_1d h_uCG = atomKK->k_uCG.h_view; + HAT::t_efloat_1d h_uCGnew = atomKK->k_uCGnew.h_view; m = 0; last = first + n ; for (ii = first; ii < last; ii++){ - uChem[ii] = buf[m++]; - uCG[ii] = buf[m++]; - uCGnew[ii] = buf[m++]; + h_uChem[ii] = buf[m++]; + h_uCG[ii] = buf[m++]; + h_uCGnew[ii] = buf[m++]; } } @@ -377,12 +406,14 @@ template int FixEOStableRXKokkos::pack_reverse_comm(int n, int first, double *buf) { int i,m,last; + HAT::t_efloat_1d h_uCG = atomKK->k_uCG.h_view; + HAT::t_efloat_1d h_uCGnew = atomKK->k_uCGnew.h_view; m = 0; last = first + n; for (i = first; i < last; i++) { - buf[m++] = uCG[i]; - buf[m++] = uCGnew[i]; + buf[m++] = h_uCG[i]; + buf[m++] = h_uCGnew[i]; } return m; } @@ -393,13 +424,15 @@ template void FixEOStableRXKokkos::unpack_reverse_comm(int n, int *list, double *buf) { int i,j,m; + HAT::t_efloat_1d h_uCG = atomKK->k_uCG.h_view; + HAT::t_efloat_1d h_uCGnew = atomKK->k_uCGnew.h_view; m = 0; for (i = 0; i < n; i++) { j = list[i]; - uCG[j] += buf[m++]; - uCGnew[j] += buf[m++]; + h_uCG[j] += buf[m++]; + h_uCGnew[j] += buf[m++]; } } diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.h b/src/KOKKOS/fix_eos_table_rx_kokkos.h index 3b9a00afe2..d4a5094ae0 100644 --- a/src/KOKKOS/fix_eos_table_rx_kokkos.h +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.h @@ -105,12 +105,15 @@ class FixEOStableRXKokkos : public FixEOStableRX { int **tabindex; + double boltz; + void allocate(); void error_check(); int update_table; 
void create_kokkos_tables(); - //double *dHf; + DAT::tdual_float_1d k_dHf; + typename AT::t_float_1d d_dHf; typename AT::t_int_1d mask; typename AT::t_efloat_1d uCond,uMech,uChem,uCG,uCGnew,rho,dpdTheta,duChem; @@ -124,7 +127,6 @@ class FixEOStableRXKokkos : public FixEOStableRX { int pack_forward_comm(int , int *, double *, int, int *); void unpack_forward_comm(int , int , double *); - //int *eosSpecies; }; } diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index e6b8a80f44..a2c70ca115 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -645,7 +645,7 @@ void PairExp6rxKokkos::read_file(char *file) int params_per_line = 5; char **words = new char*[params_per_line+1]; - memory->sfree(params); + memory->destroy_kokkos(k_params,params); params = NULL; nparams = maxparam = 0; @@ -723,6 +723,7 @@ void PairExp6rxKokkos::read_file(char *file) // load up parameter settings and error check their values if (nparams == maxparam) { + k_params.template modify(); maxparam += DELTA; memory->grow_kokkos(k_params,params,maxparam, "pair:params"); @@ -823,7 +824,7 @@ void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double nTotal += dvector(ispecies,id); nTotal_old += dvector(ispecies+nspecies,id); - iparam = mol2param[ispecies]; + iparam = d_mol2param[ispecies]; if (iparam < 0 || d_params[iparam].potentialType != exp6PotentialType ) continue; if (isOneFluidApprox(isite1) || isOneFluidApprox(isite2)) { @@ -840,7 +841,7 @@ void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double fractionOFA = nTotalOFA / nTotal; for (int ispecies = 0; ispecies < nspecies; ispecies++) { - iparam = mol2param[ispecies]; + iparam = d_mol2param[ispecies]; if (iparam < 0 || d_params[iparam].potentialType != exp6PotentialType ) continue; // If Site1 matches a pure species, then grab the parameters @@ -881,7 +882,7 @@ void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double xMolei_old = 
dvector(ispecies+nspecies,id)/nTotalOFA_old; for (int jspecies = 0; jspecies < nspecies; jspecies++) { - jparam = mol2param[jspecies]; + jparam = d_mol2param[jspecies]; if (jparam < 0 || d_params[jparam].potentialType != exp6PotentialType ) continue; if (isite1 == d_params[jparam].ispecies || isite2 == d_params[jparam].ispecies) continue; rmj = d_params[jparam].rm; From 73326922d67c756653f4607222b63ac1b5378139 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 21 Dec 2016 08:56:48 -0700 Subject: [PATCH 027/267] Fixing Kokkos issue in fix_eos_table_rx_kokkos --- src/KOKKOS/fix_eos_table_rx_kokkos.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp index cf77e25ff4..c47923680c 100644 --- a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp @@ -80,7 +80,7 @@ void FixEOStableRXKokkos::setup(int vflag) copymode = 1; int nlocal = atom->nlocal; - double boltz = force->boltz; + boltz = force->boltz; mask = atomKK->k_mask.view(); uCond = atomKK->k_uCond.view(); uMech = atomKK->k_uMech.view(); @@ -139,7 +139,7 @@ void FixEOStableRXKokkos::init() copymode = 1; int nlocal = atom->nlocal; - double boltz = force->boltz; + boltz = force->boltz; mask = atomKK->k_mask.view(); uCond = atomKK->k_uCond.view(); uMech = atomKK->k_uMech.view(); @@ -187,7 +187,7 @@ void FixEOStableRXKokkos::post_integrate() copymode = 1; int nlocal = atom->nlocal; - double boltz = force->boltz; + boltz = force->boltz; mask = atomKK->k_mask.view(); uCond = atomKK->k_uCond.view(); uMech = atomKK->k_uMech.view(); @@ -226,7 +226,7 @@ void FixEOStableRXKokkos::end_of_step() copymode = 1; int nlocal = atom->nlocal; - double boltz = force->boltz; + boltz = force->boltz; mask = atomKK->k_mask.view(); uCond = atomKK->k_uCond.view(); uMech = atomKK->k_uMech.view(); From 807d9529da32778eb6dbc2216f6a72b1e1325d3c Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 21 Dec 2016 
10:41:29 -0700 Subject: [PATCH 028/267] Fixing issues found during GPU runtime testing --- lib/kokkos/algorithms/src/Kokkos_Random.hpp | 8 +++--- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 29 ++++++++++++--------- src/KOKKOS/pair_dpd_fdt_energy_kokkos.h | 3 ++- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 5 +--- src/KOKKOS/pair_exp6_rx_kokkos.h | 1 - 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index d54abeceb0..afe6b54e90 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -910,8 +910,8 @@ namespace Kokkos { double S = 2.0; double U; while(S>=1.0) { - U = drand(); - const double V = drand(); + U = 2.0*drand() - 1.0; + const double V = 2.0*drand() - 1.0; S = U*U+V*V; } return U*sqrt(-2.0*log(S)/S); @@ -1163,8 +1163,8 @@ namespace Kokkos { double S = 2.0; double U; while(S>=1.0) { - U = drand(); - const double V = drand(); + U = 2.0*drand() - 1.0; + const double V = 2.0*drand() - 1.0; S = U*U+V*V; } return U*sqrt(-2.0*log(S)/S); diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index 3b49f43246..310f4689cb 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -138,10 +138,14 @@ void PairDPDfdtEnergyKokkos::compute(int eflag_in, int vflag_in) k_cutsq.template sync(); k_params.template sync(); - atomKK->sync(execution_space,X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | DPDTHETA_MASK | RMASS_MASK); + atomKK->sync(execution_space,X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK); if (evflag) atomKK->modified(execution_space,F_MASK | ENERGY_MASK | VIRIAL_MASK); - else atomKK->modified(execution_space,F_MASK | UCG_MASK | UCGNEW_MASK); - atomKK->k_mass.sync(); + else atomKK->modified(execution_space,F_MASK); + + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + 
special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; nlocal = atom->nlocal; int nghost = atom->nghost; @@ -155,6 +159,7 @@ void PairDPDfdtEnergyKokkos::compute(int eflag_in, int vflag_in) d_ilist = k_list->d_ilist; boltz = force->boltz; + ftm2v = force->ftm2v; int STACKPARAMS = 0; // optimize @@ -195,7 +200,8 @@ void PairDPDfdtEnergyKokkos::compute(int eflag_in, int vflag_in) h_duMech = k_duMech.h_view; Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal+nghost),*this); - atomKK->sync(execution_space,V_MASK); + atomKK->sync(execution_space,V_MASK | DPDTHETA_MASK | RMASS_MASK); + atomKK->k_mass.sync(); // loop over neighbors of my atoms @@ -219,15 +225,12 @@ void PairDPDfdtEnergyKokkos::compute(int eflag_in, int vflag_in) // Communicate the ghost delta energies to the locally owned atoms + // this memory transfer can be removed when fix_dpd_fdt_energy_kokkos is added k_duCond.template modify(); k_duCond.template sync(); k_duMech.template modify(); k_duMech.template sync(); comm->reverse_comm_pair(this); - //k_duCond.template modify(); - //k_duCond.template sync(); - //k_duMech.template modify(); - //k_duMech.template sync(); } if (eflag_global) eng_vdwl += ev.evdwl; @@ -335,7 +338,7 @@ void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeSp ev.evdwl += evdwl; } - if (EVFLAG) this->template ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); + if (EVFLAG) this->template ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); } } @@ -437,9 +440,9 @@ void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeNo fy_i += dely*fpair; fz_i += delz*fpair; if (NEWTON_PAIR || j < nlocal) { - f(j,0) -= delx*fpair; - f(j,1) -= dely*fpair; - f(j,2) -= delz*fpair; + a_f(j,0) -= delx*fpair; + a_f(j,1) -= dely*fpair; + a_f(j,2) -= delz*fpair; } if (rmass) { @@ -454,7 +457,7 @@ void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeNo // Compute the mechanical and conductive energy, uMech and uCond mu_ij = massinv_i + massinv_j; - mu_ij *= 
force->ftm2v; + mu_ij *= ftm2v; uTmp = gamma_ij*wd*rinv*rinv*dot*dot - 0.5*sigma_ij*sigma_ij*mu_ij*wd; diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h index 67fa315721..8e7d01de2a 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h @@ -89,7 +89,8 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { int nlocal,neighflag; int STACKPARAMS; double dtinvsqrt; - double boltz; + double boltz,ftm2v; + double special_lj[4]; virtual void allocate(); diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index a2c70ca115..8ab7d62324 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -148,10 +148,6 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) special_lj[1] = force->special_lj[1]; special_lj[2] = force->special_lj[2]; special_lj[3] = force->special_lj[3]; - special_coul[0] = force->special_coul[0]; - special_coul[1] = force->special_coul[1]; - special_coul[2] = force->special_coul[2]; - special_coul[3] = force->special_coul[3]; newton_pair = force->newton_pair; atomKK->sync(execution_space,X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); @@ -595,6 +591,7 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxComputetemplate ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); } diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.h b/src/KOKKOS/pair_exp6_rx_kokkos.h index 366cf99d75..7dfe20fc22 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.h +++ b/src/KOKKOS/pair_exp6_rx_kokkos.h @@ -89,7 +89,6 @@ class PairExp6rxKokkos : public PairExp6rx { protected: int eflag,vflag; int nlocal,newton_pair,neighflag; - double special_coul[4]; double special_lj[4]; typename AT::t_x_array_randomread x; From 163b61a32eff89364b99ca0e9dbde28364a65ded Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 21 Dec 2016 15:37:00 -0700 Subject: [PATCH 029/267] Removing pair_table_rx_kokkos from 
Kokkos Install.sh since it isn't ready for runtime testing --- src/KOKKOS/Install.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 94be32cc32..17e9f93c9d 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -190,8 +190,8 @@ action pair_vashishta_kokkos.cpp pair_vashishta.cpp action pair_vashishta_kokkos.h pair_vashishta.h action pair_table_kokkos.cpp action pair_table_kokkos.h -action pair_table_rx_kokkos.cpp pair_table_rx.cpp -action pair_table_rx_kokkos.h pair_table_rx.h +#action pair_table_rx_kokkos.cpp pair_table_rx.cpp +#action pair_table_rx_kokkos.h pair_table_rx.h action pair_tersoff_kokkos.cpp pair_tersoff.cpp action pair_tersoff_kokkos.h pair_tersoff.h action pair_tersoff_mod_kokkos.cpp pair_tersoff_mod.cpp From f6fe61196da6acb067c321433dcfa31dba8fc39e Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 22 Dec 2016 11:34:17 -0700 Subject: [PATCH 030/267] CPU runtime tested version of pair_multi_lucy_rx_kokkos --- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 9 ++- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 11 ++-- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 70 ++++++++++------------- src/KOKKOS/pair_multi_lucy_rx_kokkos.h | 30 ++++------ src/USER-DPD/pair_multi_lucy_rx.h | 12 ++-- 5 files changed, 61 insertions(+), 71 deletions(-) diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index 310f4689cb..133d366fbc 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -58,6 +58,9 @@ PairDPDfdtEnergyKokkos::~PairDPDfdtEnergyKokkos() { if (copymode) return; + memory->destroy_kokkos(k_eatom,eatom); + memory->destroy_kokkos(k_vatom,vatom); + if (allocated) { memory->destroy_kokkos(k_duCond,duCond); memory->destroy_kokkos(k_duMech,duMech); @@ -335,7 +338,8 @@ void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeSp // eng shifted to 0.0 at cutoff evdwl = 0.5*a0_ij*cut_ij * wd; 
evdwl *= factor_dpd; - ev.evdwl += evdwl; + if (EVFLAG) + ev.evdwl += ((NEWTON_PAIR||(jtemplate ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); @@ -489,7 +493,8 @@ void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeNo // eng shifted to 0.0 at cutoff evdwl = 0.5*a0_ij*cut_ij * wd; evdwl *= factor_dpd; - ev.evdwl += evdwl; + if (EVFLAG) + ev.evdwl += ((NEWTON_PAIR||(jtemplate ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 8ab7d62324..559948067d 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -153,6 +153,7 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) atomKK->sync(execution_space,X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); if (evflag) atomKK->modified(execution_space,F_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK); else atomKK->modified(execution_space,F_MASK | UCG_MASK | UCGNEW_MASK); + k_cutsq.template sync(); // Initialize the Exp6 parameter data for both the local // and ghost atoms. 
Make the parameter data persistent @@ -495,7 +496,7 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxCompute::operator()(TagPairExp6rxCompute::operator()(TagPairExp6rxComputetemplate ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); } @@ -630,6 +632,7 @@ void PairExp6rxKokkos::allocate() memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view(); + k_cutsq.template modify(); memory->create(cut,n+1,n+1,"pair:cut_lj"); } diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index 76337b5219..1dc8ccbae9 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -71,7 +71,7 @@ PairMultiLucyRXKokkos::PairMultiLucyRXKokkos(LAMMPS *lmp) : PairMult datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; - update_table = 0; + update_table = 1; h_table = new TableHost(); d_table = new TableDevice(); @@ -85,8 +85,14 @@ PairMultiLucyRXKokkos::~PairMultiLucyRXKokkos() { if (copymode) return; + memory->destroy_kokkos(k_eatom,eatom); + memory->destroy_kokkos(k_vatom,vatom); + + memory->destroy_kokkos(k_cutsq,cutsq); + delete h_table; delete d_table; + tabindex = NULL; } /* ---------------------------------------------------------------------- */ @@ -123,6 +129,8 @@ void PairMultiLucyRXKokkos::init_style() template void PairMultiLucyRXKokkos::compute(int eflag_in, int vflag_in) { + copymode = 1; + if (update_table) create_kokkos_tables(); @@ -130,6 +138,8 @@ void PairMultiLucyRXKokkos::compute(int eflag_in, int vflag_in) compute_style(eflag_in,vflag_in); else if(tabstyle == LINEAR) compute_style(eflag_in,vflag_in); + + copymode = 0; } /* ---------------------------------------------------------------------- */ @@ -138,15 +148,9 @@ template template void PairMultiLucyRXKokkos::compute_style(int eflag_in, int vflag_in) { - copymode = 1; - eflag = eflag_in; vflag = vflag_in; - double evdwl,evdwlOld; - - evdwlOld = 0.0; - evdwl = 0.0; if (neighflag == FULL) 
no_virial_fdotr_compute = 1; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -175,6 +179,7 @@ void PairMultiLucyRXKokkos::compute_style(int eflag_in, int vflag_in atomKK->sync(execution_space,X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | DPDRHO_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); if (evflag) atomKK->modified(execution_space,F_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK); else atomKK->modified(execution_space,F_MASK | UCG_MASK | UCGNEW_MASK); + k_cutsq.template sync(); nlocal = atom->nlocal; int nghost = atom->nghost; @@ -250,8 +255,6 @@ void PairMultiLucyRXKokkos::compute_style(int eflag_in, int vflag_in k_vatom.template modify(); k_vatom.template sync(); } - - copymode = 0; } template @@ -316,10 +319,12 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeinnersq || rho[j]*rho[j] < tb->innersq){ if (rho[i]*rho[i] < d_table_const.innersq(tidx) || rho[j]*rho[j] < d_table_const.innersq(tidx)){ k_error_flag.d_view() = 1; } + if (TABSTYLE == LOOKUP) { //itable = static_cast (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta); itable = static_cast (((rho[i]*rho[i]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); @@ -338,6 +343,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute ((rho[i]*rho[i] - tb->innersq) * tb->invdelta); itable = static_cast ((rho[i]*rho[i] - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); //jtable = static_cast (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta); @@ -395,8 +401,9 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta); itable = static_cast (((rho[i]*rho[i]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); //if (TABSTYLE == LOOKUP) evdwl = tb->e[itable]; - if (TABSTYLE == LOOKUP) evdwl = d_table_const.e(tidx,itable); - else if (TABSTYLE == LINEAR){ + if (TABSTYLE == LOOKUP) { + evdwl = d_table_const.e(tidx,itable); + } else if (TABSTYLE 
== LINEAR) { if (itable >= tlm1){ k_error_flag.d_view() = 2; } @@ -404,7 +411,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputersq[itable]) * tb->invdelta); else fraction_i = (((rho[i]*rho[i]) - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx)); //evdwl = tb->e[itable] + fraction_i*tb->de[itable]; - evdwl = d_table_const.e(tidx,itable); + fraction_i*d_table_const.de(tidx,itable); + evdwl = d_table_const.e(tidx,itable) + fraction_i*d_table_const.de(tidx,itable); } else k_error_flag.d_view() = 3; evdwl *=(pi*d_cutsq(itype,itype)*d_cutsq(itype,itype))/84.0; @@ -417,7 +424,8 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute @@ -433,8 +441,6 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute void PairMultiLucyRXKokkos::computeLocalDensity() { - copymode = 1; - x = atomKK->k_x.view(); type = atomKK->k_type.view(); rho = atomKK->k_rho.view(); @@ -491,8 +497,6 @@ void PairMultiLucyRXKokkos::computeLocalDensity() } comm->forward_comm_pair(this); - - copymode = 0; } template @@ -506,6 +510,7 @@ template KOKKOS_INLINE_FUNCTION void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeLocalDensity, const int &ii) const { + // The rho array is atomic for Half/Thread neighbor style Kokkos::View::value> > a_rho = rho; @@ -565,6 +570,7 @@ void PairMultiLucyRXKokkos::getParams(int id, double &fractionOld1, double fractionOld, fraction; double nTotal, nTotalOld; + nTotal = 0.0; nTotalOld = 0.0; for (int ispecies = 0; ispecies < nspecies; ispecies++){ @@ -796,7 +802,6 @@ void PairMultiLucyRXKokkos::create_kokkos_tables() memory->create_kokkos(d_table->innersq,h_table->innersq,ntables,"Table::innersq"); memory->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta"); - memory->create_kokkos(d_table->deltasq6,h_table->deltasq6,ntables,"Table::deltasq6"); if(tabstyle == LOOKUP) { memory->create_kokkos(d_table->e,h_table->e,ntables,tlm1,"Table::e"); @@ -816,12 +821,9 @@ void 
PairMultiLucyRXKokkos::create_kokkos_tables() h_table->innersq[i] = tb->innersq; h_table->invdelta[i] = tb->invdelta; - h_table->deltasq6[i] = tb->deltasq6; for(int j = 0; jrsq.dimension_1(); j++) h_table->rsq(i,j) = tb->rsq[j]; - for(int j = 0; jdrsq.dimension_1(); j++) - h_table->drsq(i,j) = tb->drsq[j]; for(int j = 0; je.dimension_1(); j++) h_table->e(i,j) = tb->e[j]; for(int j = 0; jde.dimension_1(); j++) @@ -830,40 +832,26 @@ void PairMultiLucyRXKokkos::create_kokkos_tables() h_table->f(i,j) = tb->f[j]; for(int j = 0; jdf.dimension_1(); j++) h_table->df(i,j) = tb->df[j]; - for(int j = 0; je2.dimension_1(); j++) - h_table->e2(i,j) = tb->e2[j]; - for(int j = 0; jf2.dimension_1(); j++) - h_table->f2(i,j) = tb->f2[j]; } Kokkos::deep_copy(d_table->innersq,h_table->innersq); Kokkos::deep_copy(d_table->invdelta,h_table->invdelta); - Kokkos::deep_copy(d_table->deltasq6,h_table->deltasq6); Kokkos::deep_copy(d_table->rsq,h_table->rsq); - Kokkos::deep_copy(d_table->drsq,h_table->drsq); Kokkos::deep_copy(d_table->e,h_table->e); Kokkos::deep_copy(d_table->de,h_table->de); Kokkos::deep_copy(d_table->f,h_table->f); Kokkos::deep_copy(d_table->df,h_table->df); - Kokkos::deep_copy(d_table->e2,h_table->e2); - Kokkos::deep_copy(d_table->f2,h_table->f2); Kokkos::deep_copy(d_table->tabindex,h_table->tabindex); d_table_const.innersq = d_table->innersq; d_table_const.invdelta = d_table->invdelta; - d_table_const.deltasq6 = d_table->deltasq6; d_table_const.rsq = d_table->rsq; - d_table_const.drsq = d_table->drsq; d_table_const.e = d_table->e; d_table_const.de = d_table->de; d_table_const.f = d_table->f; d_table_const.df = d_table->df; - d_table_const.e2 = d_table->e2; - d_table_const.f2 = d_table->f2; - - Kokkos::deep_copy(d_table->cutsq,h_table->cutsq); update_table = 0; } @@ -878,11 +866,14 @@ void PairMultiLucyRXKokkos::allocate() const int nt = atom->ntypes + 1; memory->create(setflag,nt,nt,"pair:setflag"); - 
memory->create_kokkos(d_table->cutsq,h_table->cutsq,cutsq,nt,nt,"pair:cutsq"); - memory->create_kokkos(d_table->tabindex,h_table->tabindex,tabindex,nt,nt,"pair:tabindex"); - d_table_const.cutsq = d_table->cutsq; + memory->create_kokkos(k_cutsq,cutsq,nt,nt,"pair:cutsq"); + d_cutsq = k_cutsq.template view(); + k_cutsq.template modify(); + + memory->create_kokkos(d_table->tabindex,h_table->tabindex,tabindex,nt,nt,"pair:tabindex"); d_table_const.tabindex = d_table->tabindex; + memset(&setflag[0][0],0,nt*nt*sizeof(int)); memset(&cutsq[0][0],0,nt*nt*sizeof(double)); memset(&tabindex[0][0],0,nt*nt*sizeof(int)); @@ -916,9 +907,6 @@ void PairMultiLucyRXKokkos::settings(int narg, char **arg) d_table_const.tabindex = d_table->tabindex = typename ArrayTypes::t_int_2d(); h_table->tabindex = typename ArrayTypes::t_int_2d(); - - d_table_const.cutsq = d_table->cutsq = typename ArrayTypes::t_ffloat_2d(); - h_table->cutsq = typename ArrayTypes::t_ffloat_2d(); } allocated = 0; diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h index b205f00796..a6622ac4ec 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h @@ -120,44 +120,38 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { //}; /*struct TableDeviceConst { - typename ArrayTypes::t_ffloat_2d_randomread cutsq; - typename ArrayTypes::t_int_2d_randomread tabindex; - typename ArrayTypes::t_ffloat_1d_randomread innersq,invdelta,deltasq6; - typename ArrayTypes::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2; + typename AT::t_int_2d_randomread tabindex; + typename AT::t_ffloat_1d_randomread innersq,invdelta; + typename AT::t_ffloat_2d_randomread rsq,e,de,f,df; };*/ //Its faster not to use texture fetch if the number of tables is less than 32! 
struct TableDeviceConst { - typename ArrayTypes::t_ffloat_2d cutsq; - typename ArrayTypes::t_int_2d tabindex; - typename ArrayTypes::t_ffloat_1d innersq,invdelta,deltasq6; - typename ArrayTypes::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2; + typename AT::t_int_2d tabindex; + typename AT::t_ffloat_1d innersq,invdelta; + typename AT::t_ffloat_2d_randomread rsq,e,de,f,df; }; struct TableDevice { - typename ArrayTypes::t_ffloat_2d cutsq; - typename ArrayTypes::t_int_2d tabindex; - typename ArrayTypes::t_ffloat_1d innersq,invdelta,deltasq6; - typename ArrayTypes::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2; + typename AT::t_int_2d tabindex; + typename AT::t_ffloat_1d innersq,invdelta; + typename AT::t_ffloat_2d rsq,e,de,f,df; }; struct TableHost { - typename ArrayTypes::t_ffloat_2d cutsq; - typename ArrayTypes::t_int_2d tabindex; - typename ArrayTypes::t_ffloat_1d innersq,invdelta,deltasq6; - typename ArrayTypes::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2; + HAT::t_int_2d tabindex; + HAT::t_ffloat_1d innersq,invdelta; + HAT::t_ffloat_2d rsq,e,de,f,df; }; TableDeviceConst d_table_const; TableDevice* d_table; TableHost* h_table; - int **tabindex; F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; void allocate(); int update_table; void create_kokkos_tables(); - void cleanup_copy(); KOKKOS_INLINE_FUNCTION void getParams(int, double &, double &, double &, double &) const; diff --git a/src/USER-DPD/pair_multi_lucy_rx.h b/src/USER-DPD/pair_multi_lucy_rx.h index 2913716c5a..0562739c50 100644 --- a/src/USER-DPD/pair_multi_lucy_rx.h +++ b/src/USER-DPD/pair_multi_lucy_rx.h @@ -30,17 +30,17 @@ class PairMultiLucyRX : public Pair { virtual ~PairMultiLucyRX(); virtual void compute(int, int); - void settings(int, char **); + virtual void settings(int, char **); void coeff(int, char **); double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); void write_restart_settings(FILE *); void read_restart_settings(FILE *); - int pack_forward_comm(int, int *, 
double *, int, int *); - void unpack_forward_comm(int, int, double *); - int pack_reverse_comm(int, int, double *); - void unpack_reverse_comm(int, int *, double *); + virtual int pack_forward_comm(int, int *, double *, int, int *); + virtual void unpack_forward_comm(int, int, double *); + virtual int pack_reverse_comm(int, int, double *); + virtual void unpack_reverse_comm(int, int *, double *); void computeLocalDensity(); double rho_0; @@ -64,7 +64,7 @@ class PairMultiLucyRX : public Pair { int **tabindex; - void allocate(); + virtual void allocate(); void read_table(Table *, char *, char *); void param_extract(Table *, char *); void bcast_table(Table *); From a4ab877c4672b919ed5807864f5db726ef522926 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 22 Dec 2016 13:16:57 -0700 Subject: [PATCH 031/267] Change to allow pair_dpd_fdt_energy_kokkos --- src/USER-DPD/fix_dpd_energy.cpp | 2 ++ src/USER-DPD/fix_rx.cpp | 3 +++ src/USER-DPD/fix_shardlow.cpp | 2 ++ 3 files changed, 7 insertions(+) diff --git a/src/USER-DPD/fix_dpd_energy.cpp b/src/USER-DPD/fix_dpd_energy.cpp index 05907a5fcf..475e12f02f 100644 --- a/src/USER-DPD/fix_dpd_energy.cpp +++ b/src/USER-DPD/fix_dpd_energy.cpp @@ -34,6 +34,8 @@ FixDPDenergy::FixDPDenergy(LAMMPS *lmp, int narg, char **arg) : pairDPDE = NULL; pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy",1); + if (pairDPDE == NULL) + pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy/kk",1); if (pairDPDE == NULL) error->all(FLERR,"Must use pair_style dpd/fdt/energy with fix dpd/energy"); diff --git a/src/USER-DPD/fix_rx.cpp b/src/USER-DPD/fix_rx.cpp index df67cf4035..0bd560b241 100644 --- a/src/USER-DPD/fix_rx.cpp +++ b/src/USER-DPD/fix_rx.cpp @@ -627,6 +627,9 @@ int FixRX::setmask() void FixRX::init() { pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy",1); + if (pairDPDE == NULL) + pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy/kk",1); + if (pairDPDE == NULL) 
error->all(FLERR,"Must use pair_style dpd/fdt/energy with fix rx"); diff --git a/src/USER-DPD/fix_shardlow.cpp b/src/USER-DPD/fix_shardlow.cpp index 28c5382237..541f4ba3c3 100644 --- a/src/USER-DPD/fix_shardlow.cpp +++ b/src/USER-DPD/fix_shardlow.cpp @@ -94,6 +94,8 @@ FixShardlow::FixShardlow(LAMMPS *lmp, int narg, char **arg) : pairDPDE = NULL; pairDPD = (PairDPDfdt *) force->pair_match("dpd/fdt",1); pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy",1); + if (pairDPDE == NULL) + pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy/kk",1); if(pairDPDE){ comm_forward = 3; From a36e563aa56e8e3a31f19f8b010301c3b170b941 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 22 Dec 2016 14:37:42 -0700 Subject: [PATCH 032/267] Temporarily reverting change to pair_table_kokkos to allow runtime testing --- src/KOKKOS/pair_table_kokkos.cpp | 758 ++++++++++++++++++++++++++++++- src/KOKKOS/pair_table_kokkos.h | 44 +- 2 files changed, 790 insertions(+), 12 deletions(-) diff --git a/src/KOKKOS/pair_table_kokkos.cpp b/src/KOKKOS/pair_table_kokkos.cpp index b8b647964c..5230d1a91f 100644 --- a/src/KOKKOS/pair_table_kokkos.cpp +++ b/src/KOKKOS/pair_table_kokkos.cpp @@ -12,7 +12,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Christian Trott (SNL) + Contributing author: Paul Crozier (SNL) ------------------------------------------------------------------------- */ #include @@ -41,7 +41,7 @@ enum{FULL,HALFTHREAD,HALF}; /* ---------------------------------------------------------------------- */ template -PairTableKokkos::PairTableKokkos(LAMMPS *lmp) : PairTable(lmp) +PairTableKokkos::PairTableKokkos(LAMMPS *lmp) : Pair(lmp) { update_table = 0; atomKK = (AtomKokkos *) atom; @@ -98,7 +98,6 @@ void PairTableKokkos::compute_style(int eflag_in, int vflag_in) if (neighflag == FULL) no_virial_fdotr_compute = 1; - if (eflag || vflag) 
ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -196,7 +195,6 @@ compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, c //if (rsq < d_table_const.innersq(tidx)) // error->one(FLERR,"Pair distance < table inner cutoff"); - if (Specialisation::TabStyle == LOOKUP) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); //if (itable >= tlm1) @@ -314,6 +312,8 @@ void PairTableKokkos::create_kokkos_tables() memory->create_kokkos(d_table->drsq,h_table->drsq,ntables,ntable,"Table::drsq"); } + + for(int i=0; i < ntables; i++) { Table* tb = &tables[i]; @@ -451,6 +451,85 @@ void PairTableKokkos::settings(int narg, char **arg) tables = NULL; } +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +template +void PairTableKokkos::coeff(int narg, char **arg) +{ + if (narg != 4 && narg != 5) error->all(FLERR,"Illegal pair_coeff command"); + if (!allocated) allocate(); + + int ilo,ihi,jlo,jhi; + force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi); + force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi); + + int me; + MPI_Comm_rank(world,&me); + tables = (Table *) + memory->srealloc(tables,(ntables+1)*sizeof(Table),"pair:tables"); + Table *tb = &tables[ntables]; + null_table(tb); + if (me == 0) read_table(tb,arg[2],arg[3]); + bcast_table(tb); + + // set table cutoff + + if (narg == 5) tb->cut = force->numeric(FLERR,arg[4]); + else if (tb->rflag) tb->cut = tb->rhi; + else tb->cut = tb->rfile[tb->ninput-1]; + + // error check on table parameters + // insure cutoff is within table + // for BITMAP tables, file values can be in non-ascending order + + if (tb->ninput <= 1) error->one(FLERR,"Invalid pair table length"); + double rlo,rhi; + if (tb->rflag == 0) { + rlo = tb->rfile[0]; + rhi = tb->rfile[tb->ninput-1]; + } else { + rlo = tb->rlo; + rhi = tb->rhi; + } + if (tb->cut 
<= rlo || tb->cut > rhi) + error->all(FLERR,"Invalid pair table cutoff"); + if (rlo <= 0.0) error->all(FLERR,"Invalid pair table cutoff"); + + // match = 1 if don't need to spline read-in tables + // this is only the case if r values needed by final tables + // exactly match r values read from file + // for tabstyle SPLINE, always need to build spline tables + + tb->match = 0; + if (tabstyle == LINEAR && tb->ninput == tablength && + tb->rflag == RSQ && tb->rhi == tb->cut) tb->match = 1; + if (tabstyle == BITMAP && tb->ninput == 1 << tablength && + tb->rflag == BMP && tb->rhi == tb->cut) tb->match = 1; + if (tb->rflag == BMP && tb->match == 0) + error->all(FLERR,"Bitmapped table in file does not match requested table"); + + // spline read-in values and compute r,e,f vectors within table + + if (tb->match == 0) spline_table(tb); + compute_table(tb); + + // store ptr to table in tabindex + + int count = 0; + for (int i = ilo; i <= ihi; i++) { + for (int j = MAX(jlo,i); j <= jhi; j++) { + tabindex[i][j] = ntables; + setflag[i][j] = 1; + count++; + } + } + + if (count == 0) error->all(FLERR,"Illegal pair_coeff command"); + ntables++; +} + /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ @@ -469,6 +548,677 @@ double PairTableKokkos::init_one(int i, int j) return tables[tabindex[i][j]].cut; } +/* ---------------------------------------------------------------------- + read a table section from a tabulated potential file + only called by proc 0 + this function sets these values in Table: + ninput,rfile,efile,ffile,rflag,rlo,rhi,fpflag,fplo,fphi,ntablebits +------------------------------------------------------------------------- */ + +template +void PairTableKokkos::read_table(Table *tb, char *file, char *keyword) +{ + char line[MAXLINE]; + + // open file + + FILE *fp = force->open_potential(file); + if (fp == NULL) { + char 
str[128]; + sprintf(str,"Cannot open file %s",file); + error->one(FLERR,str); + } + + // loop until section found with matching keyword + + while (1) { + if (fgets(line,MAXLINE,fp) == NULL) + error->one(FLERR,"Did not find keyword in table file"); + if (strspn(line," \t\n\r") == strlen(line)) continue; // blank line + if (line[0] == '#') continue; // comment + char *word = strtok(line," \t\n\r"); + if (strcmp(word,keyword) == 0) break; // matching keyword + fgets(line,MAXLINE,fp); // no match, skip section + param_extract(tb,line); + fgets(line,MAXLINE,fp); + for (int i = 0; i < tb->ninput; i++) fgets(line,MAXLINE,fp); + } + + // read args on 2nd line of section + // allocate table arrays for file values + + fgets(line,MAXLINE,fp); + param_extract(tb,line); + memory->create(tb->rfile,tb->ninput,"pair:rfile"); + memory->create(tb->efile,tb->ninput,"pair:efile"); + memory->create(tb->ffile,tb->ninput,"pair:ffile"); + + // setup bitmap parameters for table to read in + + tb->ntablebits = 0; + int masklo,maskhi,nmask,nshiftbits; + if (tb->rflag == BMP) { + while (1 << tb->ntablebits < tb->ninput) tb->ntablebits++; + if (1 << tb->ntablebits != tb->ninput) + error->one(FLERR,"Bitmapped table is incorrect length in table file"); + init_bitmap(tb->rlo,tb->rhi,tb->ntablebits,masklo,maskhi,nmask,nshiftbits); + } + + // read r,e,f table values from file + // if rflag set, compute r + // if rflag not set, use r from file + + int itmp; + double rtmp; + union_int_float_t rsq_lookup; + + fgets(line,MAXLINE,fp); + for (int i = 0; i < tb->ninput; i++) { + fgets(line,MAXLINE,fp); + sscanf(line,"%d %lg %lg %lg",&itmp,&rtmp,&tb->efile[i],&tb->ffile[i]); + + if (tb->rflag == RLINEAR) + rtmp = tb->rlo + (tb->rhi - tb->rlo)*i/(tb->ninput-1); + else if (tb->rflag == RSQ) { + rtmp = tb->rlo*tb->rlo + + (tb->rhi*tb->rhi - tb->rlo*tb->rlo)*i/(tb->ninput-1); + rtmp = sqrt(rtmp); + } else if (tb->rflag == BMP) { + rsq_lookup.i = i << nshiftbits; + rsq_lookup.i |= masklo; + if (rsq_lookup.f < 
tb->rlo*tb->rlo) { + rsq_lookup.i = i << nshiftbits; + rsq_lookup.i |= maskhi; + } + rtmp = sqrtf(rsq_lookup.f); + } + + tb->rfile[i] = rtmp; + } + + // close file + + fclose(fp); +} + +/* ---------------------------------------------------------------------- + broadcast read-in table info from proc 0 to other procs + this function communicates these values in Table: + ninput,rfile,efile,ffile,rflag,rlo,rhi,fpflag,fplo,fphi +------------------------------------------------------------------------- */ + +template +void PairTableKokkos::bcast_table(Table *tb) +{ + MPI_Bcast(&tb->ninput,1,MPI_INT,0,world); + + int me; + MPI_Comm_rank(world,&me); + if (me > 0) { + memory->create(tb->rfile,tb->ninput,"pair:rfile"); + memory->create(tb->efile,tb->ninput,"pair:efile"); + memory->create(tb->ffile,tb->ninput,"pair:ffile"); + } + + MPI_Bcast(tb->rfile,tb->ninput,MPI_DOUBLE,0,world); + MPI_Bcast(tb->efile,tb->ninput,MPI_DOUBLE,0,world); + MPI_Bcast(tb->ffile,tb->ninput,MPI_DOUBLE,0,world); + + MPI_Bcast(&tb->rflag,1,MPI_INT,0,world); + if (tb->rflag) { + MPI_Bcast(&tb->rlo,1,MPI_DOUBLE,0,world); + MPI_Bcast(&tb->rhi,1,MPI_DOUBLE,0,world); + } + MPI_Bcast(&tb->fpflag,1,MPI_INT,0,world); + if (tb->fpflag) { + MPI_Bcast(&tb->fplo,1,MPI_DOUBLE,0,world); + MPI_Bcast(&tb->fphi,1,MPI_DOUBLE,0,world); + } +} + +/* ---------------------------------------------------------------------- + build spline representation of e,f over entire range of read-in table + this function sets these values in Table: e2file,f2file +------------------------------------------------------------------------- */ + +template +void PairTableKokkos::spline_table(Table *tb) +{ + memory->create(tb->e2file,tb->ninput,"pair:e2file"); + memory->create(tb->f2file,tb->ninput,"pair:f2file"); + + double ep0 = - tb->ffile[0]; + double epn = - tb->ffile[tb->ninput-1]; + spline(tb->rfile,tb->efile,tb->ninput,ep0,epn,tb->e2file); + + if (tb->fpflag == 0) { + tb->fplo = (tb->ffile[1] - tb->ffile[0]) / (tb->rfile[1] - 
tb->rfile[0]); + tb->fphi = (tb->ffile[tb->ninput-1] - tb->ffile[tb->ninput-2]) / + (tb->rfile[tb->ninput-1] - tb->rfile[tb->ninput-2]); + } + + double fp0 = tb->fplo; + double fpn = tb->fphi; + spline(tb->rfile,tb->ffile,tb->ninput,fp0,fpn,tb->f2file); +} + +/* ---------------------------------------------------------------------- + extract attributes from parameter line in table section + format of line: N value R/RSQ/BITMAP lo hi FP fplo fphi + N is required, other params are optional +------------------------------------------------------------------------- */ + +template +void PairTableKokkos::param_extract(Table *tb, char *line) +{ + tb->ninput = 0; + tb->rflag = NONE; + tb->fpflag = 0; + + char *word = strtok(line," \t\n\r\f"); + while (word) { + if (strcmp(word,"N") == 0) { + word = strtok(NULL," \t\n\r\f"); + tb->ninput = atoi(word); + } else if (strcmp(word,"R") == 0 || strcmp(word,"RSQ") == 0 || + strcmp(word,"BITMAP") == 0) { + if (strcmp(word,"R") == 0) tb->rflag = RLINEAR; + else if (strcmp(word,"RSQ") == 0) tb->rflag = RSQ; + else if (strcmp(word,"BITMAP") == 0) tb->rflag = BMP; + word = strtok(NULL," \t\n\r\f"); + tb->rlo = atof(word); + word = strtok(NULL," \t\n\r\f"); + tb->rhi = atof(word); + } else if (strcmp(word,"FP") == 0) { + tb->fpflag = 1; + word = strtok(NULL," \t\n\r\f"); + tb->fplo = atof(word); + word = strtok(NULL," \t\n\r\f"); + tb->fphi = atof(word); + } else { + error->one(FLERR,"Invalid keyword in pair table parameters"); + } + word = strtok(NULL," \t\n\r\f"); + } + + if (tb->ninput == 0) error->one(FLERR,"Pair table parameters did not set N"); +} + +/* ---------------------------------------------------------------------- + compute r,e,f vectors from splined values +------------------------------------------------------------------------- */ + +template +void PairTableKokkos::compute_table(Table *tb) +{ + update_table = 1; + int tlm1 = tablength-1; + + // inner = inner table bound + // cut = outer table bound + // delta = table 
spacing in rsq for N-1 bins + + double inner; + if (tb->rflag) inner = tb->rlo; + else inner = tb->rfile[0]; + tb->innersq = inner*inner; + tb->delta = (tb->cut*tb->cut - tb->innersq) / tlm1; + tb->invdelta = 1.0/tb->delta; + + // direct lookup tables + // N-1 evenly spaced bins in rsq from inner to cut + // e,f = value at midpt of bin + // e,f are N-1 in length since store 1 value at bin midpt + // f is converted to f/r when stored in f[i] + // e,f are never a match to read-in values, always computed via spline interp + + if (tabstyle == LOOKUP) { + memory->create(tb->e,tlm1,"pair:e"); + memory->create(tb->f,tlm1,"pair:f"); + + double r,rsq; + for (int i = 0; i < tlm1; i++) { + rsq = tb->innersq + (i+0.5)*tb->delta; + r = sqrt(rsq); + tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r); + tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r; + } + } + + // linear tables + // N-1 evenly spaced bins in rsq from inner to cut + // rsq,e,f = value at lower edge of bin + // de,df values = delta from lower edge to upper edge of bin + // rsq,e,f are N in length so de,df arrays can compute difference + // f is converted to f/r when stored in f[i] + // e,f can match read-in values, else compute via spline interp + + if (tabstyle == LINEAR) { + memory->create(tb->rsq,tablength,"pair:rsq"); + memory->create(tb->e,tablength,"pair:e"); + memory->create(tb->f,tablength,"pair:f"); + memory->create(tb->de,tlm1,"pair:de"); + memory->create(tb->df,tlm1,"pair:df"); + + double r,rsq; + for (int i = 0; i < tablength; i++) { + rsq = tb->innersq + i*tb->delta; + r = sqrt(rsq); + tb->rsq[i] = rsq; + if (tb->match) { + tb->e[i] = tb->efile[i]; + tb->f[i] = tb->ffile[i]/r; + } else { + tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r); + tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r; + } + } + + for (int i = 0; i < tlm1; i++) { + tb->de[i] = tb->e[i+1] - tb->e[i]; + tb->df[i] = tb->f[i+1] - tb->f[i]; + } + } + + // cubic spline tables + 
// N-1 evenly spaced bins in rsq from inner to cut + // rsq,e,f = value at lower edge of bin + // e2,f2 = spline coefficient for each bin + // rsq,e,f,e2,f2 are N in length so have N-1 spline bins + // f is converted to f/r after e is splined + // e,f can match read-in values, else compute via spline interp + + if (tabstyle == SPLINE) { + memory->create(tb->rsq,tablength,"pair:rsq"); + memory->create(tb->e,tablength,"pair:e"); + memory->create(tb->f,tablength,"pair:f"); + memory->create(tb->e2,tablength,"pair:e2"); + memory->create(tb->f2,tablength,"pair:f2"); + + tb->deltasq6 = tb->delta*tb->delta / 6.0; + + double r,rsq; + for (int i = 0; i < tablength; i++) { + rsq = tb->innersq + i*tb->delta; + r = sqrt(rsq); + tb->rsq[i] = rsq; + if (tb->match) { + tb->e[i] = tb->efile[i]; + tb->f[i] = tb->ffile[i]/r; + } else { + tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r); + tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r); + } + } + + // ep0,epn = dh/dg at inner and at cut + // h(r) = e(r) and g(r) = r^2 + // dh/dg = (de/dr) / 2r = -f/2r + + double ep0 = - tb->f[0] / (2.0 * sqrt(tb->innersq)); + double epn = - tb->f[tlm1] / (2.0 * tb->cut); + spline(tb->rsq,tb->e,tablength,ep0,epn,tb->e2); + + // fp0,fpn = dh/dg at inner and at cut + // h(r) = f(r)/r and g(r) = r^2 + // dh/dg = (1/r df/dr - f/r^2) / 2r + // dh/dg in secant approx = (f(r2)/r2 - f(r1)/r1) / (g(r2) - g(r1)) + + double fp0,fpn; + double secant_factor = 0.1; + if (tb->fpflag) fp0 = (tb->fplo/sqrt(tb->innersq) - tb->f[0]/tb->innersq) / + (2.0 * sqrt(tb->innersq)); + else { + double rsq1 = tb->innersq; + double rsq2 = rsq1 + secant_factor*tb->delta; + fp0 = (splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,sqrt(rsq2)) / + sqrt(rsq2) - tb->f[0] / sqrt(rsq1)) / (secant_factor*tb->delta); + } + + if (tb->fpflag && tb->cut == tb->rfile[tb->ninput-1]) fpn = + (tb->fphi/tb->cut - tb->f[tlm1]/(tb->cut*tb->cut)) / (2.0 * tb->cut); + else { + double rsq2 = tb->cut * tb->cut; + double rsq1 = 
rsq2 - secant_factor*tb->delta; + fpn = (tb->f[tlm1] / sqrt(rsq2) - + splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,sqrt(rsq1)) / + sqrt(rsq1)) / (secant_factor*tb->delta); + } + + for (int i = 0; i < tablength; i++) tb->f[i] /= sqrt(tb->rsq[i]); + spline(tb->rsq,tb->f,tablength,fp0,fpn,tb->f2); + } + + // bitmapped linear tables + // 2^N bins from inner to cut, spaced in bitmapped manner + // f is converted to f/r when stored in f[i] + // e,f can match read-in values, else compute via spline interp + + if (tabstyle == BITMAP) { + double r; + union_int_float_t rsq_lookup; + int masklo,maskhi; + + // linear lookup tables of length ntable = 2^n + // stored value = value at lower edge of bin + + init_bitmap(inner,tb->cut,tablength,masklo,maskhi,tb->nmask,tb->nshiftbits); + int ntable = 1 << tablength; + int ntablem1 = ntable - 1; + + memory->create(tb->rsq,ntable,"pair:rsq"); + memory->create(tb->e,ntable,"pair:e"); + memory->create(tb->f,ntable,"pair:f"); + memory->create(tb->de,ntable,"pair:de"); + memory->create(tb->df,ntable,"pair:df"); + memory->create(tb->drsq,ntable,"pair:drsq"); + + union_int_float_t minrsq_lookup; + minrsq_lookup.i = 0 << tb->nshiftbits; + minrsq_lookup.i |= maskhi; + + for (int i = 0; i < ntable; i++) { + rsq_lookup.i = i << tb->nshiftbits; + rsq_lookup.i |= masklo; + if (rsq_lookup.f < tb->innersq) { + rsq_lookup.i = i << tb->nshiftbits; + rsq_lookup.i |= maskhi; + } + r = sqrtf(rsq_lookup.f); + tb->rsq[i] = rsq_lookup.f; + if (tb->match) { + tb->e[i] = tb->efile[i]; + tb->f[i] = tb->ffile[i]/r; + } else { + tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r); + tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r; + } + minrsq_lookup.f = MIN(minrsq_lookup.f,rsq_lookup.f); + } + + tb->innersq = minrsq_lookup.f; + + for (int i = 0; i < ntablem1; i++) { + tb->de[i] = tb->e[i+1] - tb->e[i]; + tb->df[i] = tb->f[i+1] - tb->f[i]; + tb->drsq[i] = 1.0/(tb->rsq[i+1] - tb->rsq[i]); + } + + // get the delta values for the 
last table entries + // tables are connected periodically between 0 and ntablem1 + + tb->de[ntablem1] = tb->e[0] - tb->e[ntablem1]; + tb->df[ntablem1] = tb->f[0] - tb->f[ntablem1]; + tb->drsq[ntablem1] = 1.0/(tb->rsq[0] - tb->rsq[ntablem1]); + + // get the correct delta values at itablemax + // smallest r is in bin itablemin + // largest r is in bin itablemax, which is itablemin-1, + // or ntablem1 if itablemin=0 + + // deltas at itablemax only needed if corresponding rsq < cut*cut + // if so, compute deltas between rsq and cut*cut + // if tb->match, data at cut*cut is unavailable, so we'll take + // deltas at itablemax-1 as a good approximation + + double e_tmp,f_tmp; + int itablemin = minrsq_lookup.i & tb->nmask; + itablemin >>= tb->nshiftbits; + int itablemax = itablemin - 1; + if (itablemin == 0) itablemax = ntablem1; + int itablemaxm1 = itablemax - 1; + if (itablemax == 0) itablemaxm1 = ntablem1; + rsq_lookup.i = itablemax << tb->nshiftbits; + rsq_lookup.i |= maskhi; + if (rsq_lookup.f < tb->cut*tb->cut) { + if (tb->match) { + tb->de[itablemax] = tb->de[itablemaxm1]; + tb->df[itablemax] = tb->df[itablemaxm1]; + tb->drsq[itablemax] = tb->drsq[itablemaxm1]; + } else { + rsq_lookup.f = tb->cut*tb->cut; + r = sqrtf(rsq_lookup.f); + e_tmp = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r); + f_tmp = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r; + tb->de[itablemax] = e_tmp - tb->e[itablemax]; + tb->df[itablemax] = f_tmp - tb->f[itablemax]; + tb->drsq[itablemax] = 1.0/(rsq_lookup.f - tb->rsq[itablemax]); + } + } + } +} + +/* ---------------------------------------------------------------------- + set all ptrs in a table to NULL, so can be freed safely +------------------------------------------------------------------------- */ + +template +void PairTableKokkos::null_table(Table *tb) +{ + tb->rfile = tb->efile = tb->ffile = NULL; + tb->e2file = tb->f2file = NULL; + tb->rsq = tb->drsq = tb->e = tb->de = NULL; + tb->f = tb->df = tb->e2 = tb->f2 = NULL; +} + 
+/* ---------------------------------------------------------------------- + free all arrays in a table +------------------------------------------------------------------------- */ + +template +void PairTableKokkos::free_table(Table *tb) +{ + memory->destroy(tb->rfile); + memory->destroy(tb->efile); + memory->destroy(tb->ffile); + memory->destroy(tb->e2file); + memory->destroy(tb->f2file); + + memory->destroy(tb->rsq); + memory->destroy(tb->drsq); + memory->destroy(tb->e); + memory->destroy(tb->de); + memory->destroy(tb->f); + memory->destroy(tb->df); + memory->destroy(tb->e2); + memory->destroy(tb->f2); +} + +/* ---------------------------------------------------------------------- + spline and splint routines modified from Numerical Recipes +------------------------------------------------------------------------- */ + +template +void PairTableKokkos::spline(double *x, double *y, int n, + double yp1, double ypn, double *y2) +{ + int i,k; + double p,qn,sig,un; + double *u = new double[n]; + + if (yp1 > 0.99e30) y2[0] = u[0] = 0.0; + else { + y2[0] = -0.5; + u[0] = (3.0/(x[1]-x[0])) * ((y[1]-y[0]) / (x[1]-x[0]) - yp1); + } + for (i = 1; i < n-1; i++) { + sig = (x[i]-x[i-1]) / (x[i+1]-x[i-1]); + p = sig*y2[i-1] + 2.0; + y2[i] = (sig-1.0) / p; + u[i] = (y[i+1]-y[i]) / (x[i+1]-x[i]) - (y[i]-y[i-1]) / (x[i]-x[i-1]); + u[i] = (6.0*u[i] / (x[i+1]-x[i-1]) - sig*u[i-1]) / p; + } + if (ypn > 0.99e30) qn = un = 0.0; + else { + qn = 0.5; + un = (3.0/(x[n-1]-x[n-2])) * (ypn - (y[n-1]-y[n-2]) / (x[n-1]-x[n-2])); + } + y2[n-1] = (un-qn*u[n-2]) / (qn*y2[n-2] + 1.0); + for (k = n-2; k >= 0; k--) y2[k] = y2[k]*y2[k+1] + u[k]; + + delete [] u; +} + +/* ---------------------------------------------------------------------- */ + +template +double PairTableKokkos::splint(double *xa, double *ya, double *y2a, int n, double x) +{ + int klo,khi,k; + double h,b,a,y; + + klo = 0; + khi = n-1; + while (khi-klo > 1) { + k = (khi+klo) >> 1; + if (xa[k] > x) khi = k; + else klo = k; + } + h = 
xa[khi]-xa[klo]; + a = (xa[khi]-x) / h; + b = (x-xa[klo]) / h; + y = a*ya[klo] + b*ya[khi] + + ((a*a*a-a)*y2a[klo] + (b*b*b-b)*y2a[khi]) * (h*h)/6.0; + return y; +} + +/* ---------------------------------------------------------------------- + proc 0 writes to restart file +------------------------------------------------------------------------- */ + +template +void PairTableKokkos::write_restart(FILE *fp) +{ + write_restart_settings(fp); +} + +/* ---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts +------------------------------------------------------------------------- */ + +template +void PairTableKokkos::read_restart(FILE *fp) +{ + read_restart_settings(fp); + allocate(); +} + +/* ---------------------------------------------------------------------- + proc 0 writes to restart file +------------------------------------------------------------------------- */ + +template +void PairTableKokkos::write_restart_settings(FILE *fp) +{ + fwrite(&tabstyle,sizeof(int),1,fp); + fwrite(&tablength,sizeof(int),1,fp); + fwrite(&ewaldflag,sizeof(int),1,fp); + fwrite(&pppmflag,sizeof(int),1,fp); + fwrite(&msmflag,sizeof(int),1,fp); + fwrite(&dispersionflag,sizeof(int),1,fp); + fwrite(&tip4pflag,sizeof(int),1,fp); +} + +/* ---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts +------------------------------------------------------------------------- */ + +template +void PairTableKokkos::read_restart_settings(FILE *fp) +{ + if (comm->me == 0) { + fread(&tabstyle,sizeof(int),1,fp); + fread(&tablength,sizeof(int),1,fp); + fread(&ewaldflag,sizeof(int),1,fp); + fread(&pppmflag,sizeof(int),1,fp); + fread(&msmflag,sizeof(int),1,fp); + fread(&dispersionflag,sizeof(int),1,fp); + fread(&tip4pflag,sizeof(int),1,fp); + } + MPI_Bcast(&tabstyle,1,MPI_INT,0,world); + MPI_Bcast(&tablength,1,MPI_INT,0,world); + MPI_Bcast(&ewaldflag,1,MPI_INT,0,world); + 
MPI_Bcast(&pppmflag,1,MPI_INT,0,world); + MPI_Bcast(&msmflag,1,MPI_INT,0,world); + MPI_Bcast(&dispersionflag,1,MPI_INT,0,world); + MPI_Bcast(&tip4pflag,1,MPI_INT,0,world); +} + +/* ---------------------------------------------------------------------- */ + +template +double PairTableKokkos::single(int i, int j, int itype, int jtype, double rsq, + double factor_coul, double factor_lj, + double &fforce) +{ + int itable; + double fraction,value,a,b,phi; + int tlm1 = tablength - 1; + + Table *tb = &tables[tabindex[itype][jtype]]; + if (rsq < tb->innersq) error->one(FLERR,"Pair distance < table inner cutoff"); + + if (tabstyle == LOOKUP) { + itable = static_cast ((rsq-tb->innersq) * tb->invdelta); + if (itable >= tlm1) error->one(FLERR,"Pair distance > table outer cutoff"); + fforce = factor_lj * tb->f[itable]; + } else if (tabstyle == LINEAR) { + itable = static_cast ((rsq-tb->innersq) * tb->invdelta); + if (itable >= tlm1) error->one(FLERR,"Pair distance > table outer cutoff"); + fraction = (rsq - tb->rsq[itable]) * tb->invdelta; + value = tb->f[itable] + fraction*tb->df[itable]; + fforce = factor_lj * value; + } else if (tabstyle == SPLINE) { + itable = static_cast ((rsq-tb->innersq) * tb->invdelta); + if (itable >= tlm1) error->one(FLERR,"Pair distance > table outer cutoff"); + b = (rsq - tb->rsq[itable]) * tb->invdelta; + a = 1.0 - b; + value = a * tb->f[itable] + b * tb->f[itable+1] + + ((a*a*a-a)*tb->f2[itable] + (b*b*b-b)*tb->f2[itable+1]) * + tb->deltasq6; + fforce = factor_lj * value; + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & tb->nmask; + itable >>= tb->nshiftbits; + fraction = (rsq_lookup.f - tb->rsq[itable]) * tb->drsq[itable]; + value = tb->f[itable] + fraction*tb->df[itable]; + fforce = factor_lj * value; + } + + if (tabstyle == LOOKUP) + phi = tb->e[itable]; + else if (tabstyle == LINEAR || tabstyle == BITMAP) + phi = tb->e[itable] + fraction*tb->de[itable]; + else + phi = a * tb->e[itable] + b * 
tb->e[itable+1] + + ((a*a*a-a)*tb->e2[itable] + (b*b*b-b)*tb->e2[itable+1]) * tb->deltasq6; + return factor_lj*phi; +} + +/* ---------------------------------------------------------------------- + return the Coulomb cutoff for tabled potentials + called by KSpace solvers which require that all pairwise cutoffs be the same + loop over all tables not just those indexed by tabindex[i][j] since + no way to know which tables are active since pair::init() not yet called +------------------------------------------------------------------------- */ + +template +void *PairTableKokkos::extract(const char *str, int &dim) +{ + if (strcmp(str,"cut_coul") != 0) return NULL; + if (ntables == 0) error->all(FLERR,"All pair coeffs are not set"); + + double cut_coul = tables[0].cut; + for (int m = 1; m < ntables; m++) + if (tables[m].cut != cut_coul) + error->all(FLERR, + "Pair table cutoffs must all be equal to use with KSpace"); + dim = 0; + return &tables[0].cut; +} + template void PairTableKokkos::init_style() { diff --git a/src/KOKKOS/pair_table_kokkos.h b/src/KOKKOS/pair_table_kokkos.h index 5b3f3852c3..4d3a9ec106 100644 --- a/src/KOKKOS/pair_table_kokkos.h +++ b/src/KOKKOS/pair_table_kokkos.h @@ -22,7 +22,7 @@ PairStyle(table/kk/host,PairTableKokkos) #ifndef LMP_PAIR_TABLE_KOKKOS_H #define LMP_PAIR_TABLE_KOKKOS_H -#include "pair_table.h" +#include "pair.h" #include "pair_kokkos.h" #include "neigh_list_kokkos.h" #include "atom_kokkos.h" @@ -38,7 +38,7 @@ template class PairTableComputeFunctor; template -class PairTableKokkos : public PairTable { +class PairTableKokkos : public Pair { public: enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2}; @@ -59,9 +59,18 @@ class PairTableKokkos : public PairTable { const NeighListKokkos &list) const; */ void settings(int, char **); + void coeff(int, char **); double init_one(int, int); + void write_restart(FILE *); + void read_restart(FILE *); + void write_restart_settings(FILE *); + void read_restart_settings(FILE *); + double single(int, 
int, int, int, double, double, double, double &); + void *extract(const char *, int &); + void init_style(); + protected: enum{LOOKUP,LINEAR,SPLINE,BITMAP}; @@ -98,6 +107,17 @@ class PairTableKokkos : public PairTable { typename ArrayTypes::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2; }; + struct Table { + int ninput,rflag,fpflag,match,ntablebits; + int nshiftbits,nmask; + double rlo,rhi,fplo,fphi,cut; + double *rfile,*efile,*ffile; + double *e2file,*f2file; + double innersq,delta,invdelta,deltasq6; + double *rsq,*drsq,*e,*de,*f,*df,*e2,*f2; + }; + int ntables; + Table *tables; TableDeviceConst d_table_const; TableDevice* d_table; TableHost* h_table; @@ -108,6 +128,15 @@ class PairTableKokkos : public PairTable { typename ArrayTypes::t_ffloat_2d d_cutsq; void allocate(); + void read_table(Table *, char *, char *); + void param_extract(Table *, char *); + void bcast_table(Table *); + void spline_table(Table *); + void compute_table(Table *); + void null_table(Table *); + void free_table(Table *); + void spline(double *, double *, int, double, double, double *); + double splint(double *, double *, double *, int, double); typename ArrayTypes::t_x_array_randomread x; typename ArrayTypes::t_x_array_const c_x; @@ -176,6 +205,11 @@ class PairTableKokkos : public PairTable { friend void pair_virial_fdotr_compute(PairTableKokkos*); }; + + + + + } #endif @@ -255,10 +289,4 @@ E: Cannot use chosen neighbor list style with lj/cut/kk That style is not supported by Kokkos. 
- - - */ - - - From cc1b55e0310a5a0953d500c1b92b274ac8acf009 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 23 Dec 2016 12:36:05 -0700 Subject: [PATCH 033/267] Merging USER-DPD updates --- doc/src/fix_eos_table_rx.txt | 42 +- doc/src/pair_exp6_rx.txt | 60 ++- doc/src/pair_multi_lucy_rx.txt | 13 +- doc/src/pair_table_rx.txt | 15 +- .../USER/dpd/dpde-vv/log.dpde-vv.reference | 232 ++++----- .../USER/dpd/dpdrx-shardlow/in.dpdrx-shardlow | 2 +- .../log.dpdrx-shardlow.reference | 58 ++- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 40 +- src/USER-DPD/fix_eos_table_rx.cpp | 139 ++++-- src/USER-DPD/fix_eos_table_rx.h | 3 +- src/USER-DPD/fix_rx.cpp | 15 +- src/USER-DPD/pair_exp6_rx.cpp | 445 ++++++++++++------ src/USER-DPD/pair_exp6_rx.h | 13 +- src/USER-DPD/pair_multi_lucy_rx.cpp | 126 +++-- src/USER-DPD/pair_multi_lucy_rx.h | 3 +- src/USER-DPD/pair_table_rx.cpp | 174 ++++--- src/USER-DPD/pair_table_rx.h | 5 +- 17 files changed, 896 insertions(+), 489 deletions(-) diff --git a/doc/src/fix_eos_table_rx.txt b/doc/src/fix_eos_table_rx.txt index f92b405f49..749642f57c 100644 --- a/doc/src/fix_eos_table_rx.txt +++ b/doc/src/fix_eos_table_rx.txt @@ -10,7 +10,7 @@ fix eos/table/rx command :h3 [Syntax:] -fix ID group-ID eos/table/rx style file1 N keyword file2 :pre +fix ID group-ID eos/table/rx style file1 N keyword ... 
:pre ID, group-ID are documented in "fix"_fix.html command eos/table/rx = style name of this fix command @@ -18,11 +18,16 @@ style = {linear} = method of interpolation file1 = filename containing the tabulated equation of state N = use N values in {linear} tables keyword = name of table keyword correponding to table file -file2 = filename containing the heats of formation of each species :ul +file2 = filename containing the heats of formation of each species (optional) +deltaHf = heat of formation for a single species in energy units (optional) +energyCorr = energy correction in energy units (optional) +tempCorrCoeff = temperature correction coefficient (optional) :ul [Examples:] -fix 1 all eos/table/rx linear eos.table 10000 KEYWORD thermo.table :pre +fix 1 all eos/table/rx linear eos.table 10000 KEYWORD thermo.table +fix 1 all eos/table/rx linear eos.table 10000 KEYWORD 1.5 +fix 1 all eos/table/rx linear eos.table 10000 KEYWORD 1.5 0.025 0.0 :pre [Description:] @@ -39,7 +44,15 @@ where {m} is the number of species, {c_i,j} is the concentration of species {j} in particle {i}, {u_j} is the internal energy of species j, {DeltaH_f,j} is the heat of formation of species {j}, N is the number of molecules represented by the coarse-grained particle, kb is the -Boltzmann constant, and T is the temperature of the system. +Boltzmann constant, and T is the temperature of the system. Additionally, +it is possible to modify the concentration-dependent particle internal +energy relation by adding an energy correction, temperature-dependent +correction, and/or a molecule-dependent correction. An energy correction can +be specified as a constant (in energy units). A temperature correction can be +specified by multiplying a temperature correction coefficient by the +internal temperature. A molecular correction can be specified by +multiplying a molecule correction coefficient by the average number of +product gas particles in the coarse-grain particle. 
Fix {eos/table/rx} creates interpolation tables of length {N} from {m} internal energy values of each species {u_j} listed in a file as a @@ -58,6 +71,14 @@ file is described below. The second filename specifies a file containing heat of formation {DeltaH_f,j} for each species. +In cases where the coarse-grain particle represents a single molecular +species (i.e., no reactions occur and fix {rx} is not present in the input file), +fix {eos/table/rx} can be applied in a similar manner to fix {eos/table} +within a non-reactive DPD simulation. In this case, the heat of formation +filename is replaced with the heat of formation value for the single species. +Additionally, the energy correction and temperature correction coefficients may +also be specified as fix arguments. + :line The format of a tabulated file is as follows (without the @@ -116,6 +137,19 @@ Note that the species can be listed in any order. The tag that is used as the species name must correspond with the tags used to define the reactions with the "fix rx"_fix_rx.html command. +Alternatively, corrections to the EOS can be included by specifying +three additional columns that correspond to the energy correction, +the temperature correction coefficient and molecule correction +coefficient. In this case, the format of the file is as follows: + +# HEAT OF FORMATION TABLE (one or more comment or blank lines) :pre + (blank) +h2 0.00 1.23 0.025 0.0 (species name, heat of formation, energy correction, temperature correction coefficient, molecule correction coefficient) +no2 0.34 0.00 0.000 -1.76 +n2 0.00 0.00 0.000 -1.76 +... +no 0.93 0.00 0.000 -1.76 :pre + :line [Restrictions:] diff --git a/doc/src/pair_exp6_rx.txt b/doc/src/pair_exp6_rx.txt index 7b22dccc4f..dafba2c44c 100644 --- a/doc/src/pair_exp6_rx.txt +++ b/doc/src/pair_exp6_rx.txt @@ -10,16 +10,21 @@ pair_style exp6/rx command :h3 [Syntax:] -pair_style exp6/rx cutoff :pre +pair_style exp6/rx cutoff ... 
:pre -cutoff = global cutoff for DPD interactions (distance units) :ul +cutoff = global cutoff for DPD interactions (distance units) +weighting = fractional or molecular (optional) :ul [Examples:] pair_style exp6/rx 10.0 -pair_coeff * * exp6.params h2o h2o 1.0 1.0 10.0 -pair_coeff * * exp6.params h2o 1fluid 1.0 1.0 10.0 -pair_coeff * * exp6.params 1fluid 1fluid 1.0 1.0 10.0 :pre +pair_style exp6/rx 10.0 fractional +pair_style exp6/rx 10.0 molecular +pair_coeff * * exp6.params h2o h2o exponent 1.0 1.0 10.0 +pair_coeff * * exp6.params h2o 1fluid exponent 1.0 1.0 10.0 +pair_coeff * * exp6.params 1fluid 1fluid exponent 1.0 1.0 10.0 +pair_coeff * * exp6.params 1fluid 1fluid none 10.0 +pair_coeff * * exp6.params 1fluid 1fluid polynomial filename 10.0 :pre [Description:] @@ -50,14 +55,36 @@ defined in the reaction kinetics files specified with the "fix rx"_fix_rx.html command or they must correspond to the tag "1fluid", signifying interaction with a product species mixture determined through a one-fluid approximation. The interaction potential is -weighted by the geometric average of the concentrations of the two -species. The coarse-grained potential is stored before and after the +weighted by the geometric average of either the mole fraction concentrations +or the number of molecules associated with the interacting coarse-grained +particles (see the {fractional} or {molecular} weighting pair style options). +The coarse-grained potential is stored before and after the reaction kinetics solver is applied, where the difference is defined to be the internal chemical energy (uChem). -The fourth and fifth arguments specify the {Rm} and {epsilon} scaling exponents. +The fourth argument specifies the type of scaling that will be used +to scale the EXP-6 parameters as reactions occur. Currently, there +are three scaling options: {exponent}, {polynomial} and {none}. -The final argument specifies the interaction cutoff. 
+Exponent scaling requires two additional arguments for scaling +the {Rm} and {epsilon} parameters, respectively. The scaling factor +is computed by phi^exponent, where phi is the number of molecules +represented by the coarse-grain particle and exponent is specified +as a pair coefficient argument for {Rm} and {epsilon}, respectively. +The {Rm} and {epsilon} parameters are multiplied by the scaling +factor to give the scaled interaction parameters for the CG particle. + +Polynomial scaling requires a filename to be specified as a pair +coeff argument. The file contains the coefficients to a fifth order +polynomial for the {alpha}, {epsilon} and {Rm} parameters that depend +upon phi (the number of molecules represented by the CG particle). +The format of a polynomial file is provided below. + +The {none} option to the scaling does not have any additional pair coeff +arguments. This is equivalent to specifying the {exponent} option with +{Rm} and {epsilon} exponents of 0.0 and 0.0, respectively. + +The final argument specifies the interaction cutoff (optional). :line @@ -70,6 +97,19 @@ no2 exp6 13.60 0.01 3.70 ... co2 exp6 13.00 0.03 3.20 :pre +The format of the polynomial scaling file is as follows (without the +parenthesized comments): + +# POLYNOMIAL FILE (one or more comment or blank lines) :pre +# General Functional Form: +# A*phi^5 + B*phi^4 + C*phi^3 + D*phi^2 + E*phi + F +# +# Parameter A B C D E F + (blank) +alpha 0.0000 0.00000 0.00008 0.04955 -0.73804 13.63201 +epsilon 0.0000 0.00478 -0.06283 0.24486 -0.33737 2.60097 +rm 0.0001 -0.00118 -0.00253 0.05812 -0.00509 1.50106 :pre + A section begins with a non-blank line whose 1st character is not a "#"; blank lines or lines starting with "#" can be used as comments between sections. @@ -117,4 +157,4 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
"pair_coeff"_pair_coeff.html -[Default:] none +[Default:] fractional weighting diff --git a/doc/src/pair_multi_lucy_rx.txt b/doc/src/pair_multi_lucy_rx.txt index 14b5b32181..75547a71ce 100644 --- a/doc/src/pair_multi_lucy_rx.txt +++ b/doc/src/pair_multi_lucy_rx.txt @@ -13,11 +13,14 @@ pair_style multi/lucy/rx command :h3 pair_style multi/lucy/rx style N keyword ... :pre style = {lookup} or {linear} = method of interpolation -N = use N values in {lookup}, {linear} tables :ul +N = use N values in {lookup}, {linear} tables +weighting = fractional or molecular (optional) :ul [Examples:] pair_style multi/lucy/rx linear 1000 +pair_style multi/lucy/rx linear 1000 fractional +pair_style multi/lucy/rx linear 1000 molecular pair_coeff * * multibody.table ENTRY1 h2o h2o 7.0 pair_coeff * * multibody.table ENTRY1 h2o 1fluid 7.0 :pre @@ -94,8 +97,10 @@ tags must either correspond to the species defined in the reaction kinetics files specified with the "fix rx"_fix_rx.html command or they must correspond to the tag "1fluid", signifying interaction with a product species mixture determined through a one-fluid approximation. -The interaction potential is weighted by the geometric average of the -concentrations of the two species. The coarse-grained potential is +The interaction potential is weighted by the geometric average of +either the mole fraction concentrations or the number of molecules +associated with the interacting coarse-grained particles (see the +{fractional} or {molecular} weighting pair style options). The coarse-grained potential is stored before and after the reaction kinetics solver is applied, where the difference is defined to be the internal chemical energy (uChem). @@ -205,7 +210,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
"pair_coeff"_pair_coeff.html -[Default:] none +[Default:] fractional weighting :line diff --git a/doc/src/pair_table_rx.txt b/doc/src/pair_table_rx.txt index e6006f62e2..d089a4f9da 100644 --- a/doc/src/pair_table_rx.txt +++ b/doc/src/pair_table_rx.txt @@ -10,16 +10,17 @@ pair_style table/rx command :h3 [Syntax:] -pair_style table style N :pre +pair_style table style N ... :pre style = {lookup} or {linear} or {spline} or {bitmap} = method of interpolation N = use N values in {lookup}, {linear}, {spline} tables -N = use 2^N values in {bitmap} tables +weighting = fractional or molecular (optional) :ul [Examples:] pair_style table/rx linear 1000 -pair_style table/rx bitmap 12 +pair_style table/rx linear 1000 fractional +pair_style table/rx linear 1000 molecular pair_coeff * * rxn.table ENTRY1 h2o h2o 10.0 pair_coeff * * rxn.table ENTRY1 1fluid 1fluid 10.0 pair_coeff * 3 rxn.table ENTRY1 h2o no2 10.0 :pre @@ -84,8 +85,10 @@ tags must either correspond to the species defined in the reaction kinetics files specified with the "fix rx"_fix_rx.html command or they must correspond to the tag "1fluid", signifying interaction with a product species mixture determined through a one-fluid approximation. -The interaction potential is weighted by the geometric average of the -concentrations of the two species. The coarse-grained potential is +The interaction potential is weighted by the geometric average of +either the mole fraction concentrations or the number of molecules +associated with the interacting coarse-grained particles (see the +{fractional} or {molecular} weighting pair style options). The coarse-grained potential is stored before and after the reaction kinetics solver is applied, where the difference is defined to be the internal chemical energy (uChem). @@ -230,7 +233,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
"pair_coeff"_pair_coeff.html -[Default:] none +[Default:] fractional weighting :line diff --git a/examples/USER/dpd/dpde-vv/log.dpde-vv.reference b/examples/USER/dpd/dpde-vv/log.dpde-vv.reference index 7bc7bda365..800a39f7a5 100644 --- a/examples/USER/dpd/dpde-vv/log.dpde-vv.reference +++ b/examples/USER/dpd/dpde-vv/log.dpde-vv.reference @@ -35,129 +35,133 @@ thermo_modify format float %24.16f run 1000 Neighbor list info ... - 1 neighbor list requests update every 1 steps, delay 0 steps, check no max neighbors/atom: 2000, page size: 100000 master list distance cutoff = 10.6 ghost atom cutoff = 10.6 - binsize = 5.3 -> bins = 25 25 25 -Memory usage per processor = 3.36353 Mbytes + binsize = 5.3, bins = 25 25 25 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair dpd/fdt/energy, perpetual + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Memory usage per processor = 4.28221 Mbytes Step Temp Press PotEng KinEng c_dpdU[1] c_dpdU[2] v_totEnergy c_dpdU[4] - 0 301.4391322267262012 1636.1776395935085020 1188.6488072196075336 394.4722035796053206 7852.5601874986105031 7852.5601874986105031 17288.2413857964347699 299.9999999999841407 - 10 301.4791572483523510 1486.4422375141198245 1188.7147620806101713 394.5245815119678241 7852.5601874999802021 7852.3731942333779443 17288.1727253259377903 299.9960221120699089 - 20 301.4275643919337426 1677.9356110821624952 1188.7839634625399867 394.4570655673388728 7852.5601874999938445 7852.3711851933012440 17288.1724017231754260 299.9955485734552099 - 30 301.2240988054542186 1452.7304951528931269 1188.8550809767796181 394.1908044563202225 7852.5601875000002110 7852.5679666239848302 17288.1740395570850524 299.9988968405210130 - 40 301.1023506886409677 1527.9758363521380033 1188.9264527568634549 394.0314812537677653 7852.5601874999947540 7852.6574764573806533 17288.1755979680056043 300.0001694462812338 - 50 301.0409654880461972 1597.1737251233498682 1188.9944523606982330 393.9511507566391515 
7852.5601875000029395 7852.6700547249911324 17288.1758453423317405 299.9999653064982681 - 60 301.2904978886139133 1610.8630327676828529 1189.0651026961211301 394.2776962691256131 7852.5601874999829306 7852.2734988976435488 17288.1764853628737910 299.9919857290491905 - 70 300.8575037843163500 1489.3259312130880971 1189.1295686642290548 393.7110673208616731 7852.5601874999856591 7852.7707182199101226 17288.1715417049854295 300.0010992278233175 - 80 300.5955830326474825 1449.3896097889587509 1189.1880764967559116 393.3683100440913449 7852.5601875000411383 7853.0484238882281716 17288.1649979291178170 300.0059513551503301 - 90 301.0092332775843147 1553.9266324350364812 1189.2470037925052111 393.9096250433288446 7852.5601875000420478 7852.4452067113825251 17288.1620230472581170 299.9940347326859182 - 100 301.0478004479094238 1539.2270336322194453 1189.3010269201699884 393.9600951881690207 7852.5601875000074870 7852.3416236045995902 17288.1629332129450631 299.9916385566916119 - 110 300.9609384905550087 1500.0429484565006533 1189.3524514939088021 393.8464250502817663 7852.5601874999983920 7852.4114980357189779 17288.1705620799075405 299.9925626482005327 - 120 300.9625536631411933 1630.5065919443034090 1189.4006029528841282 393.8485387131115658 7852.5601875000575092 7852.3600810123671181 17288.1694101784196391 299.9911580775880680 - 130 301.0373750247310340 1539.2267307640183844 1189.4426173625224692 393.9464521696795032 7852.5601874999993015 7852.2178388309775983 17288.1670958631802932 299.9879581026651749 - 140 300.7465104415114752 1550.8353679735087098 1189.4887352231000932 393.5658181350791551 7852.5601874999920256 7852.5559582333216895 17288.1706990914935886 299.9939749909034958 - 150 300.6667173911141617 1634.8987162883277051 1189.5368575067818711 393.4613985788388959 7852.5601874999920256 7852.6079668015609059 17288.1664103871735279 299.9946423938895350 - 160 300.4684731724562425 1462.9400882126803936 1189.5825022927965620 393.2019703048678707 7852.5601874999847496 
7852.8265187980177870 17288.1711788956672535 299.9983600613423960 - 170 300.1439323338466920 1510.2352578813552100 1189.6305700279478970 392.7772665220106774 7852.5601874999802021 7853.2009671047335360 17288.1689911546709482 300.0051118582463232 - 180 300.1074244553407198 1529.6307083879951279 1189.6764977580119194 392.7294912276224181 7852.5601874999729262 7853.2047509722533505 17288.1709274578606710 300.0047089238623812 - 190 300.4193298066089142 1546.3205495807171701 1189.7172820166240399 393.1376598363699486 7852.5601874999847496 7852.7461854379371289 17288.1613147909156396 299.9954451643528728 - 200 300.3353919251508728 1532.5496449337254035 1189.7600175880224924 393.0278162310690391 7852.5601874999683787 7852.8107089913455638 17288.1587303104060993 299.9962707550171785 - 210 300.3276568499739483 1504.8178651700843602 1189.7998299597820733 393.0176938818990493 7852.5601875000156724 7852.7810130200659842 17288.1587243617614149 299.9953436245502871 - 220 300.5768315696971626 1592.5896084568344122 1189.8391466344742184 393.3437713226064716 7852.5601875000329528 7852.4205574703573802 17288.1636629274726147 299.9880321846658831 - 230 300.6587445618569063 1672.3049358942289473 1189.8766340798690635 393.4509650976162334 7852.5601874999847496 7852.2733199687863817 17288.1611066462573945 299.9848228571166828 - 240 300.7517707836825025 1527.1722267937811921 1189.9126240081129708 393.5727019751183207 7852.5601875000065775 7852.1160682173085661 17288.1615817005440476 299.9814952182625802 - 250 300.8473715548367409 1589.1847713095248764 1189.9441342461948352 393.6978079843565865 7852.5601875000047585 7851.9625847797888127 17288.1647145103452203 299.9782210858571148 - 260 300.8450266408960942 1623.1896863377055524 1189.9636161513917614 393.6947393603111891 7852.5601874999820211 7851.9471828473988353 17288.1657258590821584 299.9775302202895659 - 270 300.6663619570709898 1564.5160171187899323 1189.9764081239700317 393.4609334472908131 7852.5601875000193104 
7852.1708276117251444 17288.1683566830033669 299.9812899253168439 - 280 300.7668534205726019 1618.5400526904263643 1189.9872008155405183 393.5924395618274048 7852.5601875000184009 7852.0271568534708422 17288.1669847308585304 299.9781169783826158 - 290 300.8462727198648849 1562.6765776748122789 1189.9918265985252219 393.6963700162682471 7852.5601875000211294 7851.9189772084127981 17288.1673613232269417 299.9756806168044250 - 300 300.8095414073812890 1525.1785808192844343 1189.9873922767767453 393.6483023295390922 7852.5601875000020300 7851.9657301693578120 17288.1616122756749974 299.9761279889730758 - 310 300.9496330741350221 1566.5597234051326723 1189.9752299662607129 393.8316304464934774 7852.5601875000056680 7851.7898117189633922 17288.1568596317229094 299.9723726900590464 - 320 301.2370566356515837 1513.6869483705047514 1189.9626455872523820 394.2077614578674343 7852.5601874999929350 7851.4248466706330873 17288.1554412157456682 299.9650543775110236 - 330 301.3279721508968692 1549.0667862452519330 1189.9513389477854162 394.3267362020337146 7852.5601874999929350 7851.3129955581916875 17288.1512582080031279 299.9625537201162615 - 340 301.1145736537583844 1414.7930515101759283 1189.9408691169965095 394.0474765890400590 7852.5601874999993015 7851.6028846074832472 17288.1514178135184920 299.9677356565828745 - 350 301.1651600907370039 1529.8016115175887535 1189.9314470205476937 394.1136755032911196 7852.5601874999929350 7851.5441417268757505 17288.1494517507089768 299.9662576716461331 - 360 301.0550563185083206 1536.7721716375504002 1189.9200519814730796 393.9695904359920178 7852.5601875000074870 7851.7101209691463737 17288.1599508866202086 299.9690811750865009 - 370 301.1008976932964742 1522.3385843459479929 1189.9109162496640693 394.0295798208944120 7852.5601875000211294 7851.6603423306560217 17288.1610259012340975 299.9677565060027860 - 380 301.1656898730700505 1505.0548721701993600 1189.9005648244351505 394.1143687921909304 7852.5601875000056680 
7851.5816827598300733 17288.1568038764598896 299.9659906785156522 - 390 300.8379322662876802 1740.9151205755624687 1189.8851457594087151 393.6854554509390596 7852.5601875000238579 7852.0268864110385039 17288.1576751214088290 299.9741278188615752 - 400 300.8663790447546376 1564.9461156870302148 1189.8690133470408909 393.7226817503372445 7852.5601875000411383 7852.0043792319993372 17288.1562618294192362 299.9732593416579789 - 410 300.6263441860635908 1564.2840871092373618 1189.8566574093877080 393.4085650033033517 7852.5601874999892971 7852.3284491703725507 17288.1538590830532485 299.9792095875052951 - 420 300.5302259436974168 1438.1569922368764765 1189.8406936554465574 393.2827818158641549 7852.5601875000302243 7852.4696075433648730 17288.1532705147074012 299.9815165752025337 - 430 300.5877786105220935 1503.3641639033023694 1189.8251514530138593 393.3580969454444016 7852.5601874999802021 7852.4023373559457468 17288.1457732543858583 299.9798346272511935 - 440 300.7289160804472772 1689.2527029957295781 1189.8035410609209066 393.5427936314976591 7852.5601875000029395 7852.2436462415198548 17288.1501684339418716 299.9764596782897570 - 450 300.9487198282456575 1497.3668092174791582 1189.7808137689632986 393.8304353457919547 7852.5601874999938445 7851.9788323927432430 17288.1502690074921702 299.9710227473042323 - 460 300.9359942496024587 1625.1573864018491804 1189.7615359247627111 393.8137822755282400 7852.5601875000147629 7852.0165192783370003 17288.1520249786408385 299.9713565393226986 - 470 301.0000133856357252 1486.1561922844011860 1189.7439269526955741 393.8975596188205941 7852.5601874999656502 7851.9561324572268859 17288.1578065287103527 299.9697143418395626 - 480 300.8568627175957886 1535.6080526199095857 1189.7237810071801505 393.7102284019063063 7852.5601874999601932 7852.1697010727630186 17288.1638979818089865 299.9732503057674080 - 490 301.0608040775520067 1497.3221544489886128 1189.7062242497636362 393.9771121242308709 7852.5601874999974825 
7851.9258988739011329 17288.1694227478947141 299.9682362511933320 - 500 301.0232592587148019 1517.5854528541199215 1189.6911287485861521 393.9279798589197981 7852.5601875000247674 7851.9823225510326665 17288.1616186585633841 299.9690333355835037 - 510 300.7038579923685120 1420.2615974401142012 1189.6747661513456933 393.5100018730125839 7852.5601874999674692 7852.4114869568047652 17288.1564424811294884 299.9768186576545759 - 520 300.5917863355052759 1537.4862082427132464 1189.6604754398756540 393.3633415734188361 7852.5601875000029395 7852.5789017095057716 17288.1629062228021212 299.9795694302102333 - 530 300.4751352158502868 1481.1071694751799441 1189.6453243069925065 393.2106884527691477 7852.5601874999811116 7852.7451655714066874 17288.1613658311471227 299.9823181268525900 - 540 300.5380123640739498 1547.3461372766389559 1189.6261485232855648 393.2929713568877332 7852.5601875000375003 7852.6850583598352387 17288.1643657400454686 299.9808112190538623 - 550 300.4253885005187499 1544.3485889749692888 1189.6033595464525661 393.1455884232119047 7852.5601874999756546 7852.8598718466746504 17288.1690073163154011 299.9835860164698147 - 560 300.3263552442093101 1556.5150300058251105 1189.5759163336824713 393.0159905619273673 7852.5601875000111249 7853.0148613782675966 17288.1669557738860021 299.9861837797674866 - 570 300.1977324643196425 1511.2320626303917379 1189.5441090918316149 392.8476709710407704 7852.5601875000102154 7853.2098259401755058 17288.1617935030590161 299.9896761688499964 - 580 300.3543631005173893 1588.9566243200433746 1189.5094471319721379 393.0526424747489500 7852.5601875000156724 7853.0374555421631158 17288.1597326488990802 299.9859298211933378 - 590 300.5019108864805730 1504.4406939723214691 1189.4809412920112663 393.2457278908070748 7852.5601874999874781 7852.8704277855340479 17288.1572844683396397 299.9823573257917815 - 600 300.4791158523048011 1540.4690749004150803 1189.4551948503105905 393.2158976318902432 7852.5601875000220389 
7852.9312239063838206 17288.1625038886049879 299.9832002920041987 - 610 300.5939139841889869 1368.0565839211087678 1189.4252547652590692 393.3661258776944578 7852.5601874999574648 7852.8130977336286378 17288.1646658765384927 299.9807742697515778 - 620 300.7674247480806002 1483.2566452708945235 1189.3941250938435132 393.5931872179773450 7852.5601875000193104 7852.6187967208716145 17288.1662965327122947 299.9766963671718258 - 630 300.7920034341021278 1543.0699124130637756 1189.3598279316649950 393.6253516166882491 7852.5601875000302243 7852.6219971866230480 17288.1673642350069713 299.9762538437230432 - 640 300.8032734267029014 1423.2549819291616586 1189.3293074476885067 393.6400998638143278 7852.5601874999847496 7852.6384826097782934 17288.1680774212654796 299.9762118202994543 - 650 300.7516995878241346 1542.6559695158523482 1189.3021161045705867 393.5726088061030055 7852.5601874999720167 7852.7361949473242930 17288.1711073579681397 299.9775656396505497 - 660 300.8699697098109596 1675.5121937767839881 1189.2687179804190691 393.7273806013013768 7852.5601874999802021 7852.6179739687149777 17288.1742600504148868 299.9750492262036801 - 670 301.0255004186900578 1520.7397686587873977 1189.2284265783687260 393.9309127074437242 7852.5601874999847496 7852.4592279727157802 17288.1787547585117863 299.9715123049731460 - 680 301.1071983488760679 1651.9751417063259851 1189.1858967311386550 394.0378250459656329 7852.5601875000002110 7852.3982826328638112 17288.1821919099675142 299.9699481289110850 - 690 301.0027086454253435 1496.1607274163641250 1189.1436949551202815 393.9010867158519886 7852.5601875000293148 7852.5788938360938118 17288.1838630070960789 299.9731939774295597 - 700 300.9009090279179759 1551.8182127127668082 1189.0993919251338866 393.7678687121208441 7852.5601875000102154 7852.7513665452252098 17288.1788146824910655 299.9761043445071209 - 710 301.2325536720837817 1678.1546953970853338 1189.0528341066981284 394.2018687459686817 7852.5601874999956635 
7852.3633298995819132 17288.1782202522445004 299.9683013583347133 - 720 301.2122298224125529 1524.1415452491430642 1189.0046957644285612 394.1752723525083866 7852.5601875000093059 7852.4351629896145823 17288.1753186065616319 299.9693315350040734 - 730 301.0763282392692304 1547.1987029633166912 1188.9602551214045434 393.9974275034455218 7852.5601874999883876 7852.6518053705112834 17288.1696754953518393 299.9732715774841267 - 740 301.3262401480515109 1544.7045314021493141 1188.9131307177485724 394.3244696516559884 7852.5601874999965730 7852.3694201272974169 17288.1672079966992897 299.9674666811455950 - 750 301.5740779122830304 1591.1785078054851965 1188.8637580645938669 394.6487975126887022 7852.5601875000029395 7852.0919529470393172 17288.1646960243233480 299.9616008527094095 - 760 301.4385361878654521 1547.3218422039201414 1188.8113669183098864 394.4714235854450521 7852.5601874999838401 7852.3161911124070684 17288.1591691161447670 299.9656339783694534 - 770 301.6110125684814420 1494.5039561806622714 1188.7581685915934031 394.6971313010439530 7852.5601875000083965 7852.1351720579104949 17288.1506594505553949 299.9619855799395509 - 780 301.8360352039435384 1588.1458619705292676 1188.7039178696472845 394.9916026067776329 7852.5601874999956635 7851.9015195838428554 17288.1572275602629816 299.9572350302977952 - 790 302.1008324754310479 1545.4409171812178556 1188.6491103416560691 395.3381241828382144 7852.5601875000138534 7851.6150048936624444 17288.1624269181702402 299.9513959104631340 - 800 301.9660372380565718 1563.9565804790736365 1188.5964649891604950 395.1617271307158035 7852.5601874999874781 7851.8461249560614306 17288.1645045759250934 299.9555810527747326 - 810 302.0507207347627627 1511.4560763489957935 1188.5468477146612258 395.2725464702810996 7852.5601875000120344 7851.7904104899025697 17288.1699921748586348 299.9541551776504775 - 820 302.4700213214911741 1458.5135514273570152 1188.4981381693974072 395.8212556746473751 7852.5601875000202199 
7851.2935886962204677 17288.1731700402851857 299.9441803241180651 - 830 302.2853997979337350 1496.2544527963129894 1188.4496917372191547 395.5796544641875698 7852.5601875000447762 7851.5862641793482908 17288.1757978808018379 299.9494768794835977 - 840 302.0840465730901201 1518.8301331998704882 1188.3994383226176978 395.3161576523596636 7852.5601875000038490 7851.8962146812327774 17288.1719981562127941 299.9550476592922337 - 850 301.8910942560261788 1469.8827850510901953 1188.3489956121345585 395.0636545180261692 7852.5601874999829306 7852.2025804631493884 17288.1754180932912277 299.9606927700139067 - 860 301.7284384160519153 1657.6802015862324424 1188.3052233777652873 394.8507982536594341 7852.5601875000093059 7852.4644669022691232 17288.1806760337058222 299.9652835238809985 - 870 301.6331619894115192 1501.5829953208524330 1188.2628815714097072 394.7261166912876433 7852.5601875000202199 7852.6378180648598573 17288.1870038275774277 299.9682811831179379 - 880 301.3703918424367316 1499.1595903074553462 1188.2195190931643083 394.3822478705861272 7852.5601874999956635 7853.0266423250832304 17288.1885967888301820 299.9755099056966401 - 890 301.4157954313303662 1598.8758859042511631 1188.1845892608291706 394.4416643558612918 7852.5601875000065775 7853.0036606192506952 17288.1901017359487014 299.9745322513492738 - 900 301.4752150615485675 1621.2148728756822038 1188.1517520946135846 394.5194226492019993 7852.5601874999711072 7852.9579580608560718 17288.1893203046420240 299.9733125337182287 - 910 301.4308816315938770 1538.4823217911632582 1188.1159856659232901 394.4614066057066566 7852.5601875000002110 7853.0558695713261841 17288.1934493429580471 299.9748317405193916 - 920 301.4323110133492492 1594.7193046491217956 1188.0835779842032025 394.4632771371357762 7852.5601875000202199 7853.0942701464364291 17288.2013127677964803 299.9751127806911200 - 930 301.4801256941950101 1387.6885377097617038 1188.0464206196895702 394.5258488489681099 7852.5601875000229484 
7853.0656502842994087 17288.1981072529815719 299.9740698440909910 - 940 301.8075611840245074 1534.2487040663793323 1188.0124217312886685 394.9543406584059539 7852.5601874999701977 7852.6729444202819650 17288.1998943099461030 299.9660570413493588 - 950 301.6915970126173647 1567.7725992489238251 1187.9790455470049437 394.8025864986412898 7852.5601875000274958 7852.8619557087595240 17288.2037752544347313 299.9694678653150959 - 960 301.6392594677008105 1504.8502165144939227 1187.9439133338105421 394.7340960325207675 7852.5601874999711072 7852.9728807988849439 17288.2110776651898050 299.9711546356286362 - 970 301.6049535791644303 1514.0198965433548892 1187.9094123369413865 394.6892023276233772 7852.5601874999765641 7853.0497909819878259 17288.2085931465298927 299.9722547114341751 - 980 301.2982841679705643 1634.1208149125807267 1187.8768454876480973 394.2878856256063500 7852.5601874999856591 7853.4862008383515786 17288.2111194515891839 299.9802110109069986 - 990 301.2573007350166563 1489.7316698898257528 1187.8432331161868660 394.2342534877078606 7852.5601875000047585 7853.5840096862748396 17288.2216837901723920 299.9819468620868292 - 1000 301.3195135766228532 1562.6587211933920116 1187.8034267774903583 394.3156670604516307 7852.5601874999356369 7853.5372636956635688 17288.2165450335414789 299.9807651637231629 -Loop time of 21.3308 on 1 procs for 1000 steps with 10125 atoms + 0 301.4391322267262012 1636.1776395935080473 1188.6488072196075336 394.4722035796053206 0.0000000000000000 15705.1203749972210062 17288.2413857964347699 299.9999999999841407 + 10 301.4791572483523510 1486.4422375141214161 1188.7147620806101713 394.5245815119678241 0.0000000000000000 15704.9333817333845218 17288.1727253259632562 299.9960221120699089 + 20 301.4275643919337995 1677.9356110821622678 1188.7839634625399867 394.4570655673389865 -0.0000000000000000 15704.9313726932996360 17288.1724017231790640 299.9955485734552667 + 30 301.2240988054542186 1452.7304951528922174 1188.8550809767796181 
394.1908044563202225 -0.0000000000000000 15705.1281541239713988 17288.1740395570705005 299.9988968405209562 + 40 301.1023506886409109 1527.9758363521384581 1188.9264527568634549 394.0314812537677085 -0.0000000000000000 15705.2176639573335706 17288.1755979679655866 300.0001694462812907 + 50 301.0409654880461972 1597.1737251233505503 1188.9944523606984603 393.9511507566391515 -0.0000000000000000 15705.2302422249904339 17288.1758453423281026 299.9999653064982112 + 60 301.2904978886138565 1610.8630327676828529 1189.0651026961211301 394.2776962691255562 -0.0000000000000000 15704.8336863976528548 17288.1764853628992569 299.9919857290491905 + 70 300.8575037843164068 1489.3259312130892340 1189.1295686642290548 393.7110673208617300 0.0000000000000000 15705.3309057198275696 17288.1715417049199459 300.0010992278232607 + 80 300.5955830326474825 1449.3896097889576140 1189.1880764967559116 393.3683100440913449 -0.0000000000000000 15705.6086113882302016 17288.1649979290777992 300.0059513551502164 + 90 301.0092332775843147 1553.9266324350371633 1189.2470037925056658 393.9096250433288446 -0.0000000000000000 15705.0053942113881931 17288.1620230472217372 299.9940347326859182 + 100 301.0478004479094238 1539.2270336322201274 1189.3010269201699884 393.9600951881690207 -0.0000000000000000 15704.9018111045588739 17288.1629332128977694 299.9916385566916119 + 110 300.9609384905550655 1500.0429484565015628 1189.3524514939088021 393.8464250502818231 -0.0000000000000000 15704.9716855356964516 17288.1705620798857126 299.9925626482006464 + 120 300.9625536631413070 1630.5065919443020448 1189.4006029528841282 393.8485387131116795 0.0000000000000000 15704.9202685123345873 17288.1694101783286897 299.9911580775880680 + 130 301.0373750247309772 1539.2267307640188392 1189.4426173625224692 393.9464521696794463 -0.0000000000000000 15704.7780263310032751 17288.1670958632057591 299.9879581026650044 + 140 300.7465104415114183 1550.8353679735089372 1189.4887352231000932 393.5658181350790983 
0.0000000000000000 15705.1161457332873397 17288.1706990914681228 299.9939749909034958 + 150 300.6667173911142186 1634.8987162883267956 1189.5368575067818711 393.4613985788390096 0.0000000000000000 15705.1681543015274656 17288.1664103871480620 299.9946423938894213 + 160 300.4684731724561857 1462.9400882126797114 1189.5825022927965620 393.2019703048678139 0.0000000000000000 15705.3867062980680203 17288.1711788957327371 299.9983600613422254 + 170 300.1439323338466920 1510.2352578813547552 1189.6305700279476696 392.7772665220106774 -0.0000000000000000 15705.7611546046609874 17288.1689911546200165 300.0051118582463232 + 180 300.1074244553407766 1529.6307083879964921 1189.6764977580119194 392.7294912276225318 -0.0000000000000000 15705.7649384723172261 17288.1709274579516205 300.0047089238623812 + 190 300.4193298066088573 1546.3205495807169427 1189.7172820166242673 393.1376598363698349 0.0000000000000000 15705.3063729379555298 17288.1613147909483814 299.9954451643527022 + 200 300.3353919251508728 1532.5496449337249487 1189.7600175880224924 393.0278162310690391 -0.0000000000000000 15705.3708964914076205 17288.1587303105006868 299.9962707550172922 + 210 300.3276568499739483 1504.8178651700850423 1189.7998299597820733 393.0176938818990493 0.0000000000000000 15705.3412005200552812 17288.1587243617359491 299.9953436245502871 + 220 300.5768315696972195 1592.5896084568353217 1189.8391466344739911 393.3437713226065284 -0.0000000000000000 15704.9807449702821032 17288.1636629273634753 299.9880321846658262 + 230 300.6587445618569063 1672.3049358942282652 1189.8766340798690635 393.4509650976162334 0.0000000000000000 15704.8335074687693123 17288.1611066462537565 299.9848228571169102 + 240 300.7517707836825025 1527.1722267937814195 1189.9126240081131982 393.5727019751183207 -0.0000000000000000 15704.6762557172896777 17288.1615817005222198 299.9814952182625802 + 250 300.8473715548367409 1589.1847713095232848 1189.9441342461948352 393.6978079843565865 0.0000000000000000 
15704.5227722798481409 17288.1647145103997900 299.9782210858571148 + 260 300.8450266408959806 1623.1896863377055524 1189.9636161513917614 393.6947393603110186 0.0000000000000000 15704.5073703474117792 17288.1657258591149002 299.9775302202894522 + 270 300.6663619570710466 1564.5160171187892502 1189.9764081239700317 393.4609334472908699 0.0000000000000000 15704.7310151116998895 17288.1683566829597112 299.9812899253167302 + 280 300.7668534205727155 1618.5400526904256822 1189.9872008155405183 393.5924395618275184 0.0000000000000000 15704.5873443533891987 17288.1669847307566670 299.9781169783825590 + 290 300.8462727198648281 1562.6765776748138705 1189.9918265985252219 393.6963700162681334 0.0000000000000000 15704.4791647084566648 17288.1673613232487696 299.9756806168042544 + 300 300.8095414073812890 1525.1785808192844343 1189.9873922767767453 393.6483023295390922 0.0000000000000000 15704.5259176693853078 17288.1616122757004632 299.9761279889731327 + 310 300.9496330741349652 1566.5597234051326723 1189.9752299662607129 393.8316304464933637 0.0000000000000000 15704.3499992189717887 17288.1568596317265474 299.9723726900589327 + 320 301.2370566356514132 1513.6869483705036146 1189.9626455872523820 394.2077614578672069 0.0000000000000000 15703.9850341706151085 17288.1554412157347542 299.9650543775107394 + 330 301.3279721508969260 1549.0667862452526151 1189.9513389477854162 394.3267362020338282 0.0000000000000000 15703.8731830581982649 17288.1512582080176799 299.9625537201162615 + 340 301.1145736537582707 1414.7930515101757010 1189.9408691169962822 394.0474765890398885 0.0000000000000000 15704.1630721074998291 17288.1514178135366819 299.9677356565827040 + 350 301.1651600907369470 1529.8016115175894356 1189.9314470205474663 394.1136755032910628 0.0000000000000000 15704.1043292268568621 17288.1494517506944248 299.9662576716459625 + 360 301.0550563185083206 1536.7721716375513097 1189.9200519814730796 393.9695904359920178 0.0000000000000000 15704.2703084691693221 
17288.1599508866347605 299.9690811750866146 + 370 301.1008976932965311 1522.3385843459491298 1189.9109162496640693 394.0295798208944689 0.0000000000000000 15704.2205298306434997 17288.1610259012013557 299.9677565060027860 + 380 301.1656898730701073 1505.0548721701995873 1189.9005648244356053 394.1143687921909873 -0.0000000000000000 15704.1418702597857191 17288.1568038764125959 299.9659906785157091 + 390 300.8379322662877371 1740.9151205755633782 1189.8851457594089425 393.6854554509391164 -0.0000000000000000 15704.5870739109432179 17288.1576751212924137 299.9741278188614046 + 400 300.8663790447545239 1564.9461156870302148 1189.8690133470406636 393.7226817503371308 0.0000000000000000 15704.5645667319495260 17288.1562618293282867 299.9732593416576947 + 410 300.6263441860637045 1564.2840871092375892 1189.8566574093874806 393.4085650033035222 -0.0000000000000000 15704.8886366703736712 17288.1538590830641624 299.9792095875053519 + 420 300.5302259436973031 1438.1569922368769312 1189.8406936554461026 393.2827818158640412 0.0000000000000000 15705.0297950433650840 17288.1532705146746594 299.9815165752024768 + 430 300.5877786105221503 1503.3641639033021420 1189.8251514530136319 393.3580969454445153 -0.0000000000000000 15704.9625248558968451 17288.1457732543567545 299.9798346272512504 + 440 300.7289160804472772 1689.2527029957295781 1189.8035410609209066 393.5427936314976591 -0.0000000000000000 15704.8038337415237038 17288.1501684339418716 299.9764596782894728 + 450 300.9487198282456006 1497.3668092174784761 1189.7808137689632986 393.8304353457918978 -0.0000000000000000 15704.5390198927143501 17288.1502690074703423 299.9710227473042323 + 460 300.9359942496024019 1625.1573864018473614 1189.7615359247631659 393.8137822755281263 0.0000000000000000 15704.5767067783035600 17288.1520249785935448 299.9713565393225849 + 470 301.0000133856357252 1486.1561922844020955 1189.7439269526958014 393.8975596188205941 0.0000000000000000 15704.5163199572089070 17288.1578065287249046 
299.9697143418395058 + 480 300.8568627175958454 1535.6080526199100404 1189.7237810071803779 393.7102284019064200 -0.0000000000000000 15704.7298885727686866 17288.1638979818562802 299.9732503057675785 + 490 301.0608040775520067 1497.3221544489890675 1189.7062242497640909 393.9771121242308709 -0.0000000000000000 15704.4860863739140768 17288.1694227479092660 299.9682362511933889 + 500 301.0232592587148019 1517.5854528541185573 1189.6911287485863795 393.9279798589197981 -0.0000000000000000 15704.5425100510510674 17288.1616186585561081 299.9690333355832195 + 510 300.7038579923685120 1420.2615974401142012 1189.6747661513456933 393.5100018730125839 -0.0000000000000000 15704.9716744568013382 17288.1564424811585923 299.9768186576548032 + 520 300.5917863355052759 1537.4862082427125642 1189.6604754398761088 393.3633415734188361 -0.0000000000000000 15705.1390892093895673 17288.1629062226857059 299.9795694302102902 + 530 300.4751352158504574 1481.1071694751785799 1189.6453243069920518 393.2106884527693751 -0.0000000000000000 15705.3053530714041699 17288.1613658311653126 299.9823181268525900 + 540 300.5380123640739498 1547.3461372766387285 1189.6261485232855648 393.2929713568877332 0.0000000000000000 15705.2452458598490921 17288.1643657400236407 299.9808112190538623 + 550 300.4253885005187499 1544.3485889749688340 1189.6033595464525661 393.1455884232119047 0.0000000000000000 15705.4200593467012368 17288.1690073163663328 299.9835860164698147 + 560 300.3263552442091395 1556.5150300058239736 1189.5759163336820166 393.0159905619271399 0.0000000000000000 15705.5750488783432957 17288.1669557739514858 299.9861837797674298 + 570 300.1977324643196994 1511.2320626303924200 1189.5441090918316149 392.8476709710408272 0.0000000000000000 15705.7700134401693504 17288.1617935030408262 299.9896761688500533 + 580 300.3543631005173893 1588.9566243200420104 1189.5094471319723652 393.0526424747489500 -0.0000000000000000 15705.5976430422142585 17288.1597326489354600 299.9859298211932810 + 590 
300.5019108864805730 1504.4406939723210144 1189.4809412920112663 393.2457278908070748 -0.0000000000000000 15705.4306152855297114 17288.1572844683469157 299.9823573257918952 + 600 300.4791158523048011 1540.4690749004137160 1189.4551948503108179 393.2158976318902432 0.0000000000000000 15705.4914114063831221 17288.1625038885831600 299.9832002920041418 + 610 300.5939139841890437 1368.0565839211083130 1189.4252547652597514 393.3661258776945715 0.0000000000000000 15705.3732852337052464 17288.1646658766585460 299.9807742697515209 + 620 300.7674247480806002 1483.2566452708929319 1189.3941250938437406 393.5931872179773450 0.0000000000000000 15705.1789842209145718 17288.1662965327341226 299.9766963671719395 + 630 300.7920034341022415 1543.0699124130630935 1189.3598279316649950 393.6253516166883628 -0.0000000000000000 15705.1821846865786938 17288.1673642349305737 299.9762538437231001 + 640 300.8032734267029014 1423.2549819291609765 1189.3293074476887341 393.6400998638143278 -0.0000000000000000 15705.1986701098048798 17288.1680774213091354 299.9762118202993975 + 650 300.7516995878240209 1542.6559695158514387 1189.3021161045703593 393.5726088061028349 0.0000000000000000 15705.2963824473390559 17288.1711073580117954 299.9775656396504360 + 660 300.8699697098108459 1675.5121937767842155 1189.2687179804192965 393.7273806013012063 0.0000000000000000 15705.1781614686860848 17288.1742600504076108 299.9750492262035095 + 670 301.0255004186899441 1520.7397686587889893 1189.2284265783694082 393.9309127074436105 0.0000000000000000 15705.0194154727287241 17288.1787547585408902 299.9715123049731460 + 680 301.1071983488761248 1651.9751417063253029 1189.1858967311388824 394.0378250459656897 0.0000000000000000 15704.9584701329349627 17288.1821919100402738 299.9699481289110281 + 690 301.0027086454255141 1496.1607274163641250 1189.1436949551202815 393.9010867158522160 0.0000000000000000 15705.1390813360922039 17288.1838630070633371 299.9731939774292755 + 700 300.9009090279178622 
1551.8182127127668082 1189.0993919251338866 393.7678687121206735 -0.0000000000000000 15705.3115540452217829 17288.1788146824765136 299.9761043445070641 + 710 301.2325536720837817 1678.1546953970841969 1189.0528341066981284 394.2018687459686817 0.0000000000000000 15704.9235173995584773 17288.1782202522263105 299.9683013583346565 + 720 301.2122298224125529 1524.1415452491437463 1189.0046957644283339 394.1752723525083866 0.0000000000000000 15704.9953504895402148 17288.1753186064779584 299.9693315350040734 + 730 301.0763282392692304 1547.1987029633176007 1188.9602551214045434 393.9974275034455218 0.0000000000000000 15705.2119928705469647 17288.1696754953954951 299.9732715774840699 + 740 301.3262401480515109 1544.7045314021493141 1188.9131307177485724 394.3244696516559884 0.0000000000000000 15704.9296076272603386 17288.1672079966665478 299.9674666811455950 + 750 301.5740779122830872 1591.1785078054849691 1188.8637580645940943 394.6487975126887591 0.0000000000000000 15704.6521404470349808 17288.1646960243160720 299.9616008527092959 + 760 301.4385361878655658 1547.3218422039212783 1188.8113669183098864 394.4714235854451658 0.0000000000000000 15704.8763786124927719 17288.1591691162466304 299.9656339783693966 + 770 301.6110125684815557 1494.5039561806624988 1188.7581685915934031 394.6971313010441236 0.0000000000000000 15704.6953595579507237 17288.1506594505881367 299.9619855799396646 + 780 301.8360352039435384 1588.1458619705304045 1188.7039178696477393 394.9916026067776329 0.0000000000000000 15704.4617070838321524 17288.1572275602593436 299.9572350302976247 + 790 302.1008324754310479 1545.4409171812180830 1188.6491103416560691 395.3381241828382144 0.0000000000000000 15704.1751923936917592 17288.1624269181847922 299.9513959104630771 + 800 301.9660372380565718 1563.9565804790738639 1188.5964649891604950 395.1617271307158035 0.0000000000000000 15704.4063124560707365 17288.1645045759469212 299.9555810527747326 + 810 302.0507207347627059 1511.4560763489960209 
1188.5468477146607711 395.2725464702810427 0.0000000000000000 15704.3505979898400255 17288.1699921747822373 299.9541551776507617 + 820 302.4700213214913447 1458.5135514273563331 1188.4981381693974072 395.8212556746476025 0.0000000000000000 15703.8537761962070363 17288.1731700402524439 299.9441803241177809 + 830 302.2853997979336214 1496.2544527963145811 1188.4496917372191547 395.5796544641873993 0.0000000000000000 15704.1464516793694202 17288.1757978807763720 299.9494768794834840 + 840 302.0840465730901201 1518.8301331998702608 1188.3994383226179252 395.3161576523596636 0.0000000000000000 15704.4564021812439023 17288.1719981562200701 299.9550476592922337 + 850 301.8910942560260082 1469.8827850510904227 1188.3489956121347859 395.0636545180259986 0.0000000000000000 15704.7627679631386854 17288.1754180932985037 299.9606927700136794 + 860 301.7284384160518016 1657.6802015862315329 1188.3052233777652873 394.8507982536592635 0.0000000000000000 15705.0246544022065791 17288.1806760336330626 299.9652835238807711 + 870 301.6331619894114624 1501.5829953208508414 1188.2628815714099346 394.7261166912875865 0.0000000000000000 15705.1980055648327834 17288.1870038275301340 299.9682811831179947 + 880 301.3703918424367316 1499.1595903074555736 1188.2195190931643083 394.3822478705861272 0.0000000000000000 15705.5868298250898079 17288.1885967888410960 299.9755099056964127 + 890 301.4157954313303662 1598.8758859042509357 1188.1845892608291706 394.4416643558612918 0.0000000000000000 15705.5638481192290783 17288.1901017359195976 299.9745322513492738 + 900 301.4752150615486812 1621.2148728756842502 1188.1517520946144941 394.5194226492021699 0.0000000000000000 15705.5181455608308170 17288.1893203046492999 299.9733125337182287 + 910 301.4308816315937634 1538.4823217911621214 1188.1159856659228353 394.4614066057064861 0.0000000000000000 15705.6160570713091147 17288.1934493429398572 299.9748317405192779 + 920 301.4323110133492492 1594.7193046491240693 1188.0835779842032025 
394.4632771371357762 0.0000000000000000 15705.6544576464475540 17288.2013127677855664 299.9751127806913473 + 930 301.4801256941949532 1387.6885377097596574 1188.0464206196900250 394.5258488489680531 0.0000000000000000 15705.6258377843460039 17288.1981072530033998 299.9740698440912183 + 940 301.8075611840245074 1534.2487040663797870 1188.0124217312888959 394.9543406584059539 0.0000000000000000 15705.2331319202457962 17288.1998943099388271 299.9660570413491882 + 950 301.6915970126175353 1567.7725992489226883 1187.9790455470049437 394.8025864986415172 0.0000000000000000 15705.4221432087451831 17288.2037752543910756 299.9694678653152096 + 960 301.6392594677008105 1504.8502165144939227 1187.9439133338107695 394.7340960325207675 0.0000000000000000 15705.5330682989206252 17288.2110776652516506 299.9711546356285226 + 970 301.6049535791644871 1514.0198965433535250 1187.9094123369409317 394.6892023276234909 0.0000000000000000 15705.6099784820144123 17288.2085931465771864 299.9722547114341751 + 980 301.2982841679706780 1634.1208149125800446 1187.8768454876478700 394.2878856256065205 0.0000000000000000 15706.0463883383199573 17288.2111194515746320 299.9802110109068849 + 990 301.2573007350166563 1489.7316698898262075 1187.8432331161866387 394.2342534877078606 0.0000000000000000 15706.1441971863041545 17288.2216837901978579 299.9819468620868292 + 1000 301.3195135766228532 1562.6587211933931485 1187.8034267774903583 394.3156670604516307 0.0000000000000000 15706.0974511956701463 17288.2165450336106005 299.9807651637235040 +Loop time of 17.0881 on 1 procs for 1000 steps with 10125 atoms -Performance: 4.050 ns/day, 5.925 hours/ns, 46.880 timesteps/s -99.8% CPU use with 1 MPI tasks x no OpenMP threads +Performance: 5.056 ns/day, 4.747 hours/ns, 58.520 timesteps/s +100.0% CPU use with 1 MPI tasks x no OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 10.099 | 
10.099 | 10.099 | 0.0 | 47.34 -Neigh | 10.145 | 10.145 | 10.145 | 0.0 | 47.56 -Comm | 0.49807 | 0.49807 | 0.49807 | 0.0 | 2.33 -Output | 0.011203 | 0.011203 | 0.011203 | 0.0 | 0.05 -Modify | 0.28296 | 0.28296 | 0.28296 | 0.0 | 1.33 -Other | | 0.295 | | | 1.38 +Pair | 8.0541 | 8.0541 | 8.0541 | 0.0 | 47.13 +Neigh | 8.1306 | 8.1306 | 8.1306 | 0.0 | 47.58 +Comm | 0.39415 | 0.39415 | 0.39415 | 0.0 | 2.31 +Output | 0.01103 | 0.01103 | 0.01103 | 0.0 | 0.06 +Modify | 0.24061 | 0.24061 | 0.24061 | 0.0 | 1.41 +Other | | 0.2576 | | | 1.51 Nlocal: 10125 ave 10125 max 10125 min Histogram: 1 0 0 0 0 0 0 0 0 0 @@ -170,4 +174,4 @@ Total # of neighbors = 114682 Ave neighs/atom = 11.3266 Neighbor list builds = 1000 Dangerous builds not checked -Total wall time: 0:00:21 +Total wall time: 0:00:17 diff --git a/examples/USER/dpd/dpdrx-shardlow/in.dpdrx-shardlow b/examples/USER/dpd/dpdrx-shardlow/in.dpdrx-shardlow index e65b5a14db..815c974741 100755 --- a/examples/USER/dpd/dpdrx-shardlow/in.dpdrx-shardlow +++ b/examples/USER/dpd/dpdrx-shardlow/in.dpdrx-shardlow @@ -37,7 +37,7 @@ timestep 0.001 pair_style hybrid/overlay dpd/fdt/energy 16.00 234324 exp6/rx 16.00 pair_coeff * * dpd/fdt/energy 0.0 0.05 10.0 16.00 -pair_coeff * * exp6/rx params.exp6 1fluid 1fluid 1.0 1.0 16.00 +pair_coeff * * exp6/rx params.exp6 1fluid 1fluid exponent 1.0 1.0 16.00 fix 1 all shardlow fix 2 all nve diff --git a/examples/USER/dpd/dpdrx-shardlow/log.dpdrx-shardlow.reference b/examples/USER/dpd/dpdrx-shardlow/log.dpdrx-shardlow.reference index 067708154a..b80e033eb9 100644 --- a/examples/USER/dpd/dpdrx-shardlow/log.dpdrx-shardlow.reference +++ b/examples/USER/dpd/dpdrx-shardlow/log.dpdrx-shardlow.reference @@ -48,7 +48,7 @@ timestep 0.001 pair_style hybrid/overlay dpd/fdt/energy 16.00 234324 exp6/rx 16.00 pair_coeff * * dpd/fdt/energy 0.0 0.05 10.0 16.00 -pair_coeff * * exp6/rx params.exp6 1fluid 1fluid 1.0 1.0 16.00 +pair_coeff * * exp6/rx params.exp6 1fluid 1fluid exponent 1.0 1.0 16.00 fix 1 all shardlow fix 
2 all nve @@ -69,39 +69,51 @@ dump_modify 2 sort id run 10 Neighbor list info ... - 2 neighbor list requests update every 1 steps, delay 10 steps, check yes max neighbors/atom: 2000, page size: 100000 master list distance cutoff = 18 ghost atom cutoff = 18 - binsize = 9 -> bins = 8 8 5 -Memory usage per processor = 6.52436 Mbytes + binsize = 9, bins = 8 8 5 + 3 neighbor lists, perpetual/occasional/extra = 3 0 0 + (1) pair dpd/fdt/energy, perpetual + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard + (2) pair exp6/rx, perpetual, copy from (1) + pair build: copy + stencil: none + bin: none + (3) fix shardlow, perpetual, ssa + pair build: half/bin/newton/ssa + stencil: half/bin/3d/newton/ssa + bin: ssa +Memory usage per processor = 8.39564 Mbytes Step Temp Press Volume PotEng KinEng c_dpdU[1] c_dpdU[2] c_dpdU[3] v_totEnergy c_dpdU[4] - 0 2065.00000000 1368.17463335 179834.51777865 0.00000000 230.35385810 3841.42393279 3841.42393279 0.00000000 7682.84786557 2065.00000000 - 1 2064.93210437 1368.12964881 179834.51777865 0.00000000 230.34628424 3841.42393279 3841.43150665 0.00000000 7682.85543943 2065.20275230 - 2 2067.82089565 1370.04362990 179834.51777865 -0.00000000 230.66853326 3841.42393279 3841.10925763 0.00000000 7682.53319042 2065.32453473 - 3 2070.45225169 1371.78704616 179834.51777865 -0.00000000 230.96206499 3841.42393279 3840.81572590 0.00000000 7682.23965869 2065.45336917 - 4 2075.00241157 1374.80177416 179834.51777865 -0.00000000 231.46964217 3841.42393279 3840.30814872 0.00000000 7681.73208151 2065.52973333 - 5 2073.96509212 1374.11449370 179834.51777865 -0.00000000 231.35392762 3841.42393279 3840.42386327 0.00000000 7681.84779605 2065.76011517 - 6 2074.26516936 1374.31331117 179834.51777865 -0.00000000 231.38740169 3841.42393279 3840.39038920 0.00000000 7681.81432198 2065.95399323 - 7 2071.41069700 1372.42206822 179834.51777865 -0.00000000 231.06898100 3841.42393279 3840.70880989 0.00000000 7682.13274267 2066.23407076 - 8 
2071.35844957 1372.38745146 179834.51777865 -0.00000000 231.06315272 3841.42393279 3840.71463817 0.00000000 7682.13857095 2066.43766287 - 9 2071.35676496 1372.38633532 179834.51777865 -0.00000000 231.06296480 3841.42393279 3840.71482609 0.00000000 7682.13875887 2066.64001166 - 10 2066.53172340 1369.18948328 179834.51777865 -0.00000000 230.52472415 3841.42393279 3841.25306673 0.00000000 7682.67699952 2066.97516855 -Loop time of 0.289778 on 1 procs for 10 steps with 864 atoms + 0 2065.00000000 1368.17463335 179834.51777865 0.00000000 230.35385810 0.00000000 7682.84786557 0.00000000 7682.84786557 2065.00000000 + 1 2064.93210437 1368.12964881 179834.51777865 0.00000000 230.34628424 0.00000000 7682.85543943 0.00000000 7682.85543943 2065.20275230 + 2 2067.82089565 1370.04362990 179834.51777865 -0.00000000 230.66853326 0.00000000 7682.53319042 0.00000000 7682.53319042 2065.32453473 + 3 2070.45225169 1371.78704616 179834.51777865 -0.00000000 230.96206499 0.00000000 7682.23965869 0.00000000 7682.23965869 2065.45336917 + 4 2075.00241157 1374.80177416 179834.51777865 -0.00000000 231.46964217 0.00000000 7681.73208151 0.00000000 7681.73208151 2065.52973333 + 5 2073.96509212 1374.11449370 179834.51777865 -0.00000000 231.35392762 -0.00000000 7681.84779605 0.00000000 7681.84779605 2065.76011517 + 6 2074.26516936 1374.31331117 179834.51777865 -0.00000000 231.38740169 -0.00000000 7681.81432198 0.00000000 7681.81432198 2065.95399323 + 7 2071.41069700 1372.42206822 179834.51777865 -0.00000000 231.06898100 -0.00000000 7682.13274267 0.00000000 7682.13274267 2066.23407076 + 8 2071.35844957 1372.38745146 179834.51777865 -0.00000000 231.06315272 0.00000000 7682.13857095 0.00000000 7682.13857095 2066.43766287 + 9 2071.35676496 1372.38633532 179834.51777865 -0.00000000 231.06296480 0.00000000 7682.13875887 0.00000000 7682.13875887 2066.64001166 + 10 2066.53172340 1369.18948328 179834.51777865 -0.00000000 230.52472415 0.00000000 7682.67699952 0.00000000 7682.67699952 2066.97516855 +Loop time 
of 0.611304 on 1 procs for 10 steps with 864 atoms -Performance: 2.982 ns/day, 8.049 hours/ns, 34.509 timesteps/s -99.4% CPU use with 1 MPI tasks x no OpenMP threads +Performance: 1.413 ns/day, 16.981 hours/ns, 16.358 timesteps/s +98.2% CPU use with 1 MPI tasks x no OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.16405 | 0.16405 | 0.16405 | 0.0 | 56.61 +Pair | 0.34177 | 0.34177 | 0.34177 | 0.0 | 55.91 Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 0.00066328 | 0.00066328 | 0.00066328 | 0.0 | 0.23 -Output | 0.037718 | 0.037718 | 0.037718 | 0.0 | 13.02 -Modify | 0.087281 | 0.087281 | 0.087281 | 0.0 | 30.12 -Other | | 7.057e-05 | | | 0.02 +Comm | 0.0013342 | 0.0013342 | 0.0013342 | 0.0 | 0.22 +Output | 0.083583 | 0.083583 | 0.083583 | 0.0 | 13.67 +Modify | 0.18451 | 0.18451 | 0.18451 | 0.0 | 30.18 +Other | | 0.0001087 | | | 0.02 Nlocal: 864 ave 864 max 864 min Histogram: 1 0 0 0 0 0 0 0 0 0 diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 559948067d..ce3b547435 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -936,32 +936,32 @@ void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double // Fuchslin-Like Exp-6 Scaling double powfuch = 0.0; - if(fuchslinEpsilon < 0.0){ - powfuch = pow(nTotalOFA,-fuchslinEpsilon); + if(exponentEpsilon < 0.0){ + powfuch = pow(nTotalOFA,-exponentEpsilon); if(powfuch<1e-15) epsilon1 = 0.0; else epsilon1 *= 1.0/powfuch; - powfuch = pow(nTotalOFA_old,-fuchslinEpsilon); + powfuch = pow(nTotalOFA_old,-exponentEpsilon); if(powfuch<1e-15) epsilon1_old = 0.0; else epsilon1_old *= 1.0/powfuch; } else { - epsilon1 *= pow(nTotalOFA,fuchslinEpsilon); - epsilon1_old *= pow(nTotalOFA_old,fuchslinEpsilon); + epsilon1 *= pow(nTotalOFA,exponentEpsilon); + epsilon1_old *= pow(nTotalOFA_old,exponentEpsilon); } - if(fuchslinR < 0.0){ - powfuch = 
pow(nTotalOFA,-fuchslinR); + if(exponentR < 0.0){ + powfuch = pow(nTotalOFA,-exponentR); if(powfuch<1e-15) rm1 = 0.0; else rm1 *= 1.0/powfuch; - powfuch = pow(nTotalOFA_old,-fuchslinR); + powfuch = pow(nTotalOFA_old,-exponentR); if(powfuch<1e-15) rm1_old = 0.0; else rm1_old *= 1.0/powfuch; } else { - rm1 *= pow(nTotalOFA,fuchslinR); - rm1_old *= pow(nTotalOFA_old,fuchslinR); + rm1 *= pow(nTotalOFA,exponentR); + rm1_old *= pow(nTotalOFA_old,exponentR); } } @@ -990,32 +990,32 @@ void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double // Fuchslin-Like Exp-6 Scaling double powfuch = 0.0; - if(fuchslinEpsilon < 0.0){ - powfuch = pow(nTotalOFA,-fuchslinEpsilon); + if(exponentEpsilon < 0.0){ + powfuch = pow(nTotalOFA,-exponentEpsilon); if(powfuch<1e-15) epsilon2 = 0.0; else epsilon2 *= 1.0/powfuch; - powfuch = pow(nTotalOFA_old,-fuchslinEpsilon); + powfuch = pow(nTotalOFA_old,-exponentEpsilon); if(powfuch<1e-15) epsilon2_old = 0.0; else epsilon2_old *= 1.0/powfuch; } else { - epsilon2 *= pow(nTotalOFA,fuchslinEpsilon); - epsilon2_old *= pow(nTotalOFA_old,fuchslinEpsilon); + epsilon2 *= pow(nTotalOFA,exponentEpsilon); + epsilon2_old *= pow(nTotalOFA_old,exponentEpsilon); } - if(fuchslinR < 0.0){ - powfuch = pow(nTotalOFA,-fuchslinR); + if(exponentR < 0.0){ + powfuch = pow(nTotalOFA,-exponentR); if(powfuch<1e-15) rm2 = 0.0; else rm2 *= 1.0/powfuch; - powfuch = pow(nTotalOFA_old,-fuchslinR); + powfuch = pow(nTotalOFA_old,-exponentR); if(powfuch<1e-15) rm2_old = 0.0; else rm2_old *= 1.0/powfuch; } else { - rm2 *= pow(nTotalOFA,fuchslinR); - rm2_old *= pow(nTotalOFA_old,fuchslinR); + rm2 *= pow(nTotalOFA,exponentR); + rm2_old *= pow(nTotalOFA_old,exponentR); } } diff --git a/src/USER-DPD/fix_eos_table_rx.cpp b/src/USER-DPD/fix_eos_table_rx.cpp index 91ccc8475e..52b1930c1c 100644 --- a/src/USER-DPD/fix_eos_table_rx.cpp +++ b/src/USER-DPD/fix_eos_table_rx.cpp @@ -28,6 +28,12 @@ #define MAXLINE 1024 +#ifdef DBL_EPSILON + #define MY_EPSILON (10.0*DBL_EPSILON) +#else + 
#define MY_EPSILON (10.0*2.220446049250313e-16) +#endif + using namespace LAMMPS_NS; using namespace FixConst; @@ -37,17 +43,18 @@ FixEOStableRX::FixEOStableRX(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), ntables(0), tables(NULL), tables2(NULL), dHf(NULL), eosSpecies(NULL) { - if (narg != 8) error->all(FLERR,"Illegal fix eos/table/rx command"); + if (narg != 8 && narg != 10) error->all(FLERR,"Illegal fix eos/table/rx command"); restart_peratom = 1; nevery = 1; - bool rx_flag = false; + rx_flag = false; + nspecies = 1; for (int i = 0; i < modify->nfix; i++) - if (strncmp(modify->fix[i]->style,"rx",2) == 0) rx_flag = true; - if (!rx_flag) error->all(FLERR,"FixEOStableRX requires a fix rx command."); - - nspecies = atom->nspecies_dpd; - if(nspecies==0) error->all(FLERR,"There are no rx species specified."); + if (strncmp(modify->fix[i]->style,"rx",2) == 0){ + rx_flag = true; + nspecies = atom->nspecies_dpd; + if(nspecies==0) error->all(FLERR,"There are no rx species specified."); + } if (strcmp(arg[3],"linear") == 0) tabstyle = LINEAR; else error->all(FLERR,"Unknown table style in fix eos/table/rx"); @@ -113,8 +120,25 @@ FixEOStableRX::FixEOStableRX(LAMMPS *lmp, int narg, char **arg) : ntables++; } - // Read the Formation Enthalpies - read_file(arg[7]); + // Read the Formation Enthalpies and Correction Coefficients + dHf = new double[nspecies]; + energyCorr = new double[nspecies]; + tempCorrCoeff = new double[nspecies]; + moleculeCorrCoeff= new double[nspecies]; + for (int ii=0; iime == 0) { ptr = fgets(&line[n],MAXLINE-n,fp); @@ -332,7 +358,7 @@ void FixEOStableRX::read_file(char *file) nwords = atom->count_words(line); } - if (nwords != params_per_line) + if (nwords != min_params_per_line && nwords != max_params_per_line) error->all(FLERR,"Incorrect format in eos table/rx potential file"); // words = ptrs to all words in line @@ -344,8 +370,14 @@ void FixEOStableRX::read_file(char *file) for (ispecies = 0; ispecies < nspecies; ispecies++) if 
(strcmp(words[0],&atom->dname[ispecies][0]) == 0) break; - if (ispecies < nspecies) + if (ispecies < nspecies){ dHf[ispecies] = atof(words[1]); + if(nwords > min_params_per_line+1){ + energyCorr[ispecies] = atof(words[2]); + tempCorrCoeff[ispecies] = atof(words[3]); + moleculeCorrCoeff[ispecies] = atof(words[4]); + } + } } delete [] words; @@ -547,27 +579,33 @@ void FixEOStableRX::param_extract(Table *tb, char *line) error->one(FLERR,"Invalid keyword in fix eos/table/rx parameters"); word = strtok(NULL," \t\n\r\f"); - while (word) { - for (ispecies = 0; ispecies < nspecies; ispecies++) - if (strcmp(word,&atom->dname[ispecies][0]) == 0){ - eosSpecies[ncolumn] = ispecies; - ncolumn++; - break; + if(rx_flag){ + while (word) { + for (ispecies = 0; ispecies < nspecies; ispecies++) + if (strcmp(word,&atom->dname[ispecies][0]) == 0){ + eosSpecies[ncolumn] = ispecies; + ncolumn++; + break; + } + if (ispecies == nspecies){ + printf("name=%s not found in species list\n",word); + error->one(FLERR,"Invalid keyword in fix eos/table/rx parameters"); } - if (ispecies == nspecies){ - printf("name=%s not found in species list\n",word); - error->one(FLERR,"Invalid keyword in fix eos/table/rx parameters"); + word = strtok(NULL," \t\n\r\f"); } - word = strtok(NULL," \t\n\r\f"); + + for (int icolumn = 0; icolumn < ncolumn; icolumn++) + if(eosSpecies[icolumn]==-1) + error->one(FLERR,"EOS data is missing from fix eos/table/rx tabe"); + if(ncolumn != nspecies){ + printf("ncolumns=%d nspecies=%d\n",ncolumn,nspecies); + error->one(FLERR,"The number of columns in fix eos/table/rx does not match the number of species"); + } + } else { + eosSpecies[0] = 0; + ncolumn++; } - for (int icolumn = 0; icolumn < ncolumn; icolumn++) - if(eosSpecies[icolumn]==-1) - error->one(FLERR,"EOS data is missing from fix eos/table/rx tabe"); - if(ncolumn != nspecies){ - printf("ncolumns=%d nspecies=%d\n",ncolumn,nspecies); - error->one(FLERR,"The number of columns in fix eos/table/rx does not match the number of 
species"); - } if (tb->ninput == 0) error->one(FLERR,"fix eos/table/rx parameters did not set N"); } @@ -655,11 +693,27 @@ double FixEOStableRX::splint(double *xa, double *ya, double *y2a, int n, double void FixEOStableRX::energy_lookup(int id, double thetai, double &ui) { - int itable; - double fraction, uTmp, nTotal; + int itable, nPG; + double fraction, uTmp, nMolecules, nTotal, nTotalPG; + double tolerance = 1.0e-10; ui = 0.0; nTotal = 0.0; + nTotalPG = 0.0; + nPG = 0; + + if(rx_flag){ + for(int ispecies=0;ispeciesdvector[ispecies][id]; + if(fabs(moleculeCorrCoeff[ispecies]) > tolerance){ + nPG++; + nTotalPG += atom->dvector[ispecies][id]; + } + } + } else { + nTotal = 1.0; + } + for(int ispecies=0;ispecieslo); @@ -671,9 +725,13 @@ void FixEOStableRX::energy_lookup(int id, double thetai, double &ui) uTmp = tb->e[itable] + fraction*tb->de[itable]; uTmp += dHf[ispecies]; - // mol fraction form: - ui += atom->dvector[ispecies][id]*uTmp; - nTotal += atom->dvector[ispecies][id]; + uTmp += tempCorrCoeff[ispecies]*thetai; // temperature correction + uTmp += energyCorr[ispecies]; // energy correction + if(nPG > 0) ui += moleculeCorrCoeff[ispecies]*nTotalPG/double(nPG); // molecule correction + + if(rx_flag) nMolecules = atom->dvector[ispecies][id]; + else nMolecules = 1.0; + ui += nMolecules*uTmp; } } ui = ui - double(nTotal+1.5)*force->boltz*thetai; @@ -692,6 +750,7 @@ void FixEOStableRX::temperature_lookup(int id, double ui, double &thetai) double maxit = 100; double temp; double delta = 0.001; + double tolerance = 1.0e-10; // Store the current thetai in t1 t1 = MAX(thetai,tb->lo); @@ -715,7 +774,7 @@ void FixEOStableRX::temperature_lookup(int id, double ui, double &thetai) // Apply the Secant Method for(it=0; itone(FLERR,"NaN detected in secant solver."); temp = t1; temp = MAX(temp,tb->lo); @@ -726,7 +785,7 @@ void FixEOStableRX::temperature_lookup(int id, double ui, double &thetai) break; } temp = t2 - f2*(t2-t1)/(f2-f1); - if(fabs(temp-t2) < 1e-6) break; + 
if(fabs(temp-t2) < tolerance) break; f1 = f2; t1 = t2; t2 = temp; diff --git a/src/USER-DPD/fix_eos_table_rx.h b/src/USER-DPD/fix_eos_table_rx.h index 078cf1e2e1..8c26d133a5 100644 --- a/src/USER-DPD/fix_eos_table_rx.h +++ b/src/USER-DPD/fix_eos_table_rx.h @@ -67,7 +67,7 @@ class FixEOStableRX : public Fix { void read_file(char *); - double *dHf; + double *dHf,*energyCorr,*tempCorrCoeff,*moleculeCorrCoeff; int pack_reverse_comm(int, int, double *); void unpack_reverse_comm(int, int *, double *); @@ -76,6 +76,7 @@ class FixEOStableRX : public Fix { int *eosSpecies; int ncolumn; + bool rx_flag; }; } diff --git a/src/USER-DPD/fix_rx.cpp b/src/USER-DPD/fix_rx.cpp index 0bd560b241..b7330ba1ef 100644 --- a/src/USER-DPD/fix_rx.cpp +++ b/src/USER-DPD/fix_rx.cpp @@ -45,6 +45,12 @@ enum{LUCY}; #define MAXLINE 1024 #define DELTA 4 +#ifdef DBL_EPSILON + #define MY_EPSILON (10.0*DBL_EPSILON) +#else + #define MY_EPSILON (10.0*2.220446049250313e-16) +#endif + #define SparseKinetics_enableIntegralReactions (true) #define SparseKinetics_invalidIndex (-1) @@ -693,7 +699,6 @@ void FixRX::pre_force(int vflag) int *mask = atom->mask; double *dpdTheta = atom->dpdTheta; int newton_pair = force->newton_pair; - int ii; double theta; if(localTempFlag){ @@ -996,9 +1001,9 @@ void FixRX::rk4(int id, double *rwork) // Store the solution back in atom->dvector. 
for (int ispecies = 0; ispecies < nspecies; ispecies++){ - if(y[ispecies] < -1.0e-10) - error->one(FLERR,"Computed concentration in RK4 solver is < -1.0e-10"); - else if(y[ispecies] < 1e-15) + if(y[ispecies] < -MY_EPSILON) + error->one(FLERR,"Computed concentration in RK4 solver is < -10*DBL_EPSILON"); + else if(y[ispecies] < MY_EPSILON) y[ispecies] = 0.0; atom->dvector[ispecies][id] = y[ispecies]; } @@ -1515,7 +1520,7 @@ void FixRX::rkf45(int id, double *rwork) for (int ispecies = 0; ispecies < nspecies; ispecies++){ if(y[ispecies] < -1.0e-10) error->one(FLERR,"Computed concentration in RKF45 solver is < -1.0e-10"); - else if(y[ispecies] < 1e-20) + else if(y[ispecies] < MY_EPSILON) y[ispecies] = 0.0; atom->dvector[ispecies][id] = y[ispecies]; } diff --git a/src/USER-DPD/pair_exp6_rx.cpp b/src/USER-DPD/pair_exp6_rx.cpp index 202e0bf654..87a283179c 100644 --- a/src/USER-DPD/pair_exp6_rx.cpp +++ b/src/USER-DPD/pair_exp6_rx.cpp @@ -35,6 +35,12 @@ using namespace MathSpecial; #define MAXLINE 1024 #define DELTA 4 +#ifdef DBL_EPSILON + #define MY_EPSILON (10.0*DBL_EPSILON) +#else + #define MY_EPSILON (10.0*2.220446049250313e-16) +#endif + #define oneFluidApproxParameter (-1) #define isOneFluidApprox(_site) ( (_site) == oneFluidApproxParameter ) @@ -47,17 +53,17 @@ using namespace MathSpecial; struct PairExp6ParamDataType { int n; - double *epsilon1, *alpha1, *rm1, *fraction1, - *epsilon2, *alpha2, *rm2, *fraction2, - *epsilonOld1, *alphaOld1, *rmOld1, *fractionOld1, - *epsilonOld2, *alphaOld2, *rmOld2, *fractionOld2; + double *epsilon1, *alpha1, *rm1, *mixWtSite1, + *epsilon2, *alpha2, *rm2, *mixWtSite2, + *epsilonOld1, *alphaOld1, *rmOld1, *mixWtSite1old, + *epsilonOld2, *alphaOld2, *rmOld2, *mixWtSite2old; // Default constructor -- nullify everything. 
PairExp6ParamDataType(void) - : n(0), epsilon1(NULL), alpha1(NULL), rm1(NULL), fraction1(NULL), - epsilon2(NULL), alpha2(NULL), rm2(NULL), fraction2(NULL), - epsilonOld1(NULL), alphaOld1(NULL), rmOld1(NULL), fractionOld1(NULL), - epsilonOld2(NULL), alphaOld2(NULL), rmOld2(NULL), fractionOld2(NULL) + : n(0), epsilon1(NULL), alpha1(NULL), rm1(NULL), mixWtSite1(NULL), + epsilon2(NULL), alpha2(NULL), rm2(NULL), mixWtSite2(NULL), + epsilonOld1(NULL), alphaOld1(NULL), rmOld1(NULL), mixWtSite1old(NULL), + epsilonOld2(NULL), alphaOld2(NULL), rmOld2(NULL), mixWtSite2old(NULL) {} }; @@ -71,6 +77,7 @@ PairExp6rx::PairExp6rx(LAMMPS *lmp) : Pair(lmp) nparams = maxparam = 0; params = NULL; mol2param = NULL; + fractionalWeighting = true; } /* ---------------------------------------------------------------------- */ @@ -93,6 +100,11 @@ PairExp6rx::~PairExp6rx() memory->destroy(cutsq); memory->destroy(cut); } + if(scalingFlag == POLYNOMIAL){ + memory->destroy(coeffAlpha); + memory->destroy(coeffEps); + memory->destroy(coeffRm); + } } /* ---------------------------------------------------------------------- */ @@ -134,10 +146,10 @@ void PairExp6rx::compute(int eflag, int vflag) double epsilon2_j,alpha2_j,rm2_j; double evdwlOldEXP6_12, evdwlOldEXP6_21, fpairOldEXP6_12, fpairOldEXP6_21; double evdwlEXP6_12, evdwlEXP6_21; - double fractionOld1_i, fractionOld1_j; - double fractionOld2_i, fractionOld2_j; - double fraction1_i, fraction1_j; - double fraction2_i, fraction2_j; + double mixWtSite1old_i, mixWtSite1old_j; + double mixWtSite2old_i, mixWtSite2old_j; + double mixWtSite1_i, mixWtSite1_j; + double mixWtSite2_i, mixWtSite2_j; double *uCG = atom->uCG; double *uCGnew = atom->uCGnew; @@ -157,38 +169,38 @@ void PairExp6rx::compute(int eflag, int vflag) memory->create( PairExp6ParamData.epsilon1 , np_total, "PairExp6ParamData.epsilon1"); memory->create( PairExp6ParamData.alpha1 , np_total, "PairExp6ParamData.alpha1"); memory->create( PairExp6ParamData.rm1 , np_total, 
"PairExp6ParamData.rm1"); - memory->create( PairExp6ParamData.fraction1 , np_total, "PairExp6ParamData.fraction1"); + memory->create( PairExp6ParamData.mixWtSite1 , np_total, "PairExp6ParamData.mixWtSite1"); memory->create( PairExp6ParamData.epsilon2 , np_total, "PairExp6ParamData.epsilon2"); memory->create( PairExp6ParamData.alpha2 , np_total, "PairExp6ParamData.alpha2"); memory->create( PairExp6ParamData.rm2 , np_total, "PairExp6ParamData.rm2"); - memory->create( PairExp6ParamData.fraction2 , np_total, "PairExp6ParamData.fraction2"); + memory->create( PairExp6ParamData.mixWtSite2 , np_total, "PairExp6ParamData.mixWtSite2"); memory->create( PairExp6ParamData.epsilonOld1 , np_total, "PairExp6ParamData.epsilonOld1"); memory->create( PairExp6ParamData.alphaOld1 , np_total, "PairExp6ParamData.alphaOld1"); memory->create( PairExp6ParamData.rmOld1 , np_total, "PairExp6ParamData.rmOld1"); - memory->create( PairExp6ParamData.fractionOld1 , np_total, "PairExp6ParamData.fractionOld1"); + memory->create( PairExp6ParamData.mixWtSite1old , np_total, "PairExp6ParamData.mixWtSite1old"); memory->create( PairExp6ParamData.epsilonOld2 , np_total, "PairExp6ParamData.epsilonOld2"); memory->create( PairExp6ParamData.alphaOld2 , np_total, "PairExp6ParamData.alphaOld2"); memory->create( PairExp6ParamData.rmOld2 , np_total, "PairExp6ParamData.rmOld2"); - memory->create( PairExp6ParamData.fractionOld2 , np_total, "PairExp6ParamData.fractionOld2"); + memory->create( PairExp6ParamData.mixWtSite2old , np_total, "PairExp6ParamData.mixWtSite2old"); for (i = 0; i < np_total; ++i) { - getParamsEXP6 (i, PairExp6ParamData.epsilon1[i], + getMixingWeights (i, PairExp6ParamData.epsilon1[i], PairExp6ParamData.alpha1[i], PairExp6ParamData.rm1[i], - PairExp6ParamData.fraction1[i], + PairExp6ParamData.mixWtSite1[i], PairExp6ParamData.epsilon2[i], PairExp6ParamData.alpha2[i], PairExp6ParamData.rm2[i], - PairExp6ParamData.fraction2[i], + PairExp6ParamData.mixWtSite2[i], PairExp6ParamData.epsilonOld1[i], 
PairExp6ParamData.alphaOld1[i], PairExp6ParamData.rmOld1[i], - PairExp6ParamData.fractionOld1[i], + PairExp6ParamData.mixWtSite1old[i], PairExp6ParamData.epsilonOld2[i], PairExp6ParamData.alphaOld2[i], PairExp6ParamData.rmOld2[i], - PairExp6ParamData.fractionOld2[i]); + PairExp6ParamData.mixWtSite2old[i]); } } @@ -212,19 +224,19 @@ void PairExp6rx::compute(int eflag, int vflag) epsilon1_i = PairExp6ParamData.epsilon1[i]; alpha1_i = PairExp6ParamData.alpha1[i]; rm1_i = PairExp6ParamData.rm1[i]; - fraction1_i = PairExp6ParamData.fraction1[i]; + mixWtSite1_i = PairExp6ParamData.mixWtSite1[i]; epsilon2_i = PairExp6ParamData.epsilon2[i]; alpha2_i = PairExp6ParamData.alpha2[i]; rm2_i = PairExp6ParamData.rm2[i]; - fraction2_i = PairExp6ParamData.fraction2[i]; + mixWtSite2_i = PairExp6ParamData.mixWtSite2[i]; epsilonOld1_i = PairExp6ParamData.epsilonOld1[i]; alphaOld1_i = PairExp6ParamData.alphaOld1[i]; rmOld1_i = PairExp6ParamData.rmOld1[i]; - fractionOld1_i = PairExp6ParamData.fractionOld1[i]; + mixWtSite1old_i = PairExp6ParamData.mixWtSite1old[i]; epsilonOld2_i = PairExp6ParamData.epsilonOld2[i]; alphaOld2_i = PairExp6ParamData.alphaOld2[i]; rmOld2_i = PairExp6ParamData.rmOld2[i]; - fractionOld2_i = PairExp6ParamData.fractionOld2[i]; + mixWtSite2old_i = PairExp6ParamData.mixWtSite2old[i]; } for (jj = 0; jj < jnum; jj++) { @@ -259,19 +271,19 @@ void PairExp6rx::compute(int eflag, int vflag) epsilon1_j = PairExp6ParamData.epsilon1[j]; alpha1_j = PairExp6ParamData.alpha1[j]; rm1_j = PairExp6ParamData.rm1[j]; - fraction1_j = PairExp6ParamData.fraction1[j]; + mixWtSite1_j = PairExp6ParamData.mixWtSite1[j]; epsilon2_j = PairExp6ParamData.epsilon2[j]; alpha2_j = PairExp6ParamData.alpha2[j]; rm2_j = PairExp6ParamData.rm2[j]; - fraction2_j = PairExp6ParamData.fraction2[j]; + mixWtSite2_j = PairExp6ParamData.mixWtSite2[j]; epsilonOld1_j = PairExp6ParamData.epsilonOld1[j]; alphaOld1_j = PairExp6ParamData.alphaOld1[j]; rmOld1_j = PairExp6ParamData.rmOld1[j]; - fractionOld1_j = 
PairExp6ParamData.fractionOld1[j]; + mixWtSite1old_j = PairExp6ParamData.mixWtSite1old[j]; epsilonOld2_j = PairExp6ParamData.epsilonOld2[j]; alphaOld2_j = PairExp6ParamData.alphaOld2[j]; rmOld2_j = PairExp6ParamData.rmOld2[j]; - fractionOld2_j = PairExp6ParamData.fractionOld2[j]; + mixWtSite2old_j = PairExp6ParamData.mixWtSite2old[j]; } // A2. Apply Lorentz-Berthelot mixing rules for the i-j pair @@ -372,9 +384,9 @@ void PairExp6rx::compute(int eflag, int vflag) } if (isite1 == isite2) - evdwlOld = sqrt(fractionOld1_i*fractionOld2_j)*evdwlOldEXP6_12; + evdwlOld = sqrt(mixWtSite1old_i*mixWtSite2old_j)*evdwlOldEXP6_12; else - evdwlOld = sqrt(fractionOld1_i*fractionOld2_j)*evdwlOldEXP6_12 + sqrt(fractionOld2_i*fractionOld1_j)*evdwlOldEXP6_21; + evdwlOld = sqrt(mixWtSite1old_i*mixWtSite2old_j)*evdwlOldEXP6_12 + sqrt(mixWtSite2old_i*mixWtSite1old_j)*evdwlOldEXP6_21; evdwlOld *= factor_lj; @@ -455,8 +467,8 @@ void PairExp6rx::compute(int eflag, int vflag) // // Apply Mixing Rule to get the overall force for the CG pair // - if (isite1 == isite2) fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpairOldEXP6_12; - else fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpairOldEXP6_12 + sqrt(fractionOld2_i*fractionOld1_j)*fpairOldEXP6_21; + if (isite1 == isite2) fpair = sqrt(mixWtSite1old_i*mixWtSite2old_j)*fpairOldEXP6_12; + else fpair = sqrt(mixWtSite1old_i*mixWtSite2old_j)*fpairOldEXP6_12 + sqrt(mixWtSite2old_i*mixWtSite1old_j)*fpairOldEXP6_21; f[i][0] += delx*fpair; f[i][1] += dely*fpair; @@ -467,8 +479,8 @@ void PairExp6rx::compute(int eflag, int vflag) f[j][2] -= delz*fpair; } - if (isite1 == isite2) evdwl = sqrt(fraction1_i*fraction2_j)*evdwlEXP6_12; - else evdwl = sqrt(fraction1_i*fraction2_j)*evdwlEXP6_12 + sqrt(fraction2_i*fraction1_j)*evdwlEXP6_21; + if (isite1 == isite2) evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwlEXP6_12; + else evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwlEXP6_12 + sqrt(mixWtSite2_i*mixWtSite1_j)*evdwlEXP6_21; evdwl *= factor_lj; uCGnew[i] += 0.5*evdwl; 
@@ -488,19 +500,19 @@ void PairExp6rx::compute(int eflag, int vflag) if (PairExp6ParamData.epsilon1 ) memory->destroy(PairExp6ParamData.epsilon1); if (PairExp6ParamData.alpha1 ) memory->destroy(PairExp6ParamData.alpha1); if (PairExp6ParamData.rm1 ) memory->destroy(PairExp6ParamData.rm1); - if (PairExp6ParamData.fraction1 ) memory->destroy(PairExp6ParamData.fraction1); + if (PairExp6ParamData.mixWtSite1 ) memory->destroy(PairExp6ParamData.mixWtSite1); if (PairExp6ParamData.epsilon2 ) memory->destroy(PairExp6ParamData.epsilon2); if (PairExp6ParamData.alpha2 ) memory->destroy(PairExp6ParamData.alpha2); if (PairExp6ParamData.rm2 ) memory->destroy(PairExp6ParamData.rm2); - if (PairExp6ParamData.fraction2 ) memory->destroy(PairExp6ParamData.fraction2); + if (PairExp6ParamData.mixWtSite2 ) memory->destroy(PairExp6ParamData.mixWtSite2); if (PairExp6ParamData.epsilonOld1 ) memory->destroy(PairExp6ParamData.epsilonOld1); if (PairExp6ParamData.alphaOld1 ) memory->destroy(PairExp6ParamData.alphaOld1); if (PairExp6ParamData.rmOld1 ) memory->destroy(PairExp6ParamData.rmOld1); - if (PairExp6ParamData.fractionOld1) memory->destroy(PairExp6ParamData.fractionOld1); + if (PairExp6ParamData.mixWtSite1old) memory->destroy(PairExp6ParamData.mixWtSite1old); if (PairExp6ParamData.epsilonOld2 ) memory->destroy(PairExp6ParamData.epsilonOld2); if (PairExp6ParamData.alphaOld2 ) memory->destroy(PairExp6ParamData.alphaOld2); if (PairExp6ParamData.rmOld2 ) memory->destroy(PairExp6ParamData.rmOld2); - if (PairExp6ParamData.fractionOld2) memory->destroy(PairExp6ParamData.fractionOld2); + if (PairExp6ParamData.mixWtSite2old) memory->destroy(PairExp6ParamData.mixWtSite2old); } } @@ -530,10 +542,20 @@ void PairExp6rx::allocate() void PairExp6rx::settings(int narg, char **arg) { - if (narg != 1) error->all(FLERR,"Illegal pair_style command"); + if (narg < 1) error->all(FLERR,"Illegal pair_style command"); cut_global = force->numeric(FLERR,arg[0]); + // optional keywords + + int iarg = 1; + while (iarg 
< narg) { + if (strcmp(arg[iarg],"fractional") == 0) fractionalWeighting = true; + else if (strcmp(arg[iarg],"molecular") == 0) fractionalWeighting = false; + else error->all(FLERR,"Illegal pair_style command"); + iarg++; + } + if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) @@ -551,7 +573,7 @@ void PairExp6rx::settings(int narg, char **arg) void PairExp6rx::coeff(int narg, char **arg) { - if (narg < 7 || narg > 8) error->all(FLERR,"Incorrect args for pair coefficients"); + if (narg < 6 || narg > 9) error->all(FLERR,"Incorrect args for pair coefficients"); bool rx_flag = false; for (int i = 0; i < modify->nfix; i++) @@ -628,21 +650,36 @@ void PairExp6rx::coeff(int narg, char **arg) params[iparam].potentialType = exp6PotentialType; else error->all(FLERR,"params[].potential type unknown"); - - //printf("params[%d].name= %s ispecies= %d potential= %s potentialType= %d\n", iparam, params[iparam].name, params[iparam].ispecies, params[iparam].potential, params[iparam].potentialType); } } delete[] site1; delete[] site2; site1 = site2 = NULL; - fuchslinR = force->numeric(FLERR,arg[5]); - fuchslinEpsilon = force->numeric(FLERR,arg[6]); - setup(); double cut_one = cut_global; - if (narg == 8) cut_one = force->numeric(FLERR,arg[7]); + if (strcmp(arg[5],"exponent") == 0){ + scalingFlag = EXPONENT; + exponentR = force->numeric(FLERR,arg[6]); + exponentEpsilon = force->numeric(FLERR,arg[7]); + if (narg > 9) error->all(FLERR,"Incorrect args for pair coefficients"); + if (narg == 9) cut_one = force->numeric(FLERR,arg[8]); + } else if (strcmp(arg[5],"polynomial") == 0){ + scalingFlag = POLYNOMIAL; + memory->create(coeffAlpha,6,"pair:coeffAlpha"); + memory->create(coeffEps,6,"pair:coeffEps"); + memory->create(coeffRm,6,"pair:coeffRm"); + read_file2(arg[6]); + if (narg > 8) error->all(FLERR,"Incorrect args for pair coefficients"); + if (narg == 8) cut_one = force->numeric(FLERR,arg[7]); + } else if (strcmp(arg[5],"none") == 0){ + scalingFlag = NONE; + if (narg > 7) 
error->all(FLERR,"Incorrect args for pair coefficients"); + if (narg == 7) cut_one = force->numeric(FLERR,arg[6]); + } else { + error->all(FLERR,"Incorrect args for pair coefficients"); + } int count = 0; for (int i = ilo; i <= ihi; i++) { @@ -784,6 +821,95 @@ void PairExp6rx::read_file(char *file) /* ---------------------------------------------------------------------- */ +void PairExp6rx::read_file2(char *file) +{ + int params_per_line = 7; + char **words = new char*[params_per_line+1]; + + // open file on proc 0 + + FILE *fp; + fp = NULL; + if (comm->me == 0) { + fp = fopen(file,"r"); + if (fp == NULL) { + char str[128]; + sprintf(str,"Cannot open polynomial file %s",file); + error->one(FLERR,str); + } + } + + // one set of params can span multiple lines + int n,nwords; + char line[MAXLINE],*ptr; + int eof = 0; + + while (1) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fp); + if (ptr == NULL) { + eof = 1; + fclose(fp); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + // strip comment, skip line if blank + + if ((ptr = strchr(line,'#'))) *ptr = '\0'; + nwords = atom->count_words(line); + if (nwords == 0) continue; + + // concatenate additional lines until have params_per_line words + + while (nwords < params_per_line) { + n = strlen(line); + if (comm->me == 0) { + ptr = fgets(&line[n],MAXLINE-n,fp); + if (ptr == NULL) { + eof = 1; + fclose(fp); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + if ((ptr = strchr(line,'#'))) *ptr = '\0'; + nwords = atom->count_words(line); + } + + if (nwords != params_per_line) + error->all(FLERR,"Incorrect format in polynomial file"); + + // words = ptrs to all words in line + + nwords = 0; + words[nwords++] = strtok(line," \t\n\r\f"); + while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) 
continue; + + if (strcmp(words[0],"alpha") == 0){ + for (int ii=1; iidvector[ispecies][id]; - nTotal_old += atom->dvector[ispecies+nspecies][id]; + nTotalOld += atom->dvector[ispecies+nspecies][id]; iparam = mol2param[ispecies]; if (iparam < 0 || params[iparam].potentialType != exp6PotentialType ) continue; if (isOneFluidApprox(isite1) || isOneFluidApprox(isite2)) { if (isite1 == params[iparam].ispecies || isite2 == params[iparam].ispecies) continue; - nTotalOFA_old += atom->dvector[ispecies+nspecies][id]; - nTotalOFA += atom->dvector[ispecies][id]; + nMoleculesOFAold += atom->dvector[ispecies+nspecies][id]; + nMoleculesOFA += atom->dvector[ispecies][id]; } } - if(nTotal < 1e-8 || nTotal_old < 1e-8) - error->all(FLERR,"The number of molecules in CG particle is less than 1e-8."); + if(nTotal < MY_EPSILON || nTotalOld < MY_EPSILON) + error->all(FLERR,"The number of molecules in CG particle is less than 10*DBL_EPSILON."); // Compute the mole fraction of molecules within the fluid portion of the particle (One Fluid Approximation) - fractionOFA_old = nTotalOFA_old / nTotal_old; - fractionOFA = nTotalOFA / nTotal; + fractionOFAold = nMoleculesOFAold / nTotalOld; + fractionOFA = nMoleculesOFA / nTotal; for (int ispecies = 0; ispecies < nspecies; ispecies++) { iparam = mol2param[ispecies]; @@ -942,8 +1073,10 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm alpha1 = params[iparam].alpha; // Compute the mole fraction of Site1 - fraction1_old = atom->dvector[ispecies+nspecies][id]/nTotal_old; - fraction1 = atom->dvector[ispecies][id]/nTotal; + nMoleculesOld1 = atom->dvector[ispecies+nspecies][id]; + nMolecules1 = atom->dvector[ispecies][id]; + fractionOld1 = nMoleculesOld1/nTotalOld; + fraction1 = nMolecules1/nTotal; } // If Site2 matches a pure species, then grab the parameters @@ -956,7 +1089,9 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm alpha2 = params[iparam].alpha; // Compute the mole fraction of 
Site2 - fraction2_old = atom->dvector[ispecies+nspecies][id]/nTotal_old; + nMoleculesOld2 = atom->dvector[ispecies+nspecies][id]; + nMolecules2 = atom->dvector[ispecies][id]; + fractionOld2 = atom->dvector[ispecies+nspecies][id]/nTotalOld; fraction2 = atom->dvector[ispecies][id]/nTotal; } @@ -966,8 +1101,10 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm rmi = params[iparam].rm; epsiloni = params[iparam].epsilon; alphai = params[iparam].alpha; - xMolei = atom->dvector[ispecies][id]/nTotalOFA; - xMolei_old = atom->dvector[ispecies+nspecies][id]/nTotalOFA_old; + if(nMoleculesOFAdvector[ispecies][id]/nMoleculesOFA; + if(nMoleculesOFAolddvector[ispecies+nspecies][id]/nMoleculesOFAold; for (int jspecies = 0; jspecies < nspecies; jspecies++) { jparam = mol2param[jspecies]; @@ -976,15 +1113,17 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm rmj = params[jparam].rm; epsilonj = params[jparam].epsilon; alphaj = params[jparam].alpha; - xMolej = atom->dvector[jspecies][id]/nTotalOFA; - xMolej_old = atom->dvector[jspecies+nspecies][id]/nTotalOFA_old; + if(nMoleculesOFAdvector[jspecies][id]/nMoleculesOFA; + if(nMoleculesOFAolddvector[jspecies+nspecies][id]/nMoleculesOFAold; rmij = (rmi+rmj)/2.0; rm3ij = rmij*rmij*rmij; epsilonij = sqrt(epsiloni*epsilonj); alphaij = sqrt(alphai*alphaj); - if(fractionOFA_old > 0.0){ + if(fractionOFAold > 0.0){ rm3_old += xMolei_old*xMolej_old*rm3ij; epsilon_old += xMolei_old*xMolej_old*rm3ij*epsilonij; alpha_old += xMolei_old*xMolej_old*rm3ij*epsilonij*alphaij; @@ -1000,7 +1139,7 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm if (isOneFluidApprox(isite1)){ rm1 = cbrt(rm3); - if(rm1 < 1e-16) { + if(rm1 < MY_EPSILON) { rm1 = 0.0; epsilon1 = 0.0; alpha1 = 0.0; @@ -1008,11 +1147,11 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm epsilon1 = epsilon / rm3; alpha1 = alpha / epsilon1 / rm3; } - + nMolecules1 = 
1.0-(nTotal-nMoleculesOFA); fraction1 = fractionOFA; rm1_old = cbrt(rm3_old); - if(rm1_old < 1e-16) { + if(rm1_old < MY_EPSILON) { rm1_old = 0.0; epsilon1_old = 0.0; alpha1_old = 0.0; @@ -1020,42 +1159,21 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm epsilon1_old = epsilon_old / rm3_old; alpha1_old = alpha_old / epsilon1_old / rm3_old; } - fraction1_old = fractionOFA_old; + nMoleculesOld1 = 1.0-(nTotalOld-nMoleculesOFAold); + fractionOld1 = fractionOFAold; - // Fuchslin-Like Exp-6 Scaling - double powfuch = 0.0; - if(fuchslinEpsilon < 0.0){ - powfuch = pow(nTotalOFA,-fuchslinEpsilon); - if(powfuch<1e-15) epsilon1 = 0.0; - else epsilon1 *= 1.0/powfuch; - - powfuch = pow(nTotalOFA_old,-fuchslinEpsilon); - if(powfuch<1e-15) epsilon1_old = 0.0; - else epsilon1_old *= 1.0/powfuch; - - } else { - epsilon1 *= pow(nTotalOFA,fuchslinEpsilon); - epsilon1_old *= pow(nTotalOFA_old,fuchslinEpsilon); - } - - if(fuchslinR < 0.0){ - powfuch = pow(nTotalOFA,-fuchslinR); - if(powfuch<1e-15) rm1 = 0.0; - else rm1 *= 1.0/powfuch; - - powfuch = pow(nTotalOFA_old,-fuchslinR); - if(powfuch<1e-15) rm1_old = 0.0; - else rm1_old *= 1.0/powfuch; - - } else { - rm1 *= pow(nTotalOFA,fuchslinR); - rm1_old *= pow(nTotalOFA_old,fuchslinR); + if(scalingFlag == EXPONENT){ + exponentScaling(nMoleculesOFA,epsilon1,rm1); + exponentScaling(nMoleculesOFAold,epsilon1_old,rm1_old); + } else if(scalingFlag == POLYNOMIAL){ + polynomialScaling(nMoleculesOFA,alpha1,epsilon1,rm1); + polynomialScaling(nMoleculesOFAold,alpha1_old,epsilon1_old,rm1_old); } } if (isOneFluidApprox(isite2)){ rm2 = cbrt(rm3); - if(rm2 < 1e-16) { + if(rm2 < MY_EPSILON) { rm2 = 0.0; epsilon2 = 0.0; alpha2 = 0.0; @@ -1063,10 +1181,11 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm epsilon2 = epsilon / rm3; alpha2 = alpha / epsilon2 / rm3; } + nMolecules2 = 1.0-(nTotal-nMoleculesOFA); fraction2 = fractionOFA; rm2_old = cbrt(rm3_old); - if(rm2_old < 1e-16) { + 
if(rm2_old < MY_EPSILON) { rm2_old = 0.0; epsilon2_old = 0.0; alpha2_old = 0.0; @@ -1074,64 +1193,96 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm epsilon2_old = epsilon_old / rm3_old; alpha2_old = alpha_old / epsilon2_old / rm3_old; } - fraction2_old = fractionOFA_old; + nMoleculesOld2 = 1.0-(nTotalOld-nMoleculesOFAold); + fractionOld2 = fractionOFAold; - // Fuchslin-Like Exp-6 Scaling - double powfuch = 0.0; - if(fuchslinEpsilon < 0.0){ - powfuch = pow(nTotalOFA,-fuchslinEpsilon); - if(powfuch<1e-15) epsilon2 = 0.0; - else epsilon2 *= 1.0/powfuch; - - powfuch = pow(nTotalOFA_old,-fuchslinEpsilon); - if(powfuch<1e-15) epsilon2_old = 0.0; - else epsilon2_old *= 1.0/powfuch; - - } else { - epsilon2 *= pow(nTotalOFA,fuchslinEpsilon); - epsilon2_old *= pow(nTotalOFA_old,fuchslinEpsilon); - } - - if(fuchslinR < 0.0){ - powfuch = pow(nTotalOFA,-fuchslinR); - if(powfuch<1e-15) rm2 = 0.0; - else rm2 *= 1.0/powfuch; - - powfuch = pow(nTotalOFA_old,-fuchslinR); - if(powfuch<1e-15) rm2_old = 0.0; - else rm2_old *= 1.0/powfuch; - - } else { - rm2 *= pow(nTotalOFA,fuchslinR); - rm2_old *= pow(nTotalOFA_old,fuchslinR); + if(scalingFlag == EXPONENT){ + exponentScaling(nMoleculesOFA,epsilon2,rm2); + exponentScaling(nMoleculesOFAold,epsilon2_old,rm2_old); + } else if(scalingFlag == POLYNOMIAL){ + polynomialScaling(nMoleculesOFA,alpha2,epsilon2,rm2); + polynomialScaling(nMoleculesOFAold,alpha2_old,epsilon2_old,rm2_old); } } // Check that no fractions are less than zero - if(fraction1 < 0.0){ - if(fraction1 < -1.0e-10){ - error->all(FLERR,"Computed fraction less than -1.0e-10"); + if(fraction1 < 0.0 || nMolecules1 < 0.0){ + if(fraction1 < -MY_EPSILON || nMolecules1 < -MY_EPSILON){ + error->all(FLERR,"Computed fraction less than -10*DBL_EPSILON"); } + nMolecules1 = 0.0; fraction1 = 0.0; } - if(fraction2 < 0.0){ - if(fraction2 < -1.0e-10){ - error->all(FLERR,"Computed fraction less than -1.0e-10"); + if(fraction2 < 0.0 || nMolecules2 < 0.0){ + 
if(fraction2 < -MY_EPSILON || nMolecules2 < -MY_EPSILON){ + error->all(FLERR,"Computed fraction less than -10*DBL_EPSILON"); } + nMolecules2 = 0.0; fraction2 = 0.0; } - if(fraction1_old < 0.0){ - if(fraction1_old < -1.0e-10){ - error->all(FLERR,"Computed fraction less than -1.0e-10"); + if(fractionOld1 < 0.0 || nMoleculesOld1 < 0.0){ + if(fractionOld1 < -MY_EPSILON || nMoleculesOld1 < -MY_EPSILON){ + error->all(FLERR,"Computed fraction less than -10*DBL_EPSILON"); } - fraction1_old = 0.0; + nMoleculesOld1 = 0.0; + fractionOld1 = 0.0; } - if(fraction2_old < 0.0){ - if(fraction2_old < -1.0e-10){ - error->all(FLERR,"Computed fraction less than -1.0e-10"); + if(fractionOld2 < 0.0 || nMoleculesOld2 < 0.0){ + if(fractionOld2 < -MY_EPSILON || nMoleculesOld2 < -MY_EPSILON){ + error->all(FLERR,"Computed fraction less than -10*DBL_EPSILON"); } - fraction2_old = 0.0; + nMoleculesOld2 = 0.0; + fractionOld2 = 0.0; } + + if(fractionalWeighting){ + mixWtSite1old = fractionOld1; + mixWtSite1 = fraction1; + mixWtSite2old = fractionOld2; + mixWtSite2 = fraction2; + } else { + mixWtSite1old = nMoleculesOld1; + mixWtSite1 = nMolecules1; + mixWtSite2old = nMoleculesOld2; + mixWtSite2 = nMolecules2; + } +} + +/* ---------------------------------------------------------------------- */ + +void PairExp6rx::exponentScaling(double phi, double &epsilon, double &rm) const +{ + double powfuch; + + if(exponentEpsilon < 0.0){ + powfuch = pow(phi,-exponentEpsilon); + if(powfuchnghost; int newton_pair = force->newton_pair; - double fractionOld1_i,fractionOld1_j; - double fractionOld2_i,fractionOld2_j; - double fraction1_i; + double mixWtSite1old_i,mixWtSite1old_j; + double mixWtSite2old_i,mixWtSite2old_j; + double mixWtSite1_i; double *uCG = atom->uCG; double *uCGnew = atom->uCGnew; @@ -126,20 +133,20 @@ void PairMultiLucyRX::compute(int eflag, int vflag) int jtable; double *rho = atom->rho; - double *fractionOld1 = NULL; - double *fractionOld2 = NULL; - double *fraction1 = NULL; - double 
*fraction2 = NULL; + double *mixWtSite1old = NULL; + double *mixWtSite2old = NULL; + double *mixWtSite1 = NULL; + double *mixWtSite2 = NULL; { const int ntotal = nlocal + nghost; - memory->create(fractionOld1, ntotal, "PairMultiLucyRX::fractionOld1"); - memory->create(fractionOld2, ntotal, "PairMultiLucyRX::fractionOld2"); - memory->create(fraction1, ntotal, "PairMultiLucyRX::fraction1"); - memory->create(fraction2, ntotal, "PairMultiLucyRX::fraction2"); + memory->create(mixWtSite1old, ntotal, "PairMultiLucyRX::mixWtSite1old"); + memory->create(mixWtSite2old, ntotal, "PairMultiLucyRX::mixWtSite2old"); + memory->create(mixWtSite1, ntotal, "PairMultiLucyRX::mixWtSite1"); + memory->create(mixWtSite2, ntotal, "PairMultiLucyRX::mixWtSite2"); for (int i = 0; i < ntotal; ++i) - getParams(i, fractionOld1[i], fractionOld2[i], fraction1[i], fraction2[i]); + getMixingWeights(i, mixWtSite1old[i], mixWtSite2old[i], mixWtSite1[i], mixWtSite2[i]); } inum = list->inum; @@ -164,9 +171,9 @@ void PairMultiLucyRX::compute(int eflag, int vflag) double fy_i = 0.0; double fz_i = 0.0; - fractionOld1_i = fractionOld1[i]; - fractionOld2_i = fractionOld2[i]; - fraction1_i = fraction1[i]; + mixWtSite1old_i = mixWtSite1old[i]; + mixWtSite2old_i = mixWtSite2old[i]; + mixWtSite1_i = mixWtSite1[i]; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; @@ -181,8 +188,8 @@ void PairMultiLucyRX::compute(int eflag, int vflag) if (rsq < cutsq[itype][jtype]) { fpair = 0.0; - fractionOld1_j = fractionOld1[j]; - fractionOld2_j = fractionOld2[j]; + mixWtSite1old_j = mixWtSite1old[j]; + mixWtSite2old_j = mixWtSite2old[j]; tb = &tables[tabindex[itype][jtype]]; if (rho[i]*rho[i] < tb->innersq || rho[j]*rho[j] < tb->innersq){ @@ -237,8 +244,8 @@ void PairMultiLucyRX::compute(int eflag, int vflag) } else error->one(FLERR,"Only LOOKUP and LINEAR table styles have been implemented for pair multi/lucy/rx"); - if (isite1 == isite2) fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpair; - else fpair = 
(sqrt(fractionOld1_i*fractionOld2_j) + sqrt(fractionOld2_i*fractionOld1_j))*fpair; + if (isite1 == isite2) fpair = sqrt(mixWtSite1old_i*mixWtSite2old_j)*fpair; + else fpair = (sqrt(mixWtSite1old_i*mixWtSite2old_j) + sqrt(mixWtSite2old_i*mixWtSite1old_j))*fpair; fx_i += delx*fpair; fy_i += dely*fpair; @@ -270,8 +277,8 @@ void PairMultiLucyRX::compute(int eflag, int vflag) } else error->one(FLERR,"Only LOOKUP and LINEAR table styles have been implemented for pair multi/lucy/rx"); evdwl *=(pi*cutsq[itype][itype]*cutsq[itype][itype])/84.0; - evdwlOld = fractionOld1_i*evdwl; - evdwl = fraction1_i*evdwl; + evdwlOld = mixWtSite1old_i*evdwl; + evdwl = mixWtSite1_i*evdwl; uCG[i] += evdwlOld; uCGnew[i] += evdwl; @@ -283,10 +290,10 @@ void PairMultiLucyRX::compute(int eflag, int vflag) if (vflag_fdotr) virial_fdotr_compute(); - memory->destroy(fractionOld1); - memory->destroy(fractionOld2); - memory->destroy(fraction1); - memory->destroy(fraction2); + memory->destroy(mixWtSite1old); + memory->destroy(mixWtSite2old); + memory->destroy(mixWtSite1); + memory->destroy(mixWtSite2); } /* ---------------------------------------------------------------------- @@ -313,7 +320,7 @@ void PairMultiLucyRX::allocate() void PairMultiLucyRX::settings(int narg, char **arg) { - if (narg != 2) error->all(FLERR,"Illegal pair_style command"); + if (narg < 2) error->all(FLERR,"Illegal pair_style command"); // new settings @@ -324,6 +331,16 @@ void PairMultiLucyRX::settings(int narg, char **arg) tablength = force->inumeric(FLERR,arg[1]); if (tablength < 2) error->all(FLERR,"Illegal number of pair table entries"); + // optional keywords + + int iarg = 2; + while (iarg < narg) { + if (strcmp(arg[iarg],"fractional") == 0) fractionalWeighting = true; + else if (strcmp(arg[iarg],"molecular") == 0) fractionalWeighting = false; + else error->all(FLERR,"Illegal pair_style command"); + iarg++; + } + // delete old tables, since cannot just change settings for (int m = 0; m < ntables; m++) 
free_table(&tables[m]); @@ -930,9 +947,14 @@ void PairMultiLucyRX::computeLocalDensity() /* ---------------------------------------------------------------------- */ -void PairMultiLucyRX::getParams(int id, double &fractionOld1, double &fractionOld2, double &fraction1, double &fraction2) +void PairMultiLucyRX::getMixingWeights(int id, double &mixWtSite1old, double &mixWtSite2old, double &mixWtSite1, double &mixWtSite2) { - double fractionOld, fraction; + double fractionOFAold, fractionOFA; + double fractionOld1, fraction1; + double fractionOld2, fraction2; + double nMoleculesOFAold, nMoleculesOFA; + double nMoleculesOld1, nMolecules1; + double nMoleculesOld2, nMolecules2; double nTotal, nTotalOld; nTotal = 0.0; @@ -943,32 +965,56 @@ void PairMultiLucyRX::getParams(int id, double &fractionOld1, double &fractionOl } if (isOneFluid(isite1) == false){ - fractionOld1 = atom->dvector[isite1+nspecies][id]/nTotalOld; - fraction1 = atom->dvector[isite1][id]/nTotal; + nMoleculesOld1 = atom->dvector[isite1+nspecies][id]; + nMolecules1 = atom->dvector[isite1][id]; + fractionOld1 = nMoleculesOld1/nTotalOld; + fraction1 = nMolecules1/nTotal; } if (isOneFluid(isite2) == false){ - fractionOld2 = atom->dvector[isite2+nspecies][id]/nTotalOld; - fraction2 = atom->dvector[isite2][id]/nTotal; + nMoleculesOld2 = atom->dvector[isite2+nspecies][id]; + nMolecules2 = atom->dvector[isite2][id]; + fractionOld2 = nMoleculesOld2/nTotalOld; + fraction2 = nMolecules2/nTotal; } if (isOneFluid(isite1) || isOneFluid(isite2)){ - fractionOld = 0.0; - fraction = 0.0; + nMoleculesOFAold = 0.0; + nMoleculesOFA = 0.0; + fractionOFAold = 0.0; + fractionOFA = 0.0; for (int ispecies = 0; ispecies < nspecies; ispecies++){ if (isite1 == ispecies || isite2 == ispecies) continue; - fractionOld += atom->dvector[ispecies+nspecies][id] / nTotalOld; - fraction += atom->dvector[ispecies][id] / nTotal; + nMoleculesOFAold += atom->dvector[ispecies+nspecies][id]; + nMoleculesOFA += atom->dvector[ispecies][id]; + 
fractionOFAold += atom->dvector[ispecies+nspecies][id] / nTotalOld; + fractionOFA += atom->dvector[ispecies][id] / nTotal; } if (isOneFluid(isite1)){ - fractionOld1 = fractionOld; - fraction1 = fraction; + nMoleculesOld1 = 1.0-(nTotalOld-nMoleculesOFAold); + nMolecules1 = 1.0-(nTotal-nMoleculesOFA); + fractionOld1 = fractionOFAold; + fraction1 = fractionOFA; } if (isOneFluid(isite2)){ - fractionOld2 = fractionOld; - fraction2 = fraction; + nMoleculesOld2 = 1.0-(nTotalOld-nMoleculesOFAold); + nMolecules2 = 1.0-(nTotal-nMoleculesOFA); + fractionOld2 = fractionOFAold; + fraction2 = fractionOFA; } } + + if(fractionalWeighting){ + mixWtSite1old = fractionOld1; + mixWtSite1 = fraction1; + mixWtSite2old = fractionOld2; + mixWtSite2 = fraction2; + } else { + mixWtSite1old = nMoleculesOld1; + mixWtSite1 = nMolecules1; + mixWtSite2old = nMoleculesOld2; + mixWtSite2 = nMolecules2; + } } /* ---------------------------------------------------------------------- */ diff --git a/src/USER-DPD/pair_multi_lucy_rx.h b/src/USER-DPD/pair_multi_lucy_rx.h index 0562739c50..5975bd6ccd 100644 --- a/src/USER-DPD/pair_multi_lucy_rx.h +++ b/src/USER-DPD/pair_multi_lucy_rx.h @@ -78,7 +78,8 @@ class PairMultiLucyRX : public Pair { int nspecies; char *site1, *site2; int isite1, isite2; - void getParams(int, double &, double &, double &, double &); + void getMixingWeights(int, double &, double &, double &, double &); + bool fractionalWeighting; }; diff --git a/src/USER-DPD/pair_table_rx.cpp b/src/USER-DPD/pair_table_rx.cpp index 463e1838c6..e3cacc6155 100644 --- a/src/USER-DPD/pair_table_rx.cpp +++ b/src/USER-DPD/pair_table_rx.cpp @@ -35,6 +35,12 @@ enum{NONE,RLINEAR,RSQ,BMP}; #define MAXLINE 1024 +#ifdef DBL_EPSILON + #define MY_EPSILON (10.0*DBL_EPSILON) +#else + #define MY_EPSILON (10.0*2.220446049250313e-16) +#endif + #define OneFluidValue (-1) #define isOneFluid(_site_) ( (_site_) == OneFluidValue ) @@ -44,6 +50,7 @@ PairTableRX::PairTableRX(LAMMPS *lmp) : Pair(lmp) { ntables = 0; tables = 
NULL; + fractionalWeighting = true; } /* ---------------------------------------------------------------------- */ @@ -84,21 +91,6 @@ void PairTableRX::compute(int eflag, int vflag) if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; - double *fractionOld1, *fractionOld2; - double *fraction1, *fraction2; - - { - const int ntotal = atom->nlocal + atom->nghost; - - memory->create(fractionOld1, ntotal, "PairTableRx::compute::fractionOld1"); - memory->create(fractionOld2, ntotal, "PairTableRx::compute::fractionOld2"); - memory->create(fraction1, ntotal, "PairTableRx::compute::fraction1"); - memory->create(fraction2, ntotal, "PairTableRx::compute::fraction2"); - - for (int i = 0; i < ntotal; ++i) - getParams(i, fractionOld1[i], fractionOld2[i], fraction1[i], fraction2[i]); - } - double **x = atom->x; double **f = atom->f; int *type = atom->type; @@ -106,13 +98,29 @@ void PairTableRX::compute(int eflag, int vflag) double *special_lj = force->special_lj; int newton_pair = force->newton_pair; - double fractionOld1_i, fractionOld1_j; - double fractionOld2_i, fractionOld2_j; - double fraction1_i, fraction1_j; - double fraction2_i, fraction2_j; + double mixWtSite1old_i, mixWtSite1old_j; + double mixWtSite2old_i, mixWtSite2old_j; + double mixWtSite1_i, mixWtSite1_j; + double mixWtSite2_i, mixWtSite2_j; double *uCG = atom->uCG; double *uCGnew = atom->uCGnew; + double *mixWtSite1old = NULL; + double *mixWtSite2old = NULL; + double *mixWtSite1 = NULL; + double *mixWtSite2 = NULL; + + { + const int ntotal = atom->nlocal + atom->nghost; + memory->create(mixWtSite1old, ntotal, "PairTableRx::compute::mixWtSite1old"); + memory->create(mixWtSite2old, ntotal, "PairTableRx::compute::mixWtSite2old"); + memory->create(mixWtSite1, ntotal, "PairTableRx::compute::mixWtSite1"); + memory->create(mixWtSite2, ntotal, "PairTableRx::compute::mixWtSite2"); + + for (int i = 0; i < ntotal; ++i) + getMixingWeights(i, mixWtSite1old[i], mixWtSite2old[i], mixWtSite1[i], 
mixWtSite2[i]); + } + inum = list->inum; ilist = list->ilist; numneigh = list->numneigh; @@ -132,10 +140,10 @@ void PairTableRX::compute(int eflag, int vflag) double uCGnew_i = 0.0; double fx_i = 0.0, fy_i = 0.0, fz_i = 0.0; - fractionOld1_i = fractionOld1[i]; - fractionOld2_i = fractionOld2[i]; - fraction1_i = fraction1[i]; - fraction2_i = fraction2[i]; + mixWtSite1old_i = mixWtSite1old[i]; + mixWtSite2old_i = mixWtSite2old[i]; + mixWtSite1_i = mixWtSite1[i]; + mixWtSite2_i = mixWtSite2[i]; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; @@ -149,10 +157,10 @@ void PairTableRX::compute(int eflag, int vflag) jtype = type[j]; if (rsq < cutsq[itype][jtype]) { - fractionOld1_j = fractionOld1[j]; - fractionOld2_j = fractionOld2[j]; - fraction1_j = fraction1[j]; - fraction2_j = fraction2[j]; + mixWtSite1old_j = mixWtSite1old[j]; + mixWtSite2old_j = mixWtSite2old[j]; + mixWtSite1_j = mixWtSite1[j]; + mixWtSite2_j = mixWtSite2[j]; tb = &tables[tabindex[itype][jtype]]; if (rsq < tb->innersq) @@ -188,8 +196,8 @@ void PairTableRX::compute(int eflag, int vflag) value = tb->f[itable] + fraction*tb->df[itable]; fpair = factor_lj * value; } - if (isite1 == isite2) fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpair; - else fpair = (sqrt(fractionOld1_i*fractionOld2_j) + sqrt(fractionOld2_i*fractionOld1_j))*fpair; + if (isite1 == isite2) fpair = sqrt(mixWtSite1old_i*mixWtSite2old_j)*fpair; + else fpair = (sqrt(mixWtSite1old_i*mixWtSite2old_j) + sqrt(mixWtSite2old_i*mixWtSite1old_j))*fpair; fx_i += delx*fpair; fy_i += dely*fpair; @@ -210,11 +218,11 @@ void PairTableRX::compute(int eflag, int vflag) ((a*a*a-a)*tb->e2[itable] + (b*b*b-b)*tb->e2[itable+1]) * tb->deltasq6; if (isite1 == isite2){ - evdwlOld = sqrt(fractionOld1_i*fractionOld2_j)*evdwl; - evdwl = sqrt(fraction1_i*fraction2_j)*evdwl; + evdwlOld = sqrt(mixWtSite1old_i*mixWtSite2old_j)*evdwl; + evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwl; } else { - evdwlOld = (sqrt(fractionOld1_i*fractionOld2_j) + 
sqrt(fractionOld2_i*fractionOld1_j))*evdwl; - evdwl = (sqrt(fraction1_i*fraction2_j) + sqrt(fraction2_i*fraction1_j))*evdwl; + evdwlOld = (sqrt(mixWtSite1old_i*mixWtSite2old_j) + sqrt(mixWtSite2old_i*mixWtSite1old_j))*evdwl; + evdwl = (sqrt(mixWtSite1_i*mixWtSite2_j) + sqrt(mixWtSite2_i*mixWtSite1_j))*evdwl; } evdwlOld *= factor_lj; evdwl *= factor_lj; @@ -240,10 +248,10 @@ void PairTableRX::compute(int eflag, int vflag) } if (vflag_fdotr) virial_fdotr_compute(); - memory->destroy(fractionOld1); - memory->destroy(fractionOld2); - memory->destroy(fraction1); - memory->destroy(fraction2); + memory->destroy(mixWtSite1old); + memory->destroy(mixWtSite2old); + memory->destroy(mixWtSite1); + memory->destroy(mixWtSite2); } /* ---------------------------------------------------------------------- @@ -293,6 +301,8 @@ void PairTableRX::settings(int narg, char **arg) else if (strcmp(arg[iarg],"msm") == 0) msmflag = 1; else if (strcmp(arg[iarg],"dispersion") == 0) dispersionflag = 1; else if (strcmp(arg[iarg],"tip4p") == 0) tip4pflag = 1; + else if (strcmp(arg[iarg],"fractional") == 0) fractionalWeighting = true; + else if (strcmp(arg[iarg],"molecular") == 0) fractionalWeighting = false; else error->all(FLERR,"Illegal pair_style command"); iarg++; } @@ -1061,17 +1071,17 @@ double PairTableRX::single(int i, int j, int itype, int jtype, double rsq, int tlm1 = tablength - 1; Table *tb = &tables[tabindex[itype][jtype]]; - double fraction1_i, fraction1_j; - double fraction2_i, fraction2_j; - double fractionOld1_i, fractionOld1_j; - double fractionOld2_i, fractionOld2_j; + double mixWtSite1_i, mixWtSite1_j; + double mixWtSite2_i, mixWtSite2_j; + double mixWtSite1old_i, mixWtSite1old_j; + double mixWtSite2old_i, mixWtSite2old_j; fraction = 0.0; a = 0.0; b = 0.0; - getParams(i,fractionOld1_i,fractionOld2_i,fraction1_i,fraction2_i); - getParams(j,fractionOld1_j,fractionOld2_j,fraction1_j,fraction2_j); + getMixingWeights(i,mixWtSite1old_i,mixWtSite2old_i,mixWtSite1_i,mixWtSite2_i); + 
getMixingWeights(j,mixWtSite1old_j,mixWtSite2old_j,mixWtSite1_j,mixWtSite2_j); if (rsq < tb->innersq) error->one(FLERR,"Pair distance < table inner cutoff"); @@ -1104,8 +1114,8 @@ double PairTableRX::single(int i, int j, int itype, int jtype, double rsq, fforce = factor_lj * value; } - if (isite1 == isite2) fforce = sqrt(fraction1_i*fraction2_j)*fforce; - else fforce = (sqrt(fraction1_i*fraction2_j) + sqrt(fraction2_i*fraction1_j))*fforce; + if (isite1 == isite2) fforce = sqrt(mixWtSite1_i*mixWtSite2_j)*fforce; + else fforce = (sqrt(mixWtSite1_i*mixWtSite2_j) + sqrt(mixWtSite2_i*mixWtSite1_j))*fforce; if (tabstyle == LOOKUP) phi = tb->e[itable]; @@ -1115,8 +1125,8 @@ double PairTableRX::single(int i, int j, int itype, int jtype, double rsq, phi = a * tb->e[itable] + b * tb->e[itable+1] + ((a*a*a-a)*tb->e2[itable] + (b*b*b-b)*tb->e2[itable+1]) * tb->deltasq6; - if (isite1 == isite2) phi = sqrt(fraction1_i*fraction2_j)*phi; - else phi = (sqrt(fraction1_i*fraction2_j) + sqrt(fraction2_i*fraction1_j))*phi; + if (isite1 == isite2) phi = sqrt(mixWtSite1_i*mixWtSite2_j)*phi; + else phi = (sqrt(mixWtSite1_i*mixWtSite2_j) + sqrt(mixWtSite2_i*mixWtSite1_j))*phi; return factor_lj*phi; } @@ -1143,46 +1153,74 @@ void *PairTableRX::extract(const char *str, int &dim) /* ---------------------------------------------------------------------- */ -void PairTableRX::getParams(int id, double &fractionOld1, double &fractionOld2, double &fraction1, double &fraction2) +void PairTableRX::getMixingWeights(int id, double &mixWtSite1old, double &mixWtSite2old, double &mixWtSite1, double &mixWtSite2) { - double nTotal = 0.0; - double nTotalOld = 0.0; + double fractionOFAold, fractionOFA; + double fractionOld1, fraction1; + double fractionOld2, fraction2; + double nMoleculesOFAold, nMoleculesOFA; + double nMoleculesOld1, nMolecules1; + double nMoleculesOld2, nMolecules2; + double nTotal, nTotalOld; + + nTotal = 0.0; + nTotalOld = 0.0; for (int ispecies = 0; ispecies < nspecies; ++ispecies){ 
nTotal += atom->dvector[ispecies][id]; nTotalOld += atom->dvector[ispecies+nspecies][id]; } - if(nTotal < 1e-8 || nTotalOld < 1e-8) - error->all(FLERR,"The number of molecules in CG particle is less than 1e-8."); + if(nTotal < MY_EPSILON || nTotalOld < MY_EPSILON) + error->all(FLERR,"The number of molecules in CG particle is less than 10*DBL_EPSILON."); if (isOneFluid(isite1) == false){ - fractionOld1 = atom->dvector[isite1+nspecies][id]/nTotalOld; - fraction1 = atom->dvector[isite1][id]/nTotal; + nMoleculesOld1 = atom->dvector[isite1+nspecies][id]; + nMolecules1 = atom->dvector[isite1][id]; + fractionOld1 = nMoleculesOld1/nTotalOld; + fraction1 = nMolecules1/nTotal; } if (isOneFluid(isite2) == false){ - fractionOld2 = atom->dvector[isite2+nspecies][id]/nTotalOld; - fraction2 = atom->dvector[isite2][id]/nTotal; + nMoleculesOld2 = atom->dvector[isite2+nspecies][id]; + nMolecules2 = atom->dvector[isite2][id]; + fractionOld2 = nMoleculesOld2/nTotalOld; + fraction2 = nMolecules2/nTotal; } if (isOneFluid(isite1) || isOneFluid(isite2)){ - double fractionOld = 0.0; - double fraction = 0.0; + nMoleculesOFAold = 0.0; + nMoleculesOFA = 0.0; + fractionOFAold = 0.0; + fractionOFA = 0.0; for (int ispecies = 0; ispecies < nspecies; ispecies++){ if (isite1 == ispecies || isite2 == ispecies) continue; - - fractionOld += atom->dvector[ispecies+nspecies][id]/nTotalOld; - fraction += atom->dvector[ispecies][id]/nTotal; + nMoleculesOFAold += atom->dvector[ispecies+nspecies][id]; + nMoleculesOFA += atom->dvector[ispecies][id]; + fractionOFAold += atom->dvector[ispecies+nspecies][id]/nTotalOld; + fractionOFA += atom->dvector[ispecies][id]/nTotal; } - if(isOneFluid(isite1)){ - fractionOld1 = fractionOld; - fraction1 = fraction; + nMoleculesOld1 = 1.0-(nTotalOld-nMoleculesOFAold); + nMolecules1 = 1.0-(nTotal-nMoleculesOFA); + fractionOld1 = fractionOFAold; + fraction1 = fractionOFA; } - if(isOneFluid(isite2)){ - fractionOld2 = fractionOld; - fraction2 = fraction; + nMoleculesOld2 = 
1.0-(nTotalOld-nMoleculesOFAold); + nMolecules2 = 1.0-(nTotal-nMoleculesOFA); + fractionOld2 = fractionOFAold; + fraction2 = fractionOFA; } } -} + if(fractionalWeighting){ + mixWtSite1old = fractionOld1; + mixWtSite1 = fraction1; + mixWtSite2old = fractionOld2; + mixWtSite2 = fraction2; + } else { + mixWtSite1old = nMoleculesOld1; + mixWtSite1 = nMolecules1; + mixWtSite2old = nMoleculesOld2; + mixWtSite2 = nMolecules2; + } +} diff --git a/src/USER-DPD/pair_table_rx.h b/src/USER-DPD/pair_table_rx.h index f04ebced20..c6afe6a8d5 100644 --- a/src/USER-DPD/pair_table_rx.h +++ b/src/USER-DPD/pair_table_rx.h @@ -72,7 +72,8 @@ class PairTableRX : public Pair { int nspecies; char *site1, *site2; int isite1, isite2; - void getParams(int, double &, double &, double &, double &); + void getMixingWeights(int, double &, double &, double &, double &); + bool fractionalWeighting; }; @@ -163,7 +164,7 @@ When using pair style table with a long-range KSpace solver, the cutoffs for all atom type pairs must all be the same, since the long-range solver starts at that cutoff. -E: The number of molecules in CG particle is less than 1e-8 +E: The number of molecules in CG particle is less than 10*DBL_EPSILON Self-explanatory. 
Check the species concentrations have been properly set and check the reaction kinetic solver parameters in fix rx to more for From 2af10cb8da9b8bbac64cead53eb7cae57088ed7c Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 3 Jan 2017 10:09:44 -0700 Subject: [PATCH 034/267] Updating fix_eos_table_rx_kokkos to USER-DPD changes --- src/KOKKOS/fix_eos_table_rx_kokkos.cpp | 64 ++++++++++++++++++++++---- src/KOKKOS/fix_eos_table_rx_kokkos.h | 4 +- 2 files changed, 58 insertions(+), 10 deletions(-) diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp index c47923680c..aff2cdfa2d 100644 --- a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp @@ -29,6 +29,13 @@ #define MAXLINE 1024 +#ifdef DBL_EPSILON + #define MY_EPSILON (10.0*DBL_EPSILON) +#else + #define MY_EPSILON (10.0*2.220446049250313e-16) +#endif + + using namespace LAMMPS_NS; using namespace FixConst; @@ -51,11 +58,31 @@ FixEOStableRXKokkos::FixEOStableRXKokkos(LAMMPS *lmp, int narg, char k_warning_flag = DAT::tdual_int_scalar("fix:warning_flag"); k_dHf = DAT::tdual_float_1d("fix:dHf",nspecies); - for (int n = 0; n < nspecies; n++) + k_energyCorr = DAT::tdual_float_1d("fix:energyCorr",nspecies); + k_tempCorrCoeff = DAT::tdual_float_1d("fix:tempCorrCoeff",nspecies); + k_moleculeCorrCoeff = DAT::tdual_float_1d("fix:moleculeCorrCoeff",nspecies); + for (int n = 0; n < nspecies; n++) { k_dHf.h_view(n) = dHf[n]; + k_energyCorr.h_view(n) = energyCorr[n]; + k_tempCorrCoeff.h_view(n) = tempCorrCoeff[n]; + k_moleculeCorrCoeff.h_view(n) = moleculeCorrCoeff[n]; + } + k_dHf.modify(); k_dHf.sync(); d_dHf = k_dHf.view(); + + k_energyCorr.modify(); + k_energyCorr.sync(); + d_energyCorr = k_energyCorr.view(); + + k_tempCorrCoeff.modify(); + k_tempCorrCoeff.sync(); + d_tempCorrCoeff = k_tempCorrCoeff.view(); + + k_moleculeCorrCoeff.modify(); + k_moleculeCorrCoeff.sync(); + d_moleculeCorrCoeff = k_moleculeCorrCoeff.view(); } /* 
---------------------------------------------------------------------- */ @@ -268,11 +295,27 @@ template KOKKOS_INLINE_FUNCTION void FixEOStableRXKokkos::energy_lookup(int id, double thetai, double &ui) const { - int itable; - double fraction, uTmp, nTotal; + int itable, nPG; + double fraction, uTmp, nMolecules, nTotal, nTotalPG; + double tolerance = 1.0e-10; ui = 0.0; nTotal = 0.0; + nTotalPG = 0.0; + nPG = 0; + + if (rx_flag) { + for (int ispecies = 0; ispecies < nspecies; ispecies++ ) { + nTotal += dvector(ispecies,id); + if (fabs(d_moleculeCorrCoeff[ispecies]) > tolerance) { + nPG++; + nTotalPG += dvector(ispecies,id); + } + } + } else { + nTotal = 1.0; + } + for(int ispecies=0;ispecieslo); @@ -289,9 +332,13 @@ void FixEOStableRXKokkos::energy_lookup(int id, double thetai, doubl uTmp = d_table_const.e(ispecies,itable) + fraction*d_table_const.de(ispecies,itable); uTmp += d_dHf[ispecies]; - // mol fraction form: - ui += dvector(ispecies,id)*uTmp; - nTotal += dvector(ispecies,id); + uTmp += d_tempCorrCoeff[ispecies]*thetai; // temperature correction + uTmp += d_energyCorr[ispecies]; // energy correction + if (nPG > 0) ui += d_moleculeCorrCoeff[ispecies]*nTotalPG/double(nPG); // molecule correction + + if (rx_flag) nMolecules = dvector(ispecies,id); + else nMolecules = 1.0; + ui += nMolecules*uTmp; } } ui = ui - double(nTotal+1.5)*boltz*thetai; @@ -312,6 +359,7 @@ void FixEOStableRXKokkos::temperature_lookup(int id, double ui, doub double maxit = 100; double temp; double delta = 0.001; + double tolerance = 1.0e-10; int lo = d_table_const.lo(0); int hi = d_table_const.hi(0); @@ -337,7 +385,7 @@ void FixEOStableRXKokkos::temperature_lookup(int id, double ui, doub // Apply the Secant Method for(it=0; it::temperature_lookup(int id, double ui, doub break; } temp = t2 - f2*(t2-t1)/(f2-f1); - if(fabs(temp-t2) < 1e-6) break; + if(fabs(temp-t2) < tolerance) break; f1 = f2; t1 = t2; t2 = temp; diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.h 
b/src/KOKKOS/fix_eos_table_rx_kokkos.h index d4a5094ae0..91d73f1036 100644 --- a/src/KOKKOS/fix_eos_table_rx_kokkos.h +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.h @@ -112,8 +112,8 @@ class FixEOStableRXKokkos : public FixEOStableRX { int update_table; void create_kokkos_tables(); - DAT::tdual_float_1d k_dHf; - typename AT::t_float_1d d_dHf; + DAT::tdual_float_1d k_dHf,k_energyCorr,k_tempCorrCoeff,k_moleculeCorrCoeff; + typename AT::t_float_1d d_dHf,d_energyCorr,d_tempCorrCoeff,d_moleculeCorrCoeff; typename AT::t_int_1d mask; typename AT::t_efloat_1d uCond,uMech,uChem,uCG,uCGnew,rho,dpdTheta,duChem; From f220b07625b7089b981fcaeff999bae3712b6a3a Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 3 Jan 2017 10:36:55 -0700 Subject: [PATCH 035/267] Updating pair_exp6_rx_kokkos to USER-DPD changes --- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 306 +++++++++++++++++------------ src/KOKKOS/pair_exp6_rx_kokkos.h | 30 ++- src/USER-DPD/pair_exp6_rx.h | 2 +- 3 files changed, 201 insertions(+), 137 deletions(-) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index ce3b547435..3ce6b78e57 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -41,6 +41,12 @@ using namespace MathSpecial; #define MAXLINE 1024 #define DELTA 4 +#ifdef DBL_EPSILON + #define MY_EPSILON (10.0*DBL_EPSILON) +#else + #define MY_EPSILON (10.0*2.220446049250313e-16) +#endif + #define oneFluidApproxParameter (-1) #define isOneFluidApprox(_site) ( (_site) == oneFluidApproxParameter ) @@ -165,29 +171,29 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) PairExp6ParamData.epsilon1 = typename AT::t_float_1d("PairExp6ParamData.epsilon1" ,np_total); PairExp6ParamData.alpha1 = typename AT::t_float_1d("PairExp6ParamData.alpha1" ,np_total); PairExp6ParamData.rm1 = typename AT::t_float_1d("PairExp6ParamData.rm1" ,np_total); - PairExp6ParamData.fraction1 = typename AT::t_float_1d("PairExp6ParamData.fraction1" ,np_total); + 
PairExp6ParamData.mixWtSite1 = typename AT::t_float_1d("PairExp6ParamData.mixWtSite1" ,np_total); PairExp6ParamData.epsilon2 = typename AT::t_float_1d("PairExp6ParamData.epsilon2" ,np_total); PairExp6ParamData.alpha2 = typename AT::t_float_1d("PairExp6ParamData.alpha2" ,np_total); PairExp6ParamData.rm2 = typename AT::t_float_1d("PairExp6ParamData.rm2" ,np_total); - PairExp6ParamData.fraction2 = typename AT::t_float_1d("PairExp6ParamData.fraction2" ,np_total); + PairExp6ParamData.mixWtSite2 = typename AT::t_float_1d("PairExp6ParamData.mixWtSite2" ,np_total); PairExp6ParamData.epsilonOld1 = typename AT::t_float_1d("PairExp6ParamData.epsilonOld1" ,np_total); PairExp6ParamData.alphaOld1 = typename AT::t_float_1d("PairExp6ParamData.alphaOld1" ,np_total); PairExp6ParamData.rmOld1 = typename AT::t_float_1d("PairExp6ParamData.rmOld1" ,np_total); - PairExp6ParamData.fractionOld1 = typename AT::t_float_1d("PairExp6ParamData.fractionOld1",np_total); + PairExp6ParamData.mixWtSite1old = typename AT::t_float_1d("PairExp6ParamData.mixWtSite1old",np_total); PairExp6ParamData.epsilonOld2 = typename AT::t_float_1d("PairExp6ParamData.epsilonOld2" ,np_total); PairExp6ParamData.alphaOld2 = typename AT::t_float_1d("PairExp6ParamData.alphaOld2" ,np_total); PairExp6ParamData.rmOld2 = typename AT::t_float_1d("PairExp6ParamData.rmOld2" ,np_total); - PairExp6ParamData.fractionOld2 = typename AT::t_float_1d("PairExp6ParamData.fractionOld2",np_total); + PairExp6ParamData.mixWtSite2old = typename AT::t_float_1d("PairExp6ParamData.mixWtSite2old",np_total); - Kokkos::parallel_for(Kokkos::RangePolicy(0,np_total),*this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,np_total),*this); } k_error_flag.template modify(); k_error_flag.template sync(); if (k_error_flag.h_view() == 1) - error->all(FLERR,"The number of molecules in CG particle is less than 1e-8."); + error->all(FLERR,"The number of molecules in CG particle is less than 10*DBL_EPSILON."); else if (k_error_flag.h_view() == 2) - 
error->all(FLERR,"Computed fraction less than -1.0e-10"); + error->all(FLERR,"Computed fraction less than -10*DBL_EPSILON"); int inum = list->inum; NeighListKokkos* k_list = static_cast*>(list); @@ -249,23 +255,23 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) template KOKKOS_INLINE_FUNCTION -void PairExp6rxKokkos::operator()(TagPairExp6rxgetParamsEXP6, const int &i) const { - getParamsEXP6 (i, PairExp6ParamData.epsilon1[i], +void PairExp6rxKokkos::operator()(TagPairExp6rxgetMixingWeights, const int &i) const { + getMixingWeights (i, PairExp6ParamData.epsilon1[i], PairExp6ParamData.alpha1[i], PairExp6ParamData.rm1[i], - PairExp6ParamData.fraction1[i], + PairExp6ParamData.mixWtSite1[i], PairExp6ParamData.epsilon2[i], PairExp6ParamData.alpha2[i], PairExp6ParamData.rm2[i], - PairExp6ParamData.fraction2[i], + PairExp6ParamData.mixWtSite2[i], PairExp6ParamData.epsilonOld1[i], PairExp6ParamData.alphaOld1[i], PairExp6ParamData.rmOld1[i], - PairExp6ParamData.fractionOld1[i], + PairExp6ParamData.mixWtSite1old[i], PairExp6ParamData.epsilonOld2[i], PairExp6ParamData.alphaOld2[i], PairExp6ParamData.rmOld2[i], - PairExp6ParamData.fractionOld2[i]); + PairExp6ParamData.mixWtSite2old[i]); } template @@ -300,10 +306,10 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxCompute::operator()(TagPairExp6rxCompute::operator()(TagPairExp6rxCompute::operator()(TagPairExp6rxCompute::operator()(TagPairExp6rxCompute::operator()(TagPairExp6rxCompute::allocate() memory->create(cut,n+1,n+1,"pair:cut_lj"); } + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +template +void PairExp6rxKokkos::coeff(int narg, char **arg) +{ + PairExp6rx::coeff(narg,arg); + + if (scalingFlag == POLYNOMIAL) + for (int i = 0; i < 6; i++) { + s_coeffAlpha[i] = coeffAlpha[i]; + s_coeffEps[i] = coeffEps[i]; + s_coeffRm[i] = coeffRm[i]; + } +} + /* 
---------------------------------------------------------------------- */ template @@ -793,7 +817,7 @@ void PairExp6rxKokkos::setup() template KOKKOS_INLINE_FUNCTION -void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm1, double &fraction1,double &epsilon2,double &alpha2,double &rm2,double &fraction2,double &epsilon1_old,double &alpha1_old,double &rm1_old, double &fraction1_old,double &epsilon2_old,double &alpha2_old,double &rm2_old,double &fraction2_old) const +void PairExp6rxKokkos::getMixingWeights(int id,double &epsilon1,double &alpha1,double &rm1, double &mixWtSite1,double &epsilon2,double &alpha2,double &rm2,double &mixWtSite2,double &epsilon1_old,double &alpha1_old,double &rm1_old, double &mixWtSite1old,double &epsilon2_old,double &alpha2_old,double &rm2_old,double &mixWtSite2old) const { int iparam, jparam; double rmi, rmj, rmij, rm3ij; @@ -801,11 +825,16 @@ void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double double alphai, alphaj, alphaij; double epsilon_old, rm3_old, alpha_old; double epsilon, rm3, alpha; - double fractionOFA, fractionOFA_old; - double nTotalOFA, nTotalOFA_old; - double nTotal, nTotal_old; double xMolei, xMolej, xMolei_old, xMolej_old; + double fractionOFAold, fractionOFA; + double fractionOld1, fraction1; + double fractionOld2, fraction2; + double nMoleculesOFAold, nMoleculesOFA; + double nMoleculesOld1, nMolecules1; + double nMoleculesOld2, nMolecules2; + double nTotal, nTotalold; + rm3 = 0.0; epsilon = 0.0; alpha = 0.0; @@ -813,32 +842,32 @@ void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double rm3_old = 0.0; alpha_old = 0.0; fractionOFA = 0.0; - fractionOFA_old = 0.0; - nTotalOFA = 0.0; - nTotalOFA_old = 0.0; + fractionOFAold = 0.0; + nMoleculesOFA = 0.0; + nMoleculesOFAold = 0.0; nTotal = 0.0; - nTotal_old = 0.0; + nTotalold = 0.0; // Compute the total number of molecules in the old and new CG particle as well as the total number of molecules in the fluid portion of the 
old and new CG particle for (int ispecies = 0; ispecies < nspecies; ispecies++){ nTotal += dvector(ispecies,id); - nTotal_old += dvector(ispecies+nspecies,id); + nTotalold += dvector(ispecies+nspecies,id); iparam = d_mol2param[ispecies]; if (iparam < 0 || d_params[iparam].potentialType != exp6PotentialType ) continue; if (isOneFluidApprox(isite1) || isOneFluidApprox(isite2)) { if (isite1 == d_params[iparam].ispecies || isite2 == d_params[iparam].ispecies) continue; - nTotalOFA_old += dvector(ispecies+nspecies,id); - nTotalOFA += dvector(ispecies,id); + nMoleculesOFAold += dvector(ispecies+nspecies,id); + nMoleculesOFA += dvector(ispecies,id); } } - if(nTotal < 1e-8 || nTotal_old < 1e-8) + if(nTotal < MY_EPSILON || nTotalold < MY_EPSILON) k_error_flag.d_view() = 1; // Compute the mole fraction of molecules within the fluid portion of the particle (One Fluid Approximation) - fractionOFA_old = nTotalOFA_old / nTotal_old; - fractionOFA = nTotalOFA / nTotal; + fractionOFAold = nMoleculesOFAold / nTotalold; + fractionOFA = nMoleculesOFA / nTotal; for (int ispecies = 0; ispecies < nspecies; ispecies++) { iparam = d_mol2param[ispecies]; @@ -854,8 +883,10 @@ void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double alpha1 = d_params[iparam].alpha; // Compute the mole fraction of Site1 - fraction1_old = dvector(ispecies+nspecies,id)/nTotal_old; - fraction1 = dvector(ispecies,id)/nTotal; + nMoleculesOld1 = dvector(ispecies+nspecies,id); + nMolecules1 = dvector(ispecies,id); + fractionOld1 = nMoleculesOld1/nTotalold; + fraction1 = nMolecules1/nTotal; } // If Site2 matches a pure species, then grab the parameters @@ -868,8 +899,9 @@ void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double alpha2 = d_params[iparam].alpha; // Compute the mole fraction of Site2 - fraction2_old = dvector(ispecies+nspecies,id)/nTotal_old; - fraction2 = dvector(ispecies,id)/nTotal; + nMoleculesOld2 = dvector(ispecies+nspecies,id); + nMolecules2 = dvector(ispecies,id); + 
fractionOld2 = dvector(ispecies+nspecies,id)/nTotalold; } // If Site1 or Site2 matches is a fluid, then compute the paramters @@ -878,8 +910,10 @@ void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double rmi = d_params[iparam].rm; epsiloni = d_params[iparam].epsilon; alphai = d_params[iparam].alpha; - xMolei = dvector(ispecies,id)/nTotalOFA; - xMolei_old = dvector(ispecies+nspecies,id)/nTotalOFA_old; + if(nMoleculesOFA::getParamsEXP6(int id,double &epsilon1,double rmj = d_params[jparam].rm; epsilonj = d_params[jparam].epsilon; alphaj = d_params[jparam].alpha; - xMolej = dvector(jspecies,id)/nTotalOFA; - xMolej_old = dvector(jspecies+nspecies,id)/nTotalOFA_old; + if(nMoleculesOFA 0.0){ + if(fractionOFAold > 0.0){ rm3_old += xMolei_old*xMolej_old*rm3ij; epsilon_old += xMolei_old*xMolej_old*rm3ij*epsilonij; alpha_old += xMolei_old*xMolej_old*rm3ij*epsilonij*alphaij; @@ -912,7 +948,7 @@ void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double if (isOneFluidApprox(isite1)){ rm1 = cbrt(rm3); - if(rm1 < 1e-16) { + if(rm1 < MY_EPSILON) { rm1 = 0.0; epsilon1 = 0.0; alpha1 = 0.0; @@ -920,11 +956,11 @@ void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double epsilon1 = epsilon / rm3; alpha1 = alpha / epsilon1 / rm3; } - + nMolecules1 = 1.0-(nTotal-nMoleculesOFA); fraction1 = fractionOFA; rm1_old = cbrt(rm3_old); - if(rm1_old < 1e-16) { + if(rm1_old < MY_EPSILON) { rm1_old = 0.0; epsilon1_old = 0.0; alpha1_old = 0.0; @@ -932,42 +968,21 @@ void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double epsilon1_old = epsilon_old / rm3_old; alpha1_old = alpha_old / epsilon1_old / rm3_old; } - fraction1_old = fractionOFA_old; + nMoleculesOld1 = 1.0-(nTotalold-nMoleculesOFAold); + fractionOld1 = fractionOFAold; - // Fuchslin-Like Exp-6 Scaling - double powfuch = 0.0; - if(exponentEpsilon < 0.0){ - powfuch = pow(nTotalOFA,-exponentEpsilon); - if(powfuch<1e-15) epsilon1 = 0.0; - else epsilon1 *= 1.0/powfuch; - - powfuch = 
pow(nTotalOFA_old,-exponentEpsilon); - if(powfuch<1e-15) epsilon1_old = 0.0; - else epsilon1_old *= 1.0/powfuch; - - } else { - epsilon1 *= pow(nTotalOFA,exponentEpsilon); - epsilon1_old *= pow(nTotalOFA_old,exponentEpsilon); - } - - if(exponentR < 0.0){ - powfuch = pow(nTotalOFA,-exponentR); - if(powfuch<1e-15) rm1 = 0.0; - else rm1 *= 1.0/powfuch; - - powfuch = pow(nTotalOFA_old,-exponentR); - if(powfuch<1e-15) rm1_old = 0.0; - else rm1_old *= 1.0/powfuch; - - } else { - rm1 *= pow(nTotalOFA,exponentR); - rm1_old *= pow(nTotalOFA_old,exponentR); + if(scalingFlag == EXPONENT){ + exponentScaling(nMoleculesOFA,epsilon1,rm1); + exponentScaling(nMoleculesOFAold,epsilon1_old,rm1_old); + } else if(scalingFlag == POLYNOMIAL){ + polynomialScaling(nMoleculesOFA,alpha1,epsilon1,rm1); + polynomialScaling(nMoleculesOFAold,alpha1_old,epsilon1_old,rm1_old); } } if (isOneFluidApprox(isite2)){ rm2 = cbrt(rm3); - if(rm2 < 1e-16) { + if(rm2 < MY_EPSILON) { rm2 = 0.0; epsilon2 = 0.0; alpha2 = 0.0; @@ -975,10 +990,11 @@ void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double epsilon2 = epsilon / rm3; alpha2 = alpha / epsilon2 / rm3; } + nMolecules2 = 1.0-(nTotal-nMoleculesOFA); fraction2 = fractionOFA; rm2_old = cbrt(rm3_old); - if(rm2_old < 1e-16) { + if(rm2_old < MY_EPSILON) { rm2_old = 0.0; epsilon2_old = 0.0; alpha2_old = 0.0; @@ -986,64 +1002,100 @@ void PairExp6rxKokkos::getParamsEXP6(int id,double &epsilon1,double epsilon2_old = epsilon_old / rm3_old; alpha2_old = alpha_old / epsilon2_old / rm3_old; } - fraction2_old = fractionOFA_old; + nMoleculesOld2 = 1.0-(nTotalold-nMoleculesOFAold); + fractionOld2 = fractionOFAold; - // Fuchslin-Like Exp-6 Scaling - double powfuch = 0.0; - if(exponentEpsilon < 0.0){ - powfuch = pow(nTotalOFA,-exponentEpsilon); - if(powfuch<1e-15) epsilon2 = 0.0; - else epsilon2 *= 1.0/powfuch; - - powfuch = pow(nTotalOFA_old,-exponentEpsilon); - if(powfuch<1e-15) epsilon2_old = 0.0; - else epsilon2_old *= 1.0/powfuch; - - } else { - epsilon2 
*= pow(nTotalOFA,exponentEpsilon); - epsilon2_old *= pow(nTotalOFA_old,exponentEpsilon); - } - - if(exponentR < 0.0){ - powfuch = pow(nTotalOFA,-exponentR); - if(powfuch<1e-15) rm2 = 0.0; - else rm2 *= 1.0/powfuch; - - powfuch = pow(nTotalOFA_old,-exponentR); - if(powfuch<1e-15) rm2_old = 0.0; - else rm2_old *= 1.0/powfuch; - - } else { - rm2 *= pow(nTotalOFA,exponentR); - rm2_old *= pow(nTotalOFA_old,exponentR); + if(scalingFlag == EXPONENT){ + exponentScaling(nMoleculesOFA,epsilon2,rm2); + exponentScaling(nMoleculesOFAold,epsilon2_old,rm2_old); + } else if(scalingFlag == POLYNOMIAL){ + polynomialScaling(nMoleculesOFA,alpha2,epsilon2,rm2); + polynomialScaling(nMoleculesOFAold,alpha2_old,epsilon2_old,rm2_old); } } // Check that no fractions are less than zero - if(fraction1 < 0.0){ - if(fraction1 < -1.0e-10){ + if(fraction1 < 0.0 || nMolecules1 < 0.0){ + if(fraction1 < -MY_EPSILON || nMolecules1 < -MY_EPSILON){ k_error_flag.d_view() = 2; } + nMolecules1 = 0.0; fraction1 = 0.0; } - if(fraction2 < 0.0){ - if(fraction2 < -1.0e-10){ + if(fraction2 < 0.0 || nMolecules2 < 0.0){ + if(fraction2 < -MY_EPSILON || nMolecules2 < -MY_EPSILON){ k_error_flag.d_view() = 2; } + nMolecules2 = 0.0; fraction2 = 0.0; } - if(fraction1_old < 0.0){ - if(fraction1_old < -1.0e-10){ + if(fractionOld1 < 0.0 || nMoleculesOld1 < 0.0){ + if(fractionOld1 < -MY_EPSILON || nMoleculesOld1 < -MY_EPSILON){ k_error_flag.d_view() = 2; } - fraction1_old = 0.0; + nMoleculesOld1 = 0.0; + fractionOld1 = 0.0; } - if(fraction2_old < 0.0){ - if(fraction2_old < -1.0e-10){ + if(fractionOld2 < 0.0 || nMoleculesOld2 < 0.0){ + if(fractionOld2 < -MY_EPSILON || nMoleculesOld2 < -MY_EPSILON){ k_error_flag.d_view() = 2; } - fraction2_old = 0.0; + nMoleculesOld2 = 0.0; + fractionOld2 = 0.0; } + + if(fractionalWeighting){ + mixWtSite1old = fractionOld1; + mixWtSite1 = fraction1; + mixWtSite2old = fractionOld2; + mixWtSite2 = fraction2; + } else { + mixWtSite1old = nMoleculesOld1; + mixWtSite1 = nMolecules1; + 
mixWtSite2old = nMoleculesOld2; + mixWtSite2 = nMolecules2; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairExp6rxKokkos::exponentScaling(double phi, double &epsilon, double &rm) const +{ + double powfuch; + + if(exponentEpsilon < 0.0){ + powfuch = pow(phi,-exponentEpsilon); + if(powfuch +KOKKOS_INLINE_FUNCTION +void PairExp6rxKokkos::polynomialScaling(double phi, double &alpha, double &epsilon, double &rm) const +{ + double phi2 = phi*phi; + double phi3 = phi2*phi; + double phi4 = phi2*phi2; + double phi5 = phi2*phi3; + + alpha = (s_coeffAlpha[0]*phi5 + s_coeffAlpha[1]*phi4 + s_coeffAlpha[2]*phi3 + s_coeffAlpha[3]*phi2 + s_coeffAlpha[4]*phi + s_coeffAlpha[5]); + epsilon *= (s_coeffEps[0]*phi5 + s_coeffEps[1]*phi4 + s_coeffEps[2]*phi3 + s_coeffEps[3]*phi2 + s_coeffEps[4]*phi + s_coeffEps[5]); + rm *= (s_coeffEps[0]*phi5 + s_coeffEps[1]*phi4 + s_coeffEps[2]*phi3 + s_coeffEps[3]*phi2 + s_coeffEps[4]*phi + s_coeffEps[5]); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.h b/src/KOKKOS/pair_exp6_rx_kokkos.h index 7dfe20fc22..488c9d0039 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.h +++ b/src/KOKKOS/pair_exp6_rx_kokkos.h @@ -38,18 +38,21 @@ struct PairExp6ParamDataTypeKokkos typedef ArrayTypes AT; int n; - typename AT::t_float_1d epsilon1, alpha1, rm1, fraction1, - epsilon2, alpha2, rm2, fraction2, - epsilonOld1, alphaOld1, rmOld1, fractionOld1, - epsilonOld2, alphaOld2, rmOld2, fractionOld2; + typename AT::t_float_1d epsilon1, alpha1, rm1, mixWtSite1, + epsilon2, alpha2, rm2, mixWtSite2, + epsilonOld1, alphaOld1, rmOld1, mixWtSite1old, + epsilonOld2, alphaOld2, rmOld2, mixWtSite2old; // Default constructor -- nullify everything. 
PairExp6ParamDataTypeKokkos(void) - : n(0) + : n(0), epsilon1(NULL), alpha1(NULL), rm1(NULL), mixWtSite1(NULL), + epsilon2(NULL), alpha2(NULL), rm2(NULL), mixWtSite2(NULL), + epsilonOld1(NULL), alphaOld1(NULL), rmOld1(NULL), mixWtSite1old(NULL), + epsilonOld2(NULL), alphaOld2(NULL), rmOld2(NULL), mixWtSite2old(NULL) {} }; -struct TagPairExp6rxgetParamsEXP6{}; +struct TagPairExp6rxgetMixingWeights{}; template struct TagPairExp6rxCompute{}; @@ -64,10 +67,11 @@ class PairExp6rxKokkos : public PairExp6rx { PairExp6rxKokkos(class LAMMPS *); virtual ~PairExp6rxKokkos(); void compute(int, int); + void coeff(int, char **); void init_style(); KOKKOS_INLINE_FUNCTION - void operator()(TagPairExp6rxgetParamsEXP6, const int&) const; + void operator()(TagPairExp6rxgetMixingWeights, const int&) const; template KOKKOS_INLINE_FUNCTION @@ -127,7 +131,15 @@ class PairExp6rxKokkos : public PairExp6rx { void setup(); KOKKOS_INLINE_FUNCTION - void getParamsEXP6(int, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &) const; + void getMixingWeights(int, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &) const; + + KOKKOS_INLINE_FUNCTION + void exponentScaling(double, double &, double &) const; + + KOKKOS_INLINE_FUNCTION + void polynomialScaling(double, double &, double &, double &) const; + + double s_coeffAlpha[6],s_coeffEps[6],s_coeffRm[6]; KOKKOS_INLINE_FUNCTION double func_rin(const double &) const; @@ -196,7 +208,7 @@ E: Potential file has duplicate entry. Self-explanatory -E: The number of molecules in CG particle is less than 1e-8. +E: The number of molecules in CG particle is less than 10*DBL_EPSILON. Self-explanatory. 
Check the species concentrations have been properly set and check the reaction kinetic solver parameters in fix rx to more for diff --git a/src/USER-DPD/pair_exp6_rx.h b/src/USER-DPD/pair_exp6_rx.h index a7531da318..31d4ffb20b 100644 --- a/src/USER-DPD/pair_exp6_rx.h +++ b/src/USER-DPD/pair_exp6_rx.h @@ -30,7 +30,7 @@ class PairExp6rx : public Pair { virtual ~PairExp6rx(); virtual void compute(int, int); void settings(int, char **); - void coeff(int, char **); + virtual void coeff(int, char **); double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); From ccaa0506cb93e9f884ebc22b32575feda7e99199 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Thu, 22 Dec 2016 07:55:15 -0700 Subject: [PATCH 036/267] LAMMPS_LAMBDA from ibaned/lammps@7559bc9 --- src/KOKKOS/kokkos_type.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index c1176122a7..cc096058ec 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -920,4 +920,10 @@ void memset_kokkos (ViewType &view) { #define ISFINITE(x) std::isfinite(x) #endif +#ifdef KOKKOS_HAVE_CUDA +#define LAMMPS_LAMBDA [=] __device__ +#else +#define LAMMPS_LAMBDA [=] +#endif + #endif From 66cdd3a708b911663cc45e7d6117f09d39784123 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Thu, 22 Dec 2016 08:01:46 -0700 Subject: [PATCH 037/267] draft fix_dpd_energy_kokkos.h --- src/KOKKOS/fix_dpd_energy_kokkos.h | 44 ++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 src/KOKKOS/fix_dpd_energy_kokkos.h diff --git a/src/KOKKOS/fix_dpd_energy_kokkos.h b/src/KOKKOS/fix_dpd_energy_kokkos.h new file mode 100644 index 0000000000..399cf91334 --- /dev/null +++ b/src/KOKKOS/fix_dpd_energy_kokkos.h @@ -0,0 +1,44 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, 
sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(dpd/energy/kk,FixDPDenergyKokkos) +FixStyle(dpd/energy/kk/device,FixDPDenergyKokkos) +FixStyle(dpd/energy/kk/host,FixDPDenergyKokkos) + +#else + +#ifndef LMP_FIX_DPDE_H +#define LMP_FIX_DPDE_H + +#include "fix_dpd_energy.h" + +namespace LAMMPS_NS { + +class FixDPDenergyKokkos : public FixDPDEnergy { + public: + FixDPDenergyKokkos(class LAMMPS *, int, char **); + virtual ~FixDPDenergyKokkos() {} + virtual void initial_integrate(int); + virtual void final_integrate(); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +*/ From 53e07996c6929f422568b5473a77cbcaea799c1e Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Thu, 22 Dec 2016 08:07:48 -0700 Subject: [PATCH 038/267] save draft of fix_dpd_energy_kokkos.cpp --- src/KOKKOS/fix_dpd_energy_kokkos.cpp | 77 ++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 src/KOKKOS/fix_dpd_energy_kokkos.cpp diff --git a/src/KOKKOS/fix_dpd_energy_kokkos.cpp b/src/KOKKOS/fix_dpd_energy_kokkos.cpp new file mode 100644 index 0000000000..ea93c28b01 --- /dev/null +++ b/src/KOKKOS/fix_dpd_energy_kokkos.cpp @@ -0,0 +1,77 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. 
This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include +#include +#include "fix_dpd_energy_kokkos.h" +#include "atom_kokkos.h" +#include "force.h" +#include "update.h" +#include "respa.h" +#include "modify.h" +#include "error.h" +#include "pair_dpd_fdt_energy.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +/* ---------------------------------------------------------------------- */ + +FixDPDenergyKokkos::FixDPDenergyKokkos(LAMMPS *lmp, int narg, char **arg) : + FixDPDenergy(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; +} + +/* ---------------------------------------------------------------------- + allow for both per-type and per-atom mass +------------------------------------------------------------------------- */ + +void FixDPDenergyKokkos::initial_integrate(int vflag) +{ + int nlocal = atom->nlocal; + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + t_efloat_1d uCond = atomKK + double *uCond = atom->uCond; + double *uMech = atom->uMech; + double *duCond = pairDPDE->duCond; + double *duMech = pairDPDE->duMech; + + for (int i = 0; i < nlocal; i++){ + uCond[i] += 0.5*update->dt*duCond[i]; + uMech[i] += 0.5*update->dt*duMech[i]; + } +} + +/* ---------------------------------------------------------------------- */ + +void FixDPDenergyKokkos::final_integrate() +{ + int nlocal = atom->nlocal; + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + double *uCond = atom->uCond; + double *uMech = atom->uMech; + double *duCond = pairDPDE->duCond; + double *duMech = pairDPDE->duMech; + + for (int i = 0; i < nlocal; i++){ + uCond[i] += 0.5*update->dt*duCond[i]; + uMech[i] += 0.5*update->dt*duMech[i]; + } +} From 
04e2f170a33c2162f841b8a21403d95048b86371 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Thu, 22 Dec 2016 09:28:25 -0700 Subject: [PATCH 039/267] first draft fix_dpd_energy_kokkos had to make k_duCond and k_duMech in pair_dpd_fdt_energy_kokkos public so they could be accessed and sync'ed --- src/KOKKOS/fix_dpd_energy_kokkos.cpp | 76 ++++++++++++++----------- src/KOKKOS/fix_dpd_energy_kokkos.h | 10 ++++ src/KOKKOS/pair_dpd_fdt_energy_kokkos.h | 3 +- 3 files changed, 56 insertions(+), 33 deletions(-) diff --git a/src/KOKKOS/fix_dpd_energy_kokkos.cpp b/src/KOKKOS/fix_dpd_energy_kokkos.cpp index ea93c28b01..38671d66ab 100644 --- a/src/KOKKOS/fix_dpd_energy_kokkos.cpp +++ b/src/KOKKOS/fix_dpd_energy_kokkos.cpp @@ -20,14 +20,14 @@ #include "respa.h" #include "modify.h" #include "error.h" -#include "pair_dpd_fdt_energy.h" using namespace LAMMPS_NS; using namespace FixConst; /* ---------------------------------------------------------------------- */ -FixDPDenergyKokkos::FixDPDenergyKokkos(LAMMPS *lmp, int narg, char **arg) : +template +FixDPDenergyKokkos::FixDPDenergyKokkos(LAMMPS *lmp, int narg, char **arg) : FixDPDenergy(lmp, narg, arg) { kokkosable = 1; @@ -35,43 +35,55 @@ FixDPDenergyKokkos::FixDPDenergyKokkos(LAMMPS *lmp, int narg, char **arg) : execution_space = ExecutionSpaceFromDevice::space; datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; -} - -/* ---------------------------------------------------------------------- - allow for both per-type and per-atom mass -------------------------------------------------------------------------- */ - -void FixDPDenergyKokkos::initial_integrate(int vflag) -{ - int nlocal = atom->nlocal; - if (igroup == atom->firstgroup) nlocal = atom->nfirst; - - t_efloat_1d uCond = atomKK - double *uCond = atom->uCond; - double *uMech = atom->uMech; - double *duCond = pairDPDE->duCond; - double *duMech = pairDPDE->duMech; - - for (int i = 0; i < nlocal; i++){ - uCond[i] += 0.5*update->dt*duCond[i]; - uMech[i] += 
0.5*update->dt*duMech[i]; - } + pairDPDEKK = dynamic_cast(pairDPDE); + if (!pairDPDEKK) + error->all(FLERR,"Must use pair_style dpd/fdt/energy/kk with fix dpd/energy/kk"); } /* ---------------------------------------------------------------------- */ -void FixDPDenergyKokkos::final_integrate() +template +void FixDPDenergyKokkos::take_half_step() { int nlocal = atom->nlocal; if (igroup == atom->firstgroup) nlocal = atom->nfirst; - double *uCond = atom->uCond; - double *uMech = atom->uMech; - double *duCond = pairDPDE->duCond; - double *duMech = pairDPDE->duMech; + atomKK->sync(execution_space, UCOND_MASK); + t_efloat_1d uCond = atomKK->k_uCond.view(); + atomKK->sync(execution_space, UMECH_MASK); + t_efloat_1d uMech = atomKK->k_uMech.view(); - for (int i = 0; i < nlocal; i++){ - uCond[i] += 0.5*update->dt*duCond[i]; - uMech[i] += 0.5*update->dt*duMech[i]; - } + pairDPDEKK->k_duCond.sync(); + t_efloat_1d_const duCond = pairDPDEKK->k_duCond.view(); + pairDPDEKK->k_duMech.sync(); + t_efloat_1d_const duMech = pairDPDEKK->k_duMech.view(); + + auto dt = update->dt; + + Kokkos::parallel_for(nlocal, LAMMPS_LAMBDA(int i) { + uCond(i) += 0.5*dt*duCond(i); + uMech(i) += 0.5*dt*duMech(i); + }); + + atomKK->modified(execution_space, UCOND_MASK); + atomKK->modified(execution_space, UMECH_MASK); + //should not be needed once everything is Kokkos + atomKK->sync(ExecutionSpaceFromDevice, UCOND_MASK); + atomKK->sync(ExecutionSpaceFromDevice, UMECH_MASK); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixDPDenergyKokkos::initial_integrate(int) +{ + take_half_step(); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixDPDenergyKokkos::final_integrate() +{ + take_half_step(); } diff --git a/src/KOKKOS/fix_dpd_energy_kokkos.h b/src/KOKKOS/fix_dpd_energy_kokkos.h index 399cf91334..e5ae2b0127 100644 --- a/src/KOKKOS/fix_dpd_energy_kokkos.h +++ b/src/KOKKOS/fix_dpd_energy_kokkos.h @@ 
-23,15 +23,21 @@ FixStyle(dpd/energy/kk/host,FixDPDenergyKokkos) #define LMP_FIX_DPDE_H #include "fix_dpd_energy.h" +#include "pair_dpd_dft_energy_kokkos.h" namespace LAMMPS_NS { +template class FixDPDenergyKokkos : public FixDPDEnergy { public: FixDPDenergyKokkos(class LAMMPS *, int, char **); virtual ~FixDPDenergyKokkos() {} virtual void initial_integrate(int); virtual void final_integrate(); + + protected: + void take_half_step(); + PairDPDfdtEnergyKokkos* pairDPDEKK; }; } @@ -41,4 +47,8 @@ class FixDPDenergyKokkos : public FixDPDEnergy { /* ERROR/WARNING messages: +E: Must use pair_style dpd/fdt/energy/kk with fix dpd/energy/kk + +Self-explanatory. + */ diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h index 8e7d01de2a..41360091bc 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h @@ -84,6 +84,8 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { F_FLOAT cut,a0,sigma,kappa; }; + DAT::tdual_efloat_1d k_duCond,k_duMech; + protected: int eflag,vflag; int nlocal,neighflag; @@ -110,7 +112,6 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { typename ArrayTypes::t_float_1d_randomread mass; double *rmass; typename AT::t_efloat_1d dpdTheta; - DAT::tdual_efloat_1d k_duCond,k_duMech; typename AT::t_efloat_1d d_duCond,d_duMech; HAT::t_efloat_1d h_duCond,h_duMech; From 89795b3653ea6270b90911cf30b3777312af5828 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Thu, 22 Dec 2016 10:18:30 -0700 Subject: [PATCH 040/267] got fix_dpd_energy_kokkos to compile --- src/KOKKOS/Install.sh | 2 ++ src/KOKKOS/fix_dpd_energy_kokkos.cpp | 30 ++++++++++++++++++---------- src/KOKKOS/fix_dpd_energy_kokkos.h | 8 ++++---- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 17e9f93c9d..96ec348b30 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -99,6 +99,8 @@ action fix_setforce_kokkos.cpp action 
fix_setforce_kokkos.h action fix_wall_reflect_kokkos.cpp action fix_wall_reflect_kokkos.h +action fix_dpd_energy_kokkos.cpp fix_dpd_energy.cpp +action fix_dpd_energy_kokkos.h fix_dpd_energy.h action gridcomm_kokkos.cpp gridcomm.cpp action gridcomm_kokkos.h gridcomm.h action improper_harmonic_kokkos.cpp improper_harmonic.cpp diff --git a/src/KOKKOS/fix_dpd_energy_kokkos.cpp b/src/KOKKOS/fix_dpd_energy_kokkos.cpp index 38671d66ab..6ab0b215b4 100644 --- a/src/KOKKOS/fix_dpd_energy_kokkos.cpp +++ b/src/KOKKOS/fix_dpd_energy_kokkos.cpp @@ -14,6 +14,7 @@ #include #include #include "fix_dpd_energy_kokkos.h" +#include "atom_masks.h" #include "atom_kokkos.h" #include "force.h" #include "update.h" @@ -48,15 +49,17 @@ void FixDPDenergyKokkos::take_half_step() int nlocal = atom->nlocal; if (igroup == atom->firstgroup) nlocal = atom->nfirst; - atomKK->sync(execution_space, UCOND_MASK); - t_efloat_1d uCond = atomKK->k_uCond.view(); - atomKK->sync(execution_space, UMECH_MASK); - t_efloat_1d uMech = atomKK->k_uMech.view(); + using AT = ArrayTypes; - pairDPDEKK->k_duCond.sync(); - t_efloat_1d_const duCond = pairDPDEKK->k_duCond.view(); - pairDPDEKK->k_duMech.sync(); - t_efloat_1d_const duMech = pairDPDEKK->k_duMech.view(); + atomKK->sync(execution_space, UCOND_MASK); + typename AT::t_efloat_1d uCond = atomKK->k_uCond.view(); + atomKK->sync(execution_space, UMECH_MASK); + typename AT::t_efloat_1d uMech = atomKK->k_uMech.view(); + + pairDPDEKK->k_duCond.template sync(); + typename AT::t_efloat_1d_const duCond = pairDPDEKK->k_duCond.template view(); + pairDPDEKK->k_duMech.template sync(); + typename AT::t_efloat_1d_const duMech = pairDPDEKK->k_duMech.template view(); auto dt = update->dt; @@ -68,8 +71,8 @@ void FixDPDenergyKokkos::take_half_step() atomKK->modified(execution_space, UCOND_MASK); atomKK->modified(execution_space, UMECH_MASK); //should not be needed once everything is Kokkos - atomKK->sync(ExecutionSpaceFromDevice, UCOND_MASK); - atomKK->sync(ExecutionSpaceFromDevice, 
UMECH_MASK); + atomKK->sync(ExecutionSpaceFromDevice::space, UCOND_MASK); + atomKK->sync(ExecutionSpaceFromDevice::space, UMECH_MASK); } /* ---------------------------------------------------------------------- */ @@ -87,3 +90,10 @@ void FixDPDenergyKokkos::final_integrate() { take_half_step(); } + +namespace LAMMPS_NS { +template class FixDPDenergyKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class FixDPDenergyKokkos; +#endif +} diff --git a/src/KOKKOS/fix_dpd_energy_kokkos.h b/src/KOKKOS/fix_dpd_energy_kokkos.h index e5ae2b0127..0c43ecf422 100644 --- a/src/KOKKOS/fix_dpd_energy_kokkos.h +++ b/src/KOKKOS/fix_dpd_energy_kokkos.h @@ -19,16 +19,16 @@ FixStyle(dpd/energy/kk/host,FixDPDenergyKokkos) #else -#ifndef LMP_FIX_DPDE_H -#define LMP_FIX_DPDE_H +#ifndef LMP_FIX_DPDE_KOKKOS_H +#define LMP_FIX_DPDE_KOKKOS_H #include "fix_dpd_energy.h" -#include "pair_dpd_dft_energy_kokkos.h" +#include "pair_dpd_fdt_energy_kokkos.h" namespace LAMMPS_NS { template -class FixDPDenergyKokkos : public FixDPDEnergy { +class FixDPDenergyKokkos : public FixDPDenergy { public: FixDPDenergyKokkos(class LAMMPS *, int, char **); virtual ~FixDPDenergyKokkos() {} From e632f8597ac7a5229d51e8f12bfbd776f9369aee Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Tue, 3 Jan 2017 09:04:10 -0700 Subject: [PATCH 041/267] fix warning about enum comparisons --- src/KOKKOS/pair_table_kokkos.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/pair_table_kokkos.h b/src/KOKKOS/pair_table_kokkos.h index 4d3a9ec106..e768c97164 100644 --- a/src/KOKKOS/pair_table_kokkos.h +++ b/src/KOKKOS/pair_table_kokkos.h @@ -31,7 +31,7 @@ namespace LAMMPS_NS { template struct S_TableCompute { - enum {TabStyle = TABSTYLE}; + static constexpr int TabStyle = TABSTYLE; }; template From dae132c77099b2b5a895b3c08ee7df7bd11a74d4 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Tue, 3 Jan 2017 10:53:10 -0700 Subject: [PATCH 042/267] place newline at end of file --- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index 133d366fbc..4f04da2f3b 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -682,4 +682,4 @@ template class PairDPDfdtEnergyKokkos; #ifdef KOKKOS_HAVE_CUDA template class PairDPDfdtEnergyKokkos; #endif -} \ No newline at end of file +} From e3ebd8e7f1793caa1b7f686f6d7ae9a671be25ac Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Tue, 3 Jan 2017 09:15:30 -0700 Subject: [PATCH 043/267] remove syncs that shouldn't be needed --- src/KOKKOS/fix_dpd_energy_kokkos.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/KOKKOS/fix_dpd_energy_kokkos.cpp b/src/KOKKOS/fix_dpd_energy_kokkos.cpp index 6ab0b215b4..e6878afed4 100644 --- a/src/KOKKOS/fix_dpd_energy_kokkos.cpp +++ b/src/KOKKOS/fix_dpd_energy_kokkos.cpp @@ -70,9 +70,6 @@ void FixDPDenergyKokkos::take_half_step() atomKK->modified(execution_space, UCOND_MASK); atomKK->modified(execution_space, UMECH_MASK); - //should not be needed once everything is Kokkos - atomKK->sync(ExecutionSpaceFromDevice::space, UCOND_MASK); - atomKK->sync(ExecutionSpaceFromDevice::space, UMECH_MASK); } /* ---------------------------------------------------------------------- */ From 6d7607a6ade5299b021190373add4a1d50765aaa Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Tue, 3 Jan 2017 11:13:46 -0700 Subject: [PATCH 044/267] member function containing lambdas must be public --- src/KOKKOS/fix_dpd_energy_kokkos.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_dpd_energy_kokkos.h b/src/KOKKOS/fix_dpd_energy_kokkos.h index 0c43ecf422..ebf3a796fe 100644 --- a/src/KOKKOS/fix_dpd_energy_kokkos.h +++ b/src/KOKKOS/fix_dpd_energy_kokkos.h @@ -35,8 +35,8 @@ class FixDPDenergyKokkos : public FixDPDenergy { virtual void initial_integrate(int); virtual void final_integrate(); - protected: void take_half_step(); + 
protected: PairDPDfdtEnergyKokkos* pairDPDEKK; }; From ae0e882cde4c74c5cae0af09fa5fd46c783fad24 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 3 Jan 2017 11:51:28 -0700 Subject: [PATCH 045/267] Updating pair_multi_lucy_rx_kokkos to USER-DPD changes --- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 111 ++++++++++++++++------- src/KOKKOS/pair_multi_lucy_rx_kokkos.h | 8 +- 2 files changed, 82 insertions(+), 37 deletions(-) diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index 1dc8ccbae9..7cff630cb0 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -45,6 +45,12 @@ enum{NONE,RLINEAR,RSQ}; #define MAXLINE 1024 +#ifdef DBL_EPSILON + #define MY_EPSILON (10.0*DBL_EPSILON) +#else + #define MY_EPSILON (10.0*2.220446049250313e-16) +#endif + #define oneFluidParameter (-1) #define isOneFluid(_site) ( (_site) == oneFluidParameter ) @@ -187,12 +193,12 @@ void PairMultiLucyRXKokkos::compute_style(int eflag_in, int vflag_in { const int ntotal = nlocal + nghost; - d_fractionOld1 = typename AT::t_float_1d("PairMultiLucyRX::fractionOld1",ntotal); - d_fractionOld2 = typename AT::t_float_1d("PairMultiLucyRX::fractionOld2",ntotal); - d_fraction1 = typename AT::t_float_1d("PairMultiLucyRX::fraction1",ntotal); - d_fraction2 = typename AT::t_float_1d("PairMultiLucyRX::fraction2",ntotal); + d_mixWtSite1old = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite1old",ntotal); + d_mixWtSite2old = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite2old",ntotal); + d_mixWtSite1 = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite1",ntotal); + d_mixWtSite2 = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite2",ntotal); - Kokkos::parallel_for(Kokkos::RangePolicy(0,ntotal),*this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,ntotal),*this); } const int inum = list->inum; @@ -259,8 +265,8 @@ void PairMultiLucyRXKokkos::compute_style(int eflag_in, int vflag_in template KOKKOS_INLINE_FUNCTION -void 
PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXgetParams, const int &i) const { - getParams(i, d_fractionOld1[i], d_fractionOld2[i], d_fraction1[i], d_fraction2[i]); +void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXgetMixingWeights, const int &i) const { + getMixingWeights(i, d_mixWtSite1old[i], d_mixWtSite2old[i], d_mixWtSite1[i], d_mixWtSite2[i]); } template @@ -275,9 +281,9 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute::operator()(TagPairMultiLucyRXCompute::operator()(TagPairMultiLucyRXCompute::operator()(TagPairMultiLucyRXCompute::operator()(TagPairMultiLucyRXCompute::operator()(TagPairMultiLucyRXComputeLoca template KOKKOS_INLINE_FUNCTION -void PairMultiLucyRXKokkos::getParams(int id, double &fractionOld1, double &fractionOld2, double &fraction1, double &fraction2) const +void PairMultiLucyRXKokkos::getMixingWeights(int id, double &mixWtSite1old, double &mixWtSite2old, double &mixWtSite1, double &mixWtSite2) const { - double fractionOld, fraction; + double fractionOFAold, fractionOFA; + double fractionOld1, fraction1; + double fractionOld2, fraction2; + double nMoleculesOFAold, nMoleculesOFA; + double nMoleculesOld1, nMolecules1; + double nMoleculesOld2, nMolecules2; double nTotal, nTotalOld; @@ -579,32 +590,56 @@ void PairMultiLucyRXKokkos::getParams(int id, double &fractionOld1, } if (isOneFluid(isite1) == false){ - fractionOld1 = dvector(isite1+nspecies,id)/nTotalOld; - fraction1 = dvector(isite1,id)/nTotal; + nMoleculesOld1 = dvector(isite1+nspecies,id); + nMolecules1 = dvector(isite1,id); + fractionOld1 = nMoleculesOld1/nTotalOld; + fraction1 = nMolecules1/nTotal; } if (isOneFluid(isite2) == false){ - fractionOld2 = dvector(isite2+nspecies,id)/nTotalOld; - fraction2 = dvector(isite2,id)/nTotal; + nMoleculesOld2 = dvector(isite2+nspecies,id); + nMolecules2 = dvector(isite2,id); + fractionOld2 = nMoleculesOld2/nTotalOld; + fraction2 = nMolecules2/nTotal; } if (isOneFluid(isite1) || isOneFluid(isite2)){ - fractionOld = 
0.0; - fraction = 0.0; + nMoleculesOFAold = 0.0; + nMoleculesOFA = 0.0; + fractionOFAold = 0.0; + fractionOFA = 0.0; for (int ispecies = 0; ispecies < nspecies; ispecies++){ if (isite1 == ispecies || isite2 == ispecies) continue; - fractionOld += dvector(ispecies+nspecies,id) / nTotalOld; - fraction += dvector(ispecies,id) / nTotal; + nMoleculesOFAold += dvector(ispecies+nspecies,id); + nMoleculesOFA += dvector(ispecies,id); + fractionOFAold += dvector(ispecies+nspecies,id) / nTotalOld; + fractionOFA += dvector(ispecies,id) / nTotal; } if (isOneFluid(isite1)){ - fractionOld1 = fractionOld; - fraction1 = fraction; + nMoleculesOld1 = 1.0-(nTotalOld-nMoleculesOFAold); + nMolecules1 = 1.0-(nTotal-nMoleculesOFA); + fractionOld1 = fractionOFAold; + fraction1 = fractionOFA; } if (isOneFluid(isite2)){ - fractionOld2 = fractionOld; - fraction2 = fraction; + nMoleculesOld2 = 1.0-(nTotalOld-nMoleculesOFAold); + nMolecules2 = 1.0-(nTotal-nMoleculesOFA); + fractionOld2 = fractionOFAold; + fraction2 = fractionOFA; } } + + if(fractionalWeighting){ + mixWtSite1old = fractionOld1; + mixWtSite1 = fraction1; + mixWtSite2old = fractionOld2; + mixWtSite2 = fraction2; + } else { + mixWtSite1old = nMoleculesOld1; + mixWtSite1 = nMolecules1; + mixWtSite2old = nMoleculesOld2; + mixWtSite2 = nMolecules2; + } } /* ---------------------------------------------------------------------- */ @@ -897,6 +932,16 @@ void PairMultiLucyRXKokkos::settings(int narg, char **arg) tablength = force->inumeric(FLERR,arg[1]); if (tablength < 2) error->all(FLERR,"Illegal number of pair table entries"); + // optional keywords + + int iarg = 2; + while (iarg < narg) { + if (strcmp(arg[iarg],"fractional") == 0) fractionalWeighting = true; + else if (strcmp(arg[iarg],"molecular") == 0) fractionalWeighting = false; + else error->all(FLERR,"Illegal pair_style command"); + iarg++; + } + // delete old tables, since cannot just change settings for (int m = 0; m < ntables; m++) free_table(&tables[m]); diff --git 
a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h index a6622ac4ec..1e84e3efd8 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h @@ -32,7 +32,7 @@ namespace LAMMPS_NS { struct TagPairMultiLucyRXPackForwardComm{}; struct TagPairMultiLucyRXUnpackForwardComm{}; -struct TagPairMultiLucyRXgetParams{}; +struct TagPairMultiLucyRXgetMixingWeights{}; template struct TagPairMultiLucyRXCompute{}; @@ -75,7 +75,7 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { void operator()(TagPairMultiLucyRXUnpackForwardComm, const int&) const; KOKKOS_INLINE_FUNCTION - void operator()(TagPairMultiLucyRXgetParams, const int&) const; + void operator()(TagPairMultiLucyRXgetMixingWeights, const int&) const; template KOKKOS_INLINE_FUNCTION @@ -154,9 +154,9 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { void create_kokkos_tables(); KOKKOS_INLINE_FUNCTION - void getParams(int, double &, double &, double &, double &) const; + void getMixingWeights(int, double &, double &, double &, double &) const; - typename AT::t_float_1d d_fractionOld1,d_fractionOld2,d_fraction1,d_fraction2; + typename AT::t_float_1d d_mixWtSite1old,d_mixWtSite2old,d_mixWtSite1,d_mixWtSite2; typename AT::t_x_array_randomread x; typename AT::t_f_array f; From a4271ae8c5c2367cfc33f08b4994849a0cccd40b Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Wed, 4 Jan 2017 15:13:46 -0500 Subject: [PATCH 046/267] Added a Makefile for AFRL Thunder. 
--- src/MAKE/MACHINES/Makefile.afrl_thunder | 116 ++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 src/MAKE/MACHINES/Makefile.afrl_thunder diff --git a/src/MAKE/MACHINES/Makefile.afrl_thunder b/src/MAKE/MACHINES/Makefile.afrl_thunder new file mode 100644 index 0000000000..ceeec48870 --- /dev/null +++ b/src/MAKE/MACHINES/Makefile.afrl_thunder @@ -0,0 +1,116 @@ +# mpi = MPI with its default compiler + +SHELL = /bin/sh + +# --------------------------------------------------------------------- +# compiler/linker settings +# specify flags and libraries needed for your compiler + +CC = mpicxx +CCFLAGS = -g -O3 -Wall -Wextra -frounding-math -fsignaling-nans -march=native +SHFLAGS = -shared -MD -mcmodel=medium -fpic -fPIC +DEPFLAGS = -M + +LINK = mpicxx +LINKFLAGS = -g -O +LIB = +SIZE = size + +ARCHIVE = ar +ARFLAGS = -rc +SHLIBFLAGS = -shared + +# --------------------------------------------------------------------- +# LAMMPS-specific settings, all OPTIONAL +# specify settings for LAMMPS features you will use +# if you change any -D setting, do full re-compile after "make clean" + +# LAMMPS ifdef settings +# see possible settings in Section 2.2 (step 4) of manual + +LMP_INC = -DLAMMPS_GZIP +#LMP_INC += -DLAMMPS_JPEG +LMP_INC += -DLAMMPS_MEMALIGN=64 + +# MPI library +# see discussion in Section 2.2 (step 5) of manual +# MPI wrapper compiler/linker can provide this info +# can point to dummy MPI library in src/STUBS as in Makefile.serial +# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts +# INC = path for mpi.h, MPI compiler settings +# PATH = path for MPI library +# LIB = name of MPI library + +MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1 +MPI_PATH = +MPI_LIB = + +# FFT library +# see discussion in Section 2.2 (step 6) of manual +# can be left blank to use provided KISS FFT library +# INC = -DFFT setting, e.g. 
-DFFT_FFTW, FFT compiler settings +# PATH = path for FFT library +# LIB = name of FFT library + +FFT_INC = +FFT_PATH = +FFT_LIB = + +# JPEG and/or PNG library +# see discussion in Section 2.2 (step 7) of manual +# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC +# INC = path(s) for jpeglib.h and/or png.h +# PATH = path(s) for JPEG library and/or PNG library +# LIB = name(s) of JPEG library and/or PNG library + +JPG_INC = +JPG_PATH = +JPG_LIB = + +# --------------------------------------------------------------------- +# build rules and dependencies +# do not edit this section + +include Makefile.package.settings +include Makefile.package + +EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) +EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) +EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) +EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS) +EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS) + +# Path to src files + +vpath %.cpp .. +vpath %.h .. + +# Link target + +$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS) + $(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE) + $(SIZE) $(EXE) + +# Library targets + +lib: $(OBJ) $(EXTRA_LINK_DEPENDS) + $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) + +shlib: $(OBJ) $(EXTRA_LINK_DEPENDS) + $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \ + $(OBJ) $(EXTRA_LIB) $(LIB) + +# Compilation rules + +%.o:%.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< + +# Individual dependencies + +depend : fastdep.exe $(SRC) + @./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1 + +fastdep.exe: ../DEPEND/fastdep.c + cc -O -o $@ $< + +sinclude .depend From 8503ac22a859abb2617b5fe9bab2e5db6ec0803f Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Thu, 5 Jan 2017 13:58:11 -0500 Subject: [PATCH 047/267] Fixed error->all instead of error->one bug in USER-DPD/fix_shardlow.cpp. 
During dynamic load balancing, the subdomains will not be uniform so the bbox size test in USER-DPD/fix_shardlow.cpp may only be called by one rank. Using error->one allows any rank to stop the simulation in this scenario. Added rcut and bbox information to help in diagnostics. --- src/USER-DPD/fix_shardlow.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/USER-DPD/fix_shardlow.cpp b/src/USER-DPD/fix_shardlow.cpp index 541f4ba3c3..108b82a5b6 100644 --- a/src/USER-DPD/fix_shardlow.cpp +++ b/src/USER-DPD/fix_shardlow.cpp @@ -445,7 +445,12 @@ void FixShardlow::initial_integrate(int vflag) error->all(FLERR,"Fix shardlow does not yet support triclinic geometries"); if(rcut >= bbx || rcut >= bby || rcut>= bbz ) - error->all(FLERR,"Shardlow algorithm requires sub-domain length > 2*(rcut+skin). Either reduce the number of processors requested, or change the cutoff/skin\n"); + { + char fmt[] = {"Shardlow algorithm requires sub-domain length > 2*(rcut+skin). Either reduce the number of processors requested, or change the cutoff/skin: rcut= %e bbx= %e bby= %e bbz= %e\n"}; + char *msg = (char *) malloc(sizeof(fmt) + 4*15); + sprintf(msg, fmt, rcut, bbx, bby, bbz); + error->one(FLERR, msg); + } // Allocate memory for v_t0 to hold the initial velocities for the ghosts v_t0 = (double (*)[3]) memory->smalloc(sizeof(double)*3*nghost, "FixShardlow:v_t0"); From 332372dec2caab9d6f8fdcf3c87d3d6b37466999 Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Thu, 5 Jan 2017 14:03:16 -0500 Subject: [PATCH 048/267] Renamed Makefile.afrl_thunder to Makefile.icex to be more general. 
--- src/MAKE/MACHINES/{Makefile.afrl_thunder => Makefile.icex} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/MAKE/MACHINES/{Makefile.afrl_thunder => Makefile.icex} (100%) diff --git a/src/MAKE/MACHINES/Makefile.afrl_thunder b/src/MAKE/MACHINES/Makefile.icex similarity index 100% rename from src/MAKE/MACHINES/Makefile.afrl_thunder rename to src/MAKE/MACHINES/Makefile.icex From 19f2d2d1ecb13dae5ef26f16c8ba75b935c5fcf5 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Thu, 5 Jan 2017 15:22:59 -0700 Subject: [PATCH 049/267] fix many warnings in pair_dpd_fdt_energy_kokkos one Kokkos kernel was not annotated consistently, STACKPARAMS was essentially uninitialized and confused with a local variable, plus lots of variables were unused in some of the Kokkos kernels. --- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 14 +++----------- src/KOKKOS/pair_dpd_fdt_energy_kokkos.h | 1 + 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index 4f04da2f3b..5de2b38ed0 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -49,6 +49,7 @@ PairDPDfdtEnergyKokkos::PairDPDfdtEnergyKokkos(LAMMPS *lmp) : execution_space = ExecutionSpaceFromDevice::space; datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; + STACKPARAMS = 0; } /* ---------------------------------------------------------------------- */ @@ -164,8 +165,6 @@ void PairDPDfdtEnergyKokkos::compute(int eflag_in, int vflag_in) boltz = force->boltz; ftm2v = force->ftm2v; - int STACKPARAMS = 0; // optimize - // loop over neighbors of my atoms EV_FLOAT ev; @@ -278,14 +277,7 @@ void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeSp int i,j,jj,inum,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double vxtmp,vytmp,vztmp,delvx,delvy,delvz; - double rsq,r,rinv,wd,wr,factor_dpd,uTmp; - double dot,randnum; - - double kappa_ij, alpha_ij, 
theta_ij, gamma_ij; - double mass_i, mass_j; - double massinv_i, massinv_j; - double randPair, mu_ij; + double rsq,r,rinv,wd,wr,factor_dpd; i = d_ilist[ii]; xtmp = x(i,0); @@ -369,7 +361,7 @@ void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeNo Kokkos::View::value> > a_duCond = d_duCond; Kokkos::View::value> > a_duMech = d_duMech; - int i,j,jj,inum,jnum,itype,jtype; + int i,j,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; double vxtmp,vytmp,vztmp,delvx,delvy,delvz; double rsq,r,rinv,wd,wr,factor_dpd,uTmp; diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h index 41360091bc..2c2b78ac57 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h @@ -51,6 +51,7 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { void init_style(); double init_one(int, int); + KOKKOS_INLINE_FUNCTION void operator()(TagPairDPDfdtEnergyZero, const int&) const; template From 318ab9a18506143905a0692f33dd9ac6e22ac4d0 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Tue, 3 Jan 2017 16:33:15 -0700 Subject: [PATCH 050/267] trying PairTableRX : public PairTable saves a lot of duplicate code --- src/USER-DPD/pair_table_rx.cpp | 662 +-------------------------------- src/USER-DPD/pair_table_rx.h | 40 +- src/pair_table.cpp | 4 +- src/pair_table.h | 7 +- 4 files changed, 13 insertions(+), 700 deletions(-) diff --git a/src/USER-DPD/pair_table_rx.cpp b/src/USER-DPD/pair_table_rx.cpp index e3cacc6155..c8d59c052d 100644 --- a/src/USER-DPD/pair_table_rx.cpp +++ b/src/USER-DPD/pair_table_rx.cpp @@ -31,10 +31,6 @@ using namespace LAMMPS_NS; -enum{NONE,RLINEAR,RSQ,BMP}; - -#define MAXLINE 1024 - #ifdef DBL_EPSILON #define MY_EPSILON (10.0*DBL_EPSILON) #else @@ -46,31 +42,13 @@ enum{NONE,RLINEAR,RSQ,BMP}; /* ---------------------------------------------------------------------- */ -PairTableRX::PairTableRX(LAMMPS *lmp) : Pair(lmp) +PairTableRX::PairTableRX(LAMMPS *lmp) : 
PairTable(lmp) { - ntables = 0; - tables = NULL; fractionalWeighting = true; } /* ---------------------------------------------------------------------- */ -PairTableRX::~PairTableRX() -{ - if (copymode) return; - - for (int m = 0; m < ntables; m++) free_table(&tables[m]); - memory->sfree(tables); - - if (allocated) { - memory->destroy(setflag); - memory->destroy(cutsq); - memory->destroy(tabindex); - } -} - -/* ---------------------------------------------------------------------- */ - void PairTableRX::compute(int eflag, int vflag) { int i,j,ii,jj,inum,jnum,itype,jtype,itable; @@ -254,24 +232,6 @@ void PairTableRX::compute(int eflag, int vflag) memory->destroy(mixWtSite2); } -/* ---------------------------------------------------------------------- - allocate all arrays -------------------------------------------------------------------------- */ - -void PairTableRX::allocate() -{ - allocated = 1; - const int nt = atom->ntypes + 1; - - memory->create(setflag,nt,nt,"pair:setflag"); - memory->create(cutsq,nt,nt,"pair:cutsq"); - memory->create(tabindex,nt,nt,"pair:tabindex"); - - memset(&setflag[0][0],0,nt*nt*sizeof(int)); - memset(&cutsq[0][0],0,nt*nt*sizeof(double)); - memset(&tabindex[0][0],0,nt*nt*sizeof(int)); -} - /* ---------------------------------------------------------------------- global settings ------------------------------------------------------------------------- */ @@ -301,8 +261,8 @@ void PairTableRX::settings(int narg, char **arg) else if (strcmp(arg[iarg],"msm") == 0) msmflag = 1; else if (strcmp(arg[iarg],"dispersion") == 0) dispersionflag = 1; else if (strcmp(arg[iarg],"tip4p") == 0) tip4pflag = 1; - else if (strcmp(arg[iarg],"fractional") == 0) fractionalWeighting = true; - else if (strcmp(arg[iarg],"molecular") == 0) fractionalWeighting = false; + else if (strcmp(arg[iarg],"fractional") == 0) fractionalWeighting = true; + else if (strcmp(arg[iarg],"molecular") == 0) fractionalWeighting = false; else error->all(FLERR,"Illegal pair_style 
command"); iarg++; } @@ -464,602 +424,6 @@ void PairTableRX::coeff(int narg, char **arg) } -/* ---------------------------------------------------------------------- - init for one type pair i,j and corresponding j,i -------------------------------------------------------------------------- */ - -double PairTableRX::init_one(int i, int j) -{ - if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set"); - - tabindex[j][i] = tabindex[i][j]; - - return tables[tabindex[i][j]].cut; -} - -/* ---------------------------------------------------------------------- - read a table section from a tabulated potential file - only called by proc 0 - this function sets these values in Table: - ninput,rfile,efile,ffile,rflag,rlo,rhi,fpflag,fplo,fphi,ntablebits -------------------------------------------------------------------------- */ - -void PairTableRX::read_table(Table *tb, char *file, char *keyword) -{ - char line[MAXLINE]; - - // open file - - FILE *fp = force->open_potential(file); - if (fp == NULL) { - char str[128]; - sprintf(str,"Cannot open file %s",file); - error->one(FLERR,str); - } - - // loop until section found with matching keyword - - while (1) { - if (fgets(line,MAXLINE,fp) == NULL) - error->one(FLERR,"Did not find keyword in table file"); - if (strspn(line," \t\n\r") == strlen(line)) continue; // blank line - if (line[0] == '#') continue; // comment - char *word = strtok(line," \t\n\r"); - if (strcmp(word,keyword) == 0) break; // matching keyword - fgets(line,MAXLINE,fp); // no match, skip section - param_extract(tb,line); - fgets(line,MAXLINE,fp); - for (int i = 0; i < tb->ninput; i++) fgets(line,MAXLINE,fp); - } - - // read args on 2nd line of section - // allocate table arrays for file values - - fgets(line,MAXLINE,fp); - param_extract(tb,line); - memory->create(tb->rfile,tb->ninput,"pair:rfile"); - memory->create(tb->efile,tb->ninput,"pair:efile"); - memory->create(tb->ffile,tb->ninput,"pair:ffile"); - - // setup bitmap parameters for table to 
read in - - tb->ntablebits = 0; - int masklo,maskhi,nmask,nshiftbits; - if (tb->rflag == BMP) { - while (1 << tb->ntablebits < tb->ninput) tb->ntablebits++; - if (1 << tb->ntablebits != tb->ninput) - error->one(FLERR,"Bitmapped table is incorrect length in table file"); - init_bitmap(tb->rlo,tb->rhi,tb->ntablebits,masklo,maskhi,nmask,nshiftbits); - } - - // read r,e,f table values from file - // if rflag set, compute r - // if rflag not set, use r from file - - int itmp; - double rtmp; - union_int_float_t rsq_lookup; - - fgets(line,MAXLINE,fp); - for (int i = 0; i < tb->ninput; i++) { - fgets(line,MAXLINE,fp); - sscanf(line,"%d %lg %lg %lg",&itmp,&rtmp,&tb->efile[i],&tb->ffile[i]); - - if (tb->rflag == RLINEAR) - rtmp = tb->rlo + (tb->rhi - tb->rlo)*i/(tb->ninput-1); - else if (tb->rflag == RSQ) { - rtmp = tb->rlo*tb->rlo + - (tb->rhi*tb->rhi - tb->rlo*tb->rlo)*i/(tb->ninput-1); - rtmp = sqrt(rtmp); - } else if (tb->rflag == BMP) { - rsq_lookup.i = i << nshiftbits; - rsq_lookup.i |= masklo; - if (rsq_lookup.f < tb->rlo*tb->rlo) { - rsq_lookup.i = i << nshiftbits; - rsq_lookup.i |= maskhi; - } - rtmp = sqrtf(rsq_lookup.f); - } - - tb->rfile[i] = rtmp; - } - - // close file - - fclose(fp); -} - -/* ---------------------------------------------------------------------- - broadcast read-in table info from proc 0 to other procs - this function communicates these values in Table: - ninput,rfile,efile,ffile,rflag,rlo,rhi,fpflag,fplo,fphi -------------------------------------------------------------------------- */ - -void PairTableRX::bcast_table(Table *tb) -{ - MPI_Bcast(&tb->ninput,1,MPI_INT,0,world); - - int me; - MPI_Comm_rank(world,&me); - if (me > 0) { - memory->create(tb->rfile,tb->ninput,"pair:rfile"); - memory->create(tb->efile,tb->ninput,"pair:efile"); - memory->create(tb->ffile,tb->ninput,"pair:ffile"); - } - - MPI_Bcast(tb->rfile,tb->ninput,MPI_DOUBLE,0,world); - MPI_Bcast(tb->efile,tb->ninput,MPI_DOUBLE,0,world); - 
MPI_Bcast(tb->ffile,tb->ninput,MPI_DOUBLE,0,world); - - MPI_Bcast(&tb->rflag,1,MPI_INT,0,world); - if (tb->rflag) { - MPI_Bcast(&tb->rlo,1,MPI_DOUBLE,0,world); - MPI_Bcast(&tb->rhi,1,MPI_DOUBLE,0,world); - } - MPI_Bcast(&tb->fpflag,1,MPI_INT,0,world); - if (tb->fpflag) { - MPI_Bcast(&tb->fplo,1,MPI_DOUBLE,0,world); - MPI_Bcast(&tb->fphi,1,MPI_DOUBLE,0,world); - } -} - -/* ---------------------------------------------------------------------- - build spline representation of e,f over entire range of read-in table - this function sets these values in Table: e2file,f2file -------------------------------------------------------------------------- */ - -void PairTableRX::spline_table(Table *tb) -{ - memory->create(tb->e2file,tb->ninput,"pair:e2file"); - memory->create(tb->f2file,tb->ninput,"pair:f2file"); - - double ep0 = - tb->ffile[0]; - double epn = - tb->ffile[tb->ninput-1]; - spline(tb->rfile,tb->efile,tb->ninput,ep0,epn,tb->e2file); - - if (tb->fpflag == 0) { - tb->fplo = (tb->ffile[1] - tb->ffile[0]) / (tb->rfile[1] - tb->rfile[0]); - tb->fphi = (tb->ffile[tb->ninput-1] - tb->ffile[tb->ninput-2]) / - (tb->rfile[tb->ninput-1] - tb->rfile[tb->ninput-2]); - } - - double fp0 = tb->fplo; - double fpn = tb->fphi; - spline(tb->rfile,tb->ffile,tb->ninput,fp0,fpn,tb->f2file); -} - -/* ---------------------------------------------------------------------- - extract attributes from parameter line in table section - format of line: N value R/RSQ/BITMAP lo hi FP fplo fphi - N is required, other params are optional -------------------------------------------------------------------------- */ - -void PairTableRX::param_extract(Table *tb, char *line) -{ - tb->ninput = 0; - tb->rflag = NONE; - tb->fpflag = 0; - - char *word = strtok(line," \t\n\r\f"); - while (word) { - if (strcmp(word,"N") == 0) { - word = strtok(NULL," \t\n\r\f"); - tb->ninput = atoi(word); - } else if (strcmp(word,"R") == 0 || strcmp(word,"RSQ") == 0 || - strcmp(word,"BITMAP") == 0) { - if (strcmp(word,"R") == 
0) tb->rflag = RLINEAR; - else if (strcmp(word,"RSQ") == 0) tb->rflag = RSQ; - else if (strcmp(word,"BITMAP") == 0) tb->rflag = BMP; - word = strtok(NULL," \t\n\r\f"); - tb->rlo = atof(word); - word = strtok(NULL," \t\n\r\f"); - tb->rhi = atof(word); - } else if (strcmp(word,"FP") == 0) { - tb->fpflag = 1; - word = strtok(NULL," \t\n\r\f"); - tb->fplo = atof(word); - word = strtok(NULL," \t\n\r\f"); - tb->fphi = atof(word); - } else { - printf("WORD: %s\n",word); - error->one(FLERR,"Invalid keyword in pair table parameters"); - } - word = strtok(NULL," \t\n\r\f"); - } - - if (tb->ninput == 0) error->one(FLERR,"Pair table parameters did not set N"); -} - -/* ---------------------------------------------------------------------- - compute r,e,f vectors from splined values -------------------------------------------------------------------------- */ - -void PairTableRX::compute_table(Table *tb) -{ - int tlm1 = tablength-1; - - // inner = inner table bound - // cut = outer table bound - // delta = table spacing in rsq for N-1 bins - - double inner; - if (tb->rflag) inner = tb->rlo; - else inner = tb->rfile[0]; - tb->innersq = double(inner)*double(inner); - tb->delta = double(tb->cut*tb->cut - double(tb->innersq)) / double(tlm1); - tb->invdelta = 1.0/double(tb->delta); - - // direct lookup tables - // N-1 evenly spaced bins in rsq from inner to cut - // e,f = value at midpt of bin - // e,f are N-1 in length since store 1 value at bin midpt - // f is converted to f/r when stored in f[i] - // e,f are never a match to read-in values, always computed via spline interp - - if (tabstyle == LOOKUP) { - memory->create(tb->e,tlm1,"pair:e"); - memory->create(tb->f,tlm1,"pair:f"); - - double r,rsq; - for (int i = 0; i < tlm1; i++) { - rsq = tb->innersq + (i+0.5)*tb->delta; - r = sqrt(rsq); - tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r); - tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r; - } - } - - // linear tables - // N-1 evenly spaced bins in 
rsq from inner to cut - // rsq,e,f = value at lower edge of bin - // de,df values = delta from lower edge to upper edge of bin - // rsq,e,f are N in length so de,df arrays can compute difference - // f is converted to f/r when stored in f[i] - // e,f can match read-in values, else compute via spline interp - - if (tabstyle == LINEAR) { - memory->create(tb->rsq,tablength,"pair:rsq"); - memory->create(tb->e,tablength,"pair:e"); - memory->create(tb->f,tablength,"pair:f"); - memory->create(tb->de,tlm1,"pair:de"); - memory->create(tb->df,tlm1,"pair:df"); - - double r,rsq; - for (int i = 0; i < tablength; i++) { - rsq = tb->innersq + i*tb->delta; - r = sqrt(rsq); - tb->rsq[i] = rsq; - if (tb->match) { - tb->e[i] = tb->efile[i]; - tb->f[i] = tb->ffile[i]/r; - } else { - tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r); - tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r; - } - } - - for (int i = 0; i < tlm1; i++) { - tb->de[i] = tb->e[i+1] - tb->e[i]; - tb->df[i] = tb->f[i+1] - tb->f[i]; - } - } - - // cubic spline tables - // N-1 evenly spaced bins in rsq from inner to cut - // rsq,e,f = value at lower edge of bin - // e2,f2 = spline coefficient for each bin - // rsq,e,f,e2,f2 are N in length so have N-1 spline bins - // f is converted to f/r after e is splined - // e,f can match read-in values, else compute via spline interp - - if (tabstyle == SPLINE) { - memory->create(tb->rsq,tablength,"pair:rsq"); - memory->create(tb->e,tablength,"pair:e"); - memory->create(tb->f,tablength,"pair:f"); - memory->create(tb->e2,tablength,"pair:e2"); - memory->create(tb->f2,tablength,"pair:f2"); - - tb->deltasq6 = tb->delta*tb->delta / 6.0; - - double r,rsq; - for (int i = 0; i < tablength; i++) { - rsq = tb->innersq + i*tb->delta; - r = sqrt(rsq); - tb->rsq[i] = rsq; - if (tb->match) { - tb->e[i] = tb->efile[i]; - tb->f[i] = tb->ffile[i]/r; - } else { - tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r); - tb->f[i] = 
splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r); - } - } - - // ep0,epn = dh/dg at inner and at cut - // h(r) = e(r) and g(r) = r^2 - // dh/dg = (de/dr) / 2r = -f/2r - - double ep0 = - tb->f[0] / (2.0 * sqrt(tb->innersq)); - double epn = - tb->f[tlm1] / (2.0 * tb->cut); - spline(tb->rsq,tb->e,tablength,ep0,epn,tb->e2); - - // fp0,fpn = dh/dg at inner and at cut - // h(r) = f(r)/r and g(r) = r^2 - // dh/dg = (1/r df/dr - f/r^2) / 2r - // dh/dg in secant approx = (f(r2)/r2 - f(r1)/r1) / (g(r2) - g(r1)) - - double fp0,fpn; - double secant_factor = 0.1; - if (tb->fpflag) fp0 = (tb->fplo/sqrt(tb->innersq) - tb->f[0]/tb->innersq) / - (2.0 * sqrt(tb->innersq)); - else { - double rsq1 = tb->innersq; - double rsq2 = rsq1 + secant_factor*tb->delta; - fp0 = (splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,sqrt(rsq2)) / - sqrt(rsq2) - tb->f[0] / sqrt(rsq1)) / (secant_factor*tb->delta); - } - - if (tb->fpflag && tb->cut == tb->rfile[tb->ninput-1]) fpn = - (tb->fphi/tb->cut - tb->f[tlm1]/(tb->cut*tb->cut)) / (2.0 * tb->cut); - else { - double rsq2 = tb->cut * tb->cut; - double rsq1 = rsq2 - secant_factor*tb->delta; - fpn = (tb->f[tlm1] / sqrt(rsq2) - - splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,sqrt(rsq1)) / - sqrt(rsq1)) / (secant_factor*tb->delta); - } - - for (int i = 0; i < tablength; i++) tb->f[i] /= sqrt(tb->rsq[i]); - spline(tb->rsq,tb->f,tablength,fp0,fpn,tb->f2); - } - - // bitmapped linear tables - // 2^N bins from inner to cut, spaced in bitmapped manner - // f is converted to f/r when stored in f[i] - // e,f can match read-in values, else compute via spline interp - - if (tabstyle == BITMAP) { - double r; - union_int_float_t rsq_lookup; - int masklo,maskhi; - - // linear lookup tables of length ntable = 2^n - // stored value = value at lower edge of bin - - init_bitmap(inner,tb->cut,tablength,masklo,maskhi,tb->nmask,tb->nshiftbits); - int ntable = 1 << tablength; - int ntablem1 = ntable - 1; - - memory->create(tb->rsq,ntable,"pair:rsq"); - 
memory->create(tb->e,ntable,"pair:e"); - memory->create(tb->f,ntable,"pair:f"); - memory->create(tb->de,ntable,"pair:de"); - memory->create(tb->df,ntable,"pair:df"); - memory->create(tb->drsq,ntable,"pair:drsq"); - - union_int_float_t minrsq_lookup; - minrsq_lookup.i = 0 << tb->nshiftbits; - minrsq_lookup.i |= maskhi; - - for (int i = 0; i < ntable; i++) { - rsq_lookup.i = i << tb->nshiftbits; - rsq_lookup.i |= masklo; - if (rsq_lookup.f < tb->innersq) { - rsq_lookup.i = i << tb->nshiftbits; - rsq_lookup.i |= maskhi; - } - r = sqrtf(rsq_lookup.f); - tb->rsq[i] = rsq_lookup.f; - if (tb->match) { - tb->e[i] = tb->efile[i]; - tb->f[i] = tb->ffile[i]/r; - } else { - tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r); - tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r; - } - minrsq_lookup.f = MIN(minrsq_lookup.f,rsq_lookup.f); - } - - tb->innersq = minrsq_lookup.f; - - for (int i = 0; i < ntablem1; i++) { - tb->de[i] = tb->e[i+1] - tb->e[i]; - tb->df[i] = tb->f[i+1] - tb->f[i]; - tb->drsq[i] = 1.0/(tb->rsq[i+1] - tb->rsq[i]); - } - - // get the delta values for the last table entries - // tables are connected periodically between 0 and ntablem1 - - tb->de[ntablem1] = tb->e[0] - tb->e[ntablem1]; - tb->df[ntablem1] = tb->f[0] - tb->f[ntablem1]; - tb->drsq[ntablem1] = 1.0/(tb->rsq[0] - tb->rsq[ntablem1]); - - // get the correct delta values at itablemax - // smallest r is in bin itablemin - // largest r is in bin itablemax, which is itablemin-1, - // or ntablem1 if itablemin=0 - - // deltas at itablemax only needed if corresponding rsq < cut*cut - // if so, compute deltas between rsq and cut*cut - // if tb->match, data at cut*cut is unavailable, so we'll take - // deltas at itablemax-1 as a good approximation - - double e_tmp,f_tmp; - int itablemin = minrsq_lookup.i & tb->nmask; - itablemin >>= tb->nshiftbits; - int itablemax = itablemin - 1; - if (itablemin == 0) itablemax = ntablem1; - int itablemaxm1 = itablemax - 1; - if (itablemax == 0) 
itablemaxm1 = ntablem1; - rsq_lookup.i = itablemax << tb->nshiftbits; - rsq_lookup.i |= maskhi; - if (rsq_lookup.f < tb->cut*tb->cut) { - if (tb->match) { - tb->de[itablemax] = tb->de[itablemaxm1]; - tb->df[itablemax] = tb->df[itablemaxm1]; - tb->drsq[itablemax] = tb->drsq[itablemaxm1]; - } else { - rsq_lookup.f = tb->cut*tb->cut; - r = sqrtf(rsq_lookup.f); - e_tmp = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r); - f_tmp = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r; - tb->de[itablemax] = e_tmp - tb->e[itablemax]; - tb->df[itablemax] = f_tmp - tb->f[itablemax]; - tb->drsq[itablemax] = 1.0/(rsq_lookup.f - tb->rsq[itablemax]); - } - } - } -} - -/* ---------------------------------------------------------------------- - set all ptrs in a table to NULL, so can be freed safely -------------------------------------------------------------------------- */ - -void PairTableRX::null_table(Table *tb) -{ - tb->rfile = tb->efile = tb->ffile = NULL; - tb->e2file = tb->f2file = NULL; - tb->rsq = tb->drsq = tb->e = tb->de = NULL; - tb->f = tb->df = tb->e2 = tb->f2 = NULL; -} - -/* ---------------------------------------------------------------------- - free all arrays in a table -------------------------------------------------------------------------- */ - -void PairTableRX::free_table(Table *tb) -{ - memory->destroy(tb->rfile); - memory->destroy(tb->efile); - memory->destroy(tb->ffile); - memory->destroy(tb->e2file); - memory->destroy(tb->f2file); - - memory->destroy(tb->rsq); - memory->destroy(tb->drsq); - memory->destroy(tb->e); - memory->destroy(tb->de); - memory->destroy(tb->f); - memory->destroy(tb->df); - memory->destroy(tb->e2); - memory->destroy(tb->f2); -} - -/* ---------------------------------------------------------------------- - spline and splint routines modified from Numerical Recipes -------------------------------------------------------------------------- */ - -void PairTableRX::spline(double *x, double *y, int n, - double yp1, double ypn, 
double *y2) -{ - int i,k; - double p,qn,sig,un; - double *u = new double[n]; - - if (yp1 > 0.99e30) y2[0] = u[0] = 0.0; - else { - y2[0] = -0.5; - u[0] = (3.0/(x[1]-x[0])) * ((y[1]-y[0]) / (x[1]-x[0]) - yp1); - } - for (i = 1; i < n-1; i++) { - sig = (x[i]-x[i-1]) / (x[i+1]-x[i-1]); - p = sig*y2[i-1] + 2.0; - y2[i] = (sig-1.0) / p; - u[i] = (y[i+1]-y[i]) / (x[i+1]-x[i]) - (y[i]-y[i-1]) / (x[i]-x[i-1]); - u[i] = (6.0*u[i] / (x[i+1]-x[i-1]) - sig*u[i-1]) / p; - } - if (ypn > 0.99e30) qn = un = 0.0; - else { - qn = 0.5; - un = (3.0/(x[n-1]-x[n-2])) * (ypn - (y[n-1]-y[n-2]) / (x[n-1]-x[n-2])); - } - y2[n-1] = (un-qn*u[n-2]) / (qn*y2[n-2] + 1.0); - for (k = n-2; k >= 0; k--) y2[k] = y2[k]*y2[k+1] + u[k]; - - delete [] u; -} - -/* ---------------------------------------------------------------------- */ - -double PairTableRX::splint(double *xa, double *ya, double *y2a, int n, double x) -{ - int klo,khi,k; - double h,b,a,y; - - klo = 0; - khi = n-1; - while (khi-klo > 1) { - k = (khi+klo) >> 1; - if (xa[k] > x) khi = k; - else klo = k; - } - h = xa[khi]-xa[klo]; - a = (xa[khi]-x) / h; - b = (x-xa[klo]) / h; - y = a*ya[klo] + b*ya[khi] + - ((a*a*a-a)*y2a[klo] + (b*b*b-b)*y2a[khi]) * (h*h)/6.0; - return y; -} - -/* ---------------------------------------------------------------------- - proc 0 writes to restart file -------------------------------------------------------------------------- */ - -void PairTableRX::write_restart(FILE *fp) -{ - write_restart_settings(fp); -} - -/* ---------------------------------------------------------------------- - proc 0 reads from restart file, bcasts -------------------------------------------------------------------------- */ - -void PairTableRX::read_restart(FILE *fp) -{ - read_restart_settings(fp); - allocate(); -} - -/* ---------------------------------------------------------------------- - proc 0 writes to restart file -------------------------------------------------------------------------- */ - -void 
PairTableRX::write_restart_settings(FILE *fp) -{ - fwrite(&tabstyle,sizeof(int),1,fp); - fwrite(&tablength,sizeof(int),1,fp); - fwrite(&ewaldflag,sizeof(int),1,fp); - fwrite(&pppmflag,sizeof(int),1,fp); - fwrite(&msmflag,sizeof(int),1,fp); - fwrite(&dispersionflag,sizeof(int),1,fp); - fwrite(&tip4pflag,sizeof(int),1,fp); -} - -/* ---------------------------------------------------------------------- - proc 0 reads from restart file, bcasts -------------------------------------------------------------------------- */ - -void PairTableRX::read_restart_settings(FILE *fp) -{ - if (comm->me == 0) { - fread(&tabstyle,sizeof(int),1,fp); - fread(&tablength,sizeof(int),1,fp); - fread(&ewaldflag,sizeof(int),1,fp); - fread(&pppmflag,sizeof(int),1,fp); - fread(&msmflag,sizeof(int),1,fp); - fread(&dispersionflag,sizeof(int),1,fp); - fread(&tip4pflag,sizeof(int),1,fp); - } - MPI_Bcast(&tabstyle,1,MPI_INT,0,world); - MPI_Bcast(&tablength,1,MPI_INT,0,world); - MPI_Bcast(&ewaldflag,1,MPI_INT,0,world); - MPI_Bcast(&pppmflag,1,MPI_INT,0,world); - MPI_Bcast(&msmflag,1,MPI_INT,0,world); - MPI_Bcast(&dispersionflag,1,MPI_INT,0,world); - MPI_Bcast(&tip4pflag,1,MPI_INT,0,world); -} - /* ---------------------------------------------------------------------- */ double PairTableRX::single(int i, int j, int itype, int jtype, double rsq, @@ -1131,26 +495,6 @@ double PairTableRX::single(int i, int j, int itype, int jtype, double rsq, return factor_lj*phi; } -/* ---------------------------------------------------------------------- - return the Coulomb cutoff for tabled potentials - called by KSpace solvers which require that all pairwise cutoffs be the same - loop over all tables not just those indexed by tabindex[i][j] since - no way to know which tables are active since pair::init() not yet called -------------------------------------------------------------------------- */ - -void *PairTableRX::extract(const char *str, int &dim) -{ - if (strcmp(str,"cut_coul") != 0) return NULL; - if 
(ntables == 0) error->all(FLERR,"All pair coeffs are not set"); - - double cut_coul = tables[0].cut; - for (int m = 1; m < ntables; m++) - if (tables[m].cut != cut_coul) - error->all(FLERR,"Pair table cutoffs must all be equal to use with KSpace"); - dim = 0; - return &tables[0].cut; -} - /* ---------------------------------------------------------------------- */ void PairTableRX::getMixingWeights(int id, double &mixWtSite1old, double &mixWtSite2old, double &mixWtSite1, double &mixWtSite2) diff --git a/src/USER-DPD/pair_table_rx.h b/src/USER-DPD/pair_table_rx.h index c6afe6a8d5..4f80872029 100644 --- a/src/USER-DPD/pair_table_rx.h +++ b/src/USER-DPD/pair_table_rx.h @@ -1,4 +1,4 @@ -/* ---------------------------------------------------------------------- +/* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -20,11 +20,11 @@ PairStyle(table/rx,PairTableRX) #ifndef LMP_PAIR_TABLE_RX_H #define LMP_PAIR_TABLE_RX_H -#include "pair.h" +#include "pair_table.h" namespace LAMMPS_NS { -class PairTableRX : public Pair { +class PairTableRX : public PairTable { public: PairTableRX(class LAMMPS *); virtual ~PairTableRX(); @@ -32,43 +32,11 @@ class PairTableRX : public Pair { virtual void compute(int, int); void settings(int, char **); void coeff(int, char **); - double init_one(int, int); - void write_restart(FILE *); - void read_restart(FILE *); - void write_restart_settings(FILE *); - void read_restart_settings(FILE *); - double single(int, int, int, int, double, double, double, double &); - void *extract(const char *, int &); + virtual double single(int, int, int, int, double, double, double, double &); protected: enum{LOOKUP,LINEAR,SPLINE,BITMAP}; - int tabstyle,tablength; - struct Table { - int ninput,rflag,fpflag,match,ntablebits; - int nshiftbits,nmask; - double rlo,rhi,fplo,fphi,cut; - double 
*rfile,*efile,*ffile; - double *e2file,*f2file; - double innersq,delta,invdelta,deltasq6; - double *rsq,*drsq,*e,*de,*f,*df,*e2,*f2; - }; - int ntables; - Table *tables; - - int **tabindex; - - void allocate(); - void read_table(Table *, char *, char *); - void param_extract(Table *, char *); - void bcast_table(Table *); - void spline_table(Table *); - void compute_table(Table *); - void null_table(Table *); - void free_table(Table *); - void spline(double *, double *, int, double, double, double *); - double splint(double *, double *, double *, int, double); - int nspecies; char *site1, *site2; int isite1, isite2; diff --git a/src/pair_table.cpp b/src/pair_table.cpp index c4bc3e7dd2..1c6bfe128e 100644 --- a/src/pair_table.cpp +++ b/src/pair_table.cpp @@ -29,8 +29,6 @@ using namespace LAMMPS_NS; -enum{NONE,RLINEAR,RSQ,BMP}; - #define MAXLINE 1024 #define EPSILONR 1.0e-6 @@ -46,6 +44,8 @@ PairTable::PairTable(LAMMPS *lmp) : Pair(lmp) PairTable::~PairTable() { + if (copymode) return; + for (int m = 0; m < ntables; m++) free_table(&tables[m]); memory->sfree(tables); diff --git a/src/pair_table.h b/src/pair_table.h index 358491f7cf..370efcec2f 100644 --- a/src/pair_table.h +++ b/src/pair_table.h @@ -37,11 +37,12 @@ class PairTable : public Pair { void read_restart(FILE *); void write_restart_settings(FILE *); void read_restart_settings(FILE *); - double single(int, int, int, int, double, double, double, double &); + virtual double single(int, int, int, int, double, double, double, double &); void *extract(const char *, int &); protected: enum{LOOKUP,LINEAR,SPLINE,BITMAP}; + enum{NONE,RLINEAR,RSQ,BMP}; int tabstyle,tablength; struct Table { @@ -66,8 +67,8 @@ class PairTable : public Pair { void compute_table(Table *); void null_table(Table *); void free_table(Table *); - void spline(double *, double *, int, double, double, double *); - double splint(double *, double *, double *, int, double); + static void spline(double *, double *, int, double, double, double *); + 
static double splint(double *, double *, double *, int, double); }; } From 3941fe9ab7488bf91fca7b6c529ad2c2cd7ba35b Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Tue, 3 Jan 2017 16:42:24 -0700 Subject: [PATCH 051/267] fix compilation --- src/USER-DPD/pair_table_rx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/USER-DPD/pair_table_rx.h b/src/USER-DPD/pair_table_rx.h index 4f80872029..00314ac424 100644 --- a/src/USER-DPD/pair_table_rx.h +++ b/src/USER-DPD/pair_table_rx.h @@ -27,7 +27,7 @@ namespace LAMMPS_NS { class PairTableRX : public PairTable { public: PairTableRX(class LAMMPS *); - virtual ~PairTableRX(); + virtual ~PairTableRX() {} virtual void compute(int, int); void settings(int, char **); From a1ac2ae9b7570d27148064d90ea8051c7e30c75e Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Wed, 4 Jan 2017 10:51:31 -0700 Subject: [PATCH 052/267] move enum to pair.h to avoid having it be replicated in several different locations --- src/pair.cpp | 2 -- src/pair.h | 2 ++ src/pair_table.h | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/pair.cpp b/src/pair.cpp index 5d73a592e8..f8ae641d2f 100644 --- a/src/pair.cpp +++ b/src/pair.cpp @@ -43,8 +43,6 @@ using namespace LAMMPS_NS; using namespace MathConst; -enum{NONE,RLINEAR,RSQ,BMP}; - // allocate space for static class instance variable and initialize it int Pair::instance_total = 0; diff --git a/src/pair.h b/src/pair.h index 3378115e49..fbb6d8408b 100644 --- a/src/pair.h +++ b/src/pair.h @@ -32,6 +32,8 @@ class Pair : protected Pointers { friend class Info; public: + enum{NONE,RLINEAR,RSQ,BMP}; + static int instance_total; // # of Pair classes ever instantiated double eng_vdwl,eng_coul; // accumulated energies diff --git a/src/pair_table.h b/src/pair_table.h index 370efcec2f..8d5dbdb28a 100644 --- a/src/pair_table.h +++ b/src/pair_table.h @@ -42,7 +42,6 @@ class PairTable : public Pair { protected: enum{LOOKUP,LINEAR,SPLINE,BITMAP}; - enum{NONE,RLINEAR,RSQ,BMP}; int 
tabstyle,tablength; struct Table { From 70927d08e734d418ed61f9492160ca23b65c6e6f Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Thu, 5 Jan 2017 13:25:30 -0700 Subject: [PATCH 053/267] remove duplicate enum --- src/USER-DPD/pair_table_rx.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/USER-DPD/pair_table_rx.h b/src/USER-DPD/pair_table_rx.h index 00314ac424..9dee5df266 100644 --- a/src/USER-DPD/pair_table_rx.h +++ b/src/USER-DPD/pair_table_rx.h @@ -35,7 +35,6 @@ class PairTableRX : public PairTable { virtual double single(int, int, int, int, double, double, double, double &); protected: - enum{LOOKUP,LINEAR,SPLINE,BITMAP}; int nspecies; char *site1, *site2; From ad1402562d70ffa4a03e150ed9246e8ae710c684 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Thu, 5 Jan 2017 20:54:24 -0700 Subject: [PATCH 054/267] Revert "move enum to pair.h" This reverts commit a1ac2ae9b7570d27148064d90ea8051c7e30c75e. --- src/pair.cpp | 2 ++ src/pair.h | 2 -- src/pair_table.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pair.cpp b/src/pair.cpp index f8ae641d2f..5d73a592e8 100644 --- a/src/pair.cpp +++ b/src/pair.cpp @@ -43,6 +43,8 @@ using namespace LAMMPS_NS; using namespace MathConst; +enum{NONE,RLINEAR,RSQ,BMP}; + // allocate space for static class instance variable and initialize it int Pair::instance_total = 0; diff --git a/src/pair.h b/src/pair.h index fbb6d8408b..3378115e49 100644 --- a/src/pair.h +++ b/src/pair.h @@ -32,8 +32,6 @@ class Pair : protected Pointers { friend class Info; public: - enum{NONE,RLINEAR,RSQ,BMP}; - static int instance_total; // # of Pair classes ever instantiated double eng_vdwl,eng_coul; // accumulated energies diff --git a/src/pair_table.h b/src/pair_table.h index 8d5dbdb28a..370efcec2f 100644 --- a/src/pair_table.h +++ b/src/pair_table.h @@ -42,6 +42,7 @@ class PairTable : public Pair { protected: enum{LOOKUP,LINEAR,SPLINE,BITMAP}; + enum{NONE,RLINEAR,RSQ,BMP}; int tabstyle,tablength; struct Table { From 
d8ddef37ed5407a3723b854ffc8ae077fb4c9fc5 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Thu, 5 Jan 2017 20:56:37 -0700 Subject: [PATCH 055/267] put enum back in .cpp file see lammps/lammps#325 --- src/USER-DPD/pair_table_rx.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/USER-DPD/pair_table_rx.cpp b/src/USER-DPD/pair_table_rx.cpp index c8d59c052d..e8f0e81057 100644 --- a/src/USER-DPD/pair_table_rx.cpp +++ b/src/USER-DPD/pair_table_rx.cpp @@ -31,6 +31,8 @@ using namespace LAMMPS_NS; +enum{NONE,RLINEAR,RSQ,BMP}; + #ifdef DBL_EPSILON #define MY_EPSILON (10.0*DBL_EPSILON) #else From 7201f003e57716ac7a14378127dc22fbc63954f1 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Thu, 5 Jan 2017 21:00:39 -0700 Subject: [PATCH 056/267] move another enum back see lammps/lammps#325 --- src/pair_table.cpp | 2 ++ src/pair_table.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pair_table.cpp b/src/pair_table.cpp index 1c6bfe128e..b36843ff44 100644 --- a/src/pair_table.cpp +++ b/src/pair_table.cpp @@ -29,6 +29,8 @@ using namespace LAMMPS_NS; +enum{NONE,RLINEAR,RSQ,BMP}; + #define MAXLINE 1024 #define EPSILONR 1.0e-6 diff --git a/src/pair_table.h b/src/pair_table.h index 370efcec2f..8d5dbdb28a 100644 --- a/src/pair_table.h +++ b/src/pair_table.h @@ -42,7 +42,6 @@ class PairTable : public Pair { protected: enum{LOOKUP,LINEAR,SPLINE,BITMAP}; - enum{NONE,RLINEAR,RSQ,BMP}; int tabstyle,tablength; struct Table { From d26f1403cdb70e88abd9f9d8dced12a3ef16bd51 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Tue, 10 Jan 2017 15:22:52 -0700 Subject: [PATCH 057/267] fix race condition on rho the main bug here is the use of a local rho_i accumulator which later gets assigned back to rho[i]. in parallel, atomic additions can happen to rho[i] while the local accumulator is held; those atomic additions are lost when the accumulator is atomically assigned. we instead initialize the accumulator to zero and atomically add it back to rho[i]. 
--- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index 7cff630cb0..24502f875c 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -526,7 +526,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeLoca const double ytmp = x(i,1); const double ztmp = x(i,2); - double rho_i = rho[i]; + double rho_i_contrib = 0.0; const int itype = type[i]; const int jnum = d_numneigh[i]; @@ -549,7 +549,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeLoca const double tmpFactor = 1.0 - r_over_rcut; const double tmpFactor4 = tmpFactor*tmpFactor*tmpFactor*tmpFactor; const double factor = factor_type11*(1.0 + 1.5*r_over_rcut)*tmpFactor4; - rho_i += factor; + rho_i_contrib += factor; if (NEWTON_PAIR || j < nlocal) a_rho[j] += factor; } else if (rsq < d_cutsq(itype,jtype)) { @@ -557,14 +557,14 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeLoca const double tmpFactor = 1.0-sqrt(rsq)/rcut; const double tmpFactor4 = tmpFactor*tmpFactor*tmpFactor*tmpFactor; const double factor = (84.0/(5.0*pi*rcut*rcut*rcut))*(1.0+3.0*sqrt(rsq)/(2.0*rcut))*tmpFactor4; - rho_i += factor; + rho_i_contrib += factor; if (NEWTON_PAIR || j < nlocal) a_rho[j] += factor; } } } - a_rho[i] = rho_i; + a_rho[i] += rho_i_contrib; } /* ---------------------------------------------------------------------- */ From 6abefe7ef956621d52941fb2f1778665fd6a5e3d Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Fri, 6 Jan 2017 15:41:41 -0700 Subject: [PATCH 058/267] restarting PairTableRXKokkos as an exact copy of PairTableKokkos, now that it derives from PairTable --- src/KOKKOS/Install.sh | 4 +- src/KOKKOS/pair_table_rx_kokkos.cpp | 241 ++++++++++------------------ src/KOKKOS/pair_table_rx_kokkos.h | 185 +++++---------------- 3 files changed, 128 insertions(+), 302 
deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index d796de5e2f..cfda7dbf94 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -195,8 +195,8 @@ action pair_vashishta_kokkos.cpp pair_vashishta.cpp action pair_vashishta_kokkos.h pair_vashishta.h action pair_table_kokkos.cpp action pair_table_kokkos.h -#action pair_table_rx_kokkos.cpp pair_table_rx.cpp -#action pair_table_rx_kokkos.h pair_table_rx.h +action pair_table_rx_kokkos.cpp pair_table_rx.cpp +action pair_table_rx_kokkos.h pair_table_rx.h action pair_tersoff_kokkos.cpp pair_tersoff.cpp action pair_tersoff_kokkos.h pair_tersoff.h action pair_tersoff_mod_kokkos.cpp pair_tersoff_mod.cpp diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index bf32d1c14f..2ccdefd05d 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -12,7 +12,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Stan Moore (SNL) + Contributing author: Dan Ibanez (SNL) ------------------------------------------------------------------------- */ #include @@ -33,20 +33,13 @@ using namespace LAMMPS_NS; -enum{NONE,RLINEAR,RSQ,BMP}; -enum{FULL,HALFTHREAD,HALF}; - -#define MAXLINE 1024 - /* ---------------------------------------------------------------------- */ template -PairTableRXKokkos::PairTableRXKokkos(LAMMPS *lmp) : PairTableRX(lmp) +PairTableRXKokkos::PairTableRXKokkos(LAMMPS *lmp) : PairTable(lmp) { update_table = 0; atomKK = (AtomKokkos *) atom; - ntables = 0; - tables = NULL; execution_space = ExecutionSpaceFromDevice::space; datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; @@ -59,17 +52,12 @@ PairTableRXKokkos::PairTableRXKokkos(LAMMPS *lmp) : PairTableRX(lmp) template PairTableRXKokkos::~PairTableRXKokkos() { -/* for (int m = 
0; m < ntables; m++) free_table(&tables[m]); - memory->sfree(tables); - - if (allocated) { - memory->destroy(setflag); - memory->destroy(cutsq); - memory->destroy(tabindex); - }*/ + if (copymode) return; delete h_table; + h_table = nullptr; delete d_table; - + d_table = nullptr; + copymode = true; //prevents base class destructor from running } /* ---------------------------------------------------------------------- */ @@ -98,7 +86,6 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) if (neighflag == FULL) no_virial_fdotr_compute = 1; - if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; @@ -124,44 +111,44 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) EV_FLOAT ev; if(atom->ntypes > MAX_TYPES_STACKPARAMS) { if (neighflag == FULL) { - PairComputeFunctor,FULL,false,S_TableRXCompute > + PairComputeFunctor,FULL,false,S_TableCompute > ff(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev); else Kokkos::parallel_for(list->inum,ff); } else if (neighflag == HALFTHREAD) { - PairComputeFunctor,HALFTHREAD,false,S_TableRXCompute > + PairComputeFunctor,HALFTHREAD,false,S_TableCompute > ff(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev); else Kokkos::parallel_for(list->inum,ff); } else if (neighflag == HALF) { - PairComputeFunctor,HALF,false,S_TableRXCompute > + PairComputeFunctor,HALF,false,S_TableCompute > f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); else Kokkos::parallel_for(list->inum,f); } else if (neighflag == N2) { - PairComputeFunctor,N2,false,S_TableRXCompute > + PairComputeFunctor,N2,false,S_TableCompute > f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev); else Kokkos::parallel_for(nlocal,f); } } else { if (neighflag == FULL) { - PairComputeFunctor,FULL,true,S_TableRXCompute > + PairComputeFunctor,FULL,true,S_TableCompute > 
f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); else Kokkos::parallel_for(list->inum,f); } else if (neighflag == HALFTHREAD) { - PairComputeFunctor,HALFTHREAD,true,S_TableRXCompute > + PairComputeFunctor,HALFTHREAD,true,S_TableCompute > f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); else Kokkos::parallel_for(list->inum,f); } else if (neighflag == HALF) { - PairComputeFunctor,HALF,true,S_TableRXCompute > + PairComputeFunctor,HALF,true,S_TableCompute > f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); else Kokkos::parallel_for(list->inum,f); } else if (neighflag == N2) { - PairComputeFunctor,N2,true,S_TableRXCompute > + PairComputeFunctor,N2,true,S_TableCompute > f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev); else Kokkos::parallel_for(nlocal,f); @@ -191,27 +178,15 @@ compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, c union_int_float_t rsq_lookup; double fpair; const int tidx = d_table_const.tabindex(itype,jtype); - //const Table* const tb = &tables[tabindex[itype][jtype]]; - - //if (rsq < d_table_const.innersq(tidx)) - // error->one(FLERR,"Pair distance < table inner cutoff"); - - if (Specialisation::TabStyle == LOOKUP) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); - //if (itable >= tlm1) - // error->one(FLERR,"Pair distance > table outer cutoff"); fpair = d_table_const.f(tidx,itable); } else if (Specialisation::TabStyle == LINEAR) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); - //if (itable >= tlm1) - // error->one(FLERR,"Pair distance > table outer cutoff"); const double fraction = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); fpair = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable); } else if 
(Specialisation::TabStyle == SPLINE) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); - //if (itable >= tlm1) - // error->one(FLERR,"Pair distance > table outer cutoff"); const double b = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); const double a = 1.0 - b; fpair = a * d_table_const.f(tidx,itable) + b * d_table_const.f(tidx,itable+1) + @@ -237,26 +212,15 @@ compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, c double evdwl; union_int_float_t rsq_lookup; const int tidx = d_table_const.tabindex(itype,jtype); - //const Table* const tb = &tables[tabindex[itype][jtype]]; - - //if (rsq < d_table_const.innersq(tidx)) - // error->one(FLERR,"Pair distance < table inner cutoff"); - if (Specialisation::TabStyle == LOOKUP) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); - //if (itable >= tlm1) - // error->one(FLERR,"Pair distance > table outer cutoff"); evdwl = d_table_const.e(tidx,itable); } else if (Specialisation::TabStyle == LINEAR) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); - //if (itable >= tlm1) - // error->one(FLERR,"Pair distance > table outer cutoff"); const double fraction = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable); } else if (Specialisation::TabStyle == SPLINE) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); - //if (itable >= tlm1) - // error->one(FLERR,"Pair distance > table outer cutoff"); const double b = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); const double a = 1.0 - b; evdwl = a * d_table_const.e(tidx,itable) + b * d_table_const.e(tidx,itable+1) + @@ -314,6 +278,8 @@ void PairTableRXKokkos::create_kokkos_tables() 
memory->create_kokkos(d_table->drsq,h_table->drsq,ntables,ntable,"Table::drsq"); } + + for(int i=0; i < ntables; i++) { Table* tb = &tables[i]; @@ -343,36 +309,69 @@ void PairTableRXKokkos::create_kokkos_tables() Kokkos::deep_copy(d_table->nshiftbits,h_table->nshiftbits); - Kokkos::deep_copy(d_table->nmask,h_table->nmask); - Kokkos::deep_copy(d_table->innersq,h_table->innersq); - Kokkos::deep_copy(d_table->invdelta,h_table->invdelta); - Kokkos::deep_copy(d_table->deltasq6,h_table->deltasq6); - Kokkos::deep_copy(d_table->rsq,h_table->rsq); - Kokkos::deep_copy(d_table->drsq,h_table->drsq); - Kokkos::deep_copy(d_table->e,h_table->e); - Kokkos::deep_copy(d_table->de,h_table->de); - Kokkos::deep_copy(d_table->f,h_table->f); - Kokkos::deep_copy(d_table->df,h_table->df); - Kokkos::deep_copy(d_table->e2,h_table->e2); - Kokkos::deep_copy(d_table->f2,h_table->f2); - Kokkos::deep_copy(d_table->tabindex,h_table->tabindex); - d_table_const.nshiftbits = d_table->nshiftbits; + Kokkos::deep_copy(d_table->nmask,h_table->nmask); d_table_const.nmask = d_table->nmask; + Kokkos::deep_copy(d_table->innersq,h_table->innersq); d_table_const.innersq = d_table->innersq; + Kokkos::deep_copy(d_table->invdelta,h_table->invdelta); d_table_const.invdelta = d_table->invdelta; + Kokkos::deep_copy(d_table->deltasq6,h_table->deltasq6); d_table_const.deltasq6 = d_table->deltasq6; - d_table_const.rsq = d_table->rsq; - d_table_const.drsq = d_table->drsq; - d_table_const.e = d_table->e; - d_table_const.de = d_table->de; - d_table_const.f = d_table->f; - d_table_const.df = d_table->df; - d_table_const.e2 = d_table->e2; - d_table_const.f2 = d_table->f2; + if(tabstyle == LOOKUP) { + Kokkos::deep_copy(d_table->e,h_table->e); + d_table_const.e = d_table->e; + Kokkos::deep_copy(d_table->f,h_table->f); + d_table_const.f = d_table->f; + } + + if(tabstyle == LINEAR) { + Kokkos::deep_copy(d_table->rsq,h_table->rsq); + d_table_const.rsq = d_table->rsq; + Kokkos::deep_copy(d_table->e,h_table->e); + d_table_const.e 
= d_table->e; + Kokkos::deep_copy(d_table->f,h_table->f); + d_table_const.f = d_table->f; + Kokkos::deep_copy(d_table->de,h_table->de); + d_table_const.de = d_table->de; + Kokkos::deep_copy(d_table->df,h_table->df); + d_table_const.df = d_table->df; + } + + if(tabstyle == SPLINE) { + Kokkos::deep_copy(d_table->rsq,h_table->rsq); + d_table_const.rsq = d_table->rsq; + Kokkos::deep_copy(d_table->e,h_table->e); + d_table_const.e = d_table->e; + Kokkos::deep_copy(d_table->f,h_table->f); + d_table_const.f = d_table->f; + Kokkos::deep_copy(d_table->e2,h_table->e2); + d_table_const.e2 = d_table->e2; + Kokkos::deep_copy(d_table->f2,h_table->f2); + d_table_const.f2 = d_table->f2; + } + + if(tabstyle == BITMAP) { + Kokkos::deep_copy(d_table->rsq,h_table->rsq); + d_table_const.rsq = d_table->rsq; + Kokkos::deep_copy(d_table->e,h_table->e); + d_table_const.e = d_table->e; + Kokkos::deep_copy(d_table->f,h_table->f); + d_table_const.f = d_table->f; + Kokkos::deep_copy(d_table->de,h_table->de); + d_table_const.de = d_table->de; + Kokkos::deep_copy(d_table->df,h_table->df); + d_table_const.df = d_table->df; + Kokkos::deep_copy(d_table->drsq,h_table->drsq); + d_table_const.drsq = d_table->drsq; + } Kokkos::deep_copy(d_table->cutsq,h_table->cutsq); + d_table_const.cutsq = d_table->cutsq; + Kokkos::deep_copy(d_table->tabindex,h_table->tabindex); + d_table_const.tabindex = d_table->tabindex; + update_table = 0; } @@ -389,9 +388,9 @@ void PairTableRXKokkos::allocate() memory->create(setflag,nt,nt,"pair:setflag"); memory->create_kokkos(d_table->cutsq,h_table->cutsq,cutsq,nt,nt,"pair:cutsq"); memory->create_kokkos(d_table->tabindex,h_table->tabindex,tabindex,nt,nt,"pair:tabindex"); - d_table_const.cutsq = d_table->cutsq; d_table_const.tabindex = d_table->tabindex; + memset(&setflag[0][0],0,nt*nt*sizeof(int)); memset(&cutsq[0][0],0,nt*nt*sizeof(double)); memset(&tabindex[0][0],0,nt*nt*sizeof(int)); @@ -469,6 +468,17 @@ double PairTableRXKokkos::init_one(int i, int j) return 
tables[tabindex[i][j]].cut; } +/* ---------------------------------------------------------------------- + compute r,e,f vectors from splined values +------------------------------------------------------------------------- */ + +template +void PairTableRXKokkos::compute_table(Table *tb) +{ + update_table = 1; + PairTable::compute_table(tb); +} + template void PairTableRXKokkos::init_style() { @@ -496,91 +506,6 @@ void PairTableRXKokkos::init_style() } } -/* -template template -KOKKOS_INLINE_FUNCTION -void PairTableRXKokkos:: -ev_tally(EV_FLOAT &ev, const int &i, const int &j, const F_FLOAT &fpair, - const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const -{ - const int EFLAG = eflag; - const int NEWTON_PAIR = newton_pair; - const int VFLAG = vflag_either; - - if (EFLAG) { - if (eflag_atom) { - E_FLOAT epairhalf = 0.5 * (ev.evdwl + ev.ecoul); - if (NEWTON_PAIR || i < nlocal) eatom[i] += epairhalf; - if (NEWTON_PAIR || j < nlocal) eatom[j] += epairhalf; - } - } - - if (VFLAG) { - const E_FLOAT v0 = delx*delx*fpair; - const E_FLOAT v1 = dely*dely*fpair; - const E_FLOAT v2 = delz*delz*fpair; - const E_FLOAT v3 = delx*dely*fpair; - const E_FLOAT v4 = delx*delz*fpair; - const E_FLOAT v5 = dely*delz*fpair; - - if (vflag_global) { - if (NEIGHFLAG) { - if (NEWTON_PAIR) { - ev.v[0] += v0; - ev.v[1] += v1; - ev.v[2] += v2; - ev.v[3] += v3; - ev.v[4] += v4; - ev.v[5] += v5; - } else { - if (i < nlocal) { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; - } - if (j < nlocal) { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; - } - } - } else { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; - } - } - - if (vflag_atom) { - if (NEWTON_PAIR || i < nlocal) { - d_vatom(i,0) += 0.5*v0; - d_vatom(i,1) += 0.5*v1; - d_vatom(i,2) += 0.5*v2; - 
d_vatom(i,3) += 0.5*v3; - d_vatom(i,4) += 0.5*v4; - d_vatom(i,5) += 0.5*v5; - } - if (NEWTON_PAIR || (NEIGHFLAG && j < nlocal)) { - d_vatom(j,0) += 0.5*v0; - d_vatom(j,1) += 0.5*v1; - d_vatom(j,2) += 0.5*v2; - d_vatom(j,3) += 0.5*v3; - d_vatom(j,4) += 0.5*v4; - d_vatom(j,5) += 0.5*v5; - } - } - } -} -*/ template void PairTableRXKokkos::cleanup_copy() { // WHY needed: this prevents parent copy from deallocating any arrays diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index b379901201..c4e07d41d6 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -13,32 +13,21 @@ #ifdef PAIR_CLASS -PairStyle(table/rx/kk,PairTableRXKokkos) -PairStyle(table/rx/kk/device,PairTableRXKokkos) -PairStyle(table/rx/kk/host,PairTableRXKokkos) +PairStyle(table/rx/kk,PairTableKokkos) +PairStyle(table/rx/kk/device,PairTableKokkos) +PairStyle(table/rx/kk/host,PairTableKokkos) #else #ifndef LMP_PAIR_TABLE_RX_KOKKOS_H #define LMP_PAIR_TABLE_RX_KOKKOS_H -#include "pair_table_rx.h" -#include "pair_kokkos.h" -#include "neigh_list_kokkos.h" -#include "atom_kokkos.h" +#include "pair_table_kokkos.h" namespace LAMMPS_NS { -template -struct S_TableRXCompute { - enum {TabStyle = TABSTYLE}; -}; - -template -class PairTableRXComputeFunctor; - template -class PairTableRXKokkos : public PairTableRX { +class PairTableRXKokkos : public PairTable { public: enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2}; @@ -53,27 +42,14 @@ class PairTableRXKokkos : public PairTableRX { template void compute_style(int, int); - /*template - KOKKOS_FUNCTION - EV_FLOAT compute_item(const int& i, - const NeighListKokkos &list) const; -*/ void settings(int, char **); double init_one(int, int); + void init_style(); - protected: - enum{LOOKUP,LINEAR,SPLINE,BITMAP}; - int tabstyle,tablength; - /*struct TableDeviceConst { - typename ArrayTypes::t_ffloat_2d_randomread cutsq; - typename ArrayTypes::t_int_2d_randomread tabindex; - typename 
ArrayTypes::t_int_1d_randomread nshiftbits,nmask; - typename ArrayTypes::t_ffloat_1d_randomread innersq,invdelta,deltasq6; - typename ArrayTypes::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2; - };*/ - //Its faster not to use texture fetch if the number of tables is less than 32! + protected: + struct TableDeviceConst { typename ArrayTypes::t_ffloat_2d cutsq; typename ArrayTypes::t_int_2d tabindex; @@ -102,12 +78,12 @@ class PairTableRXKokkos : public PairTableRX { TableDevice* d_table; TableHost* h_table; - int **tabindex; F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; typename ArrayTypes::t_ffloat_2d d_cutsq; - void allocate(); + virtual void allocate(); + void compute_table(Table *); typename ArrayTypes::t_x_array_randomread x; typename ArrayTypes::t_x_array_const c_x; @@ -137,41 +113,41 @@ class PairTableRXKokkos : public PairTableRX { return 0; } - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + 
friend class PairComputeFunctor >; + friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; + friend class PairComputeFunctor >; friend void pair_virial_fdotr_compute(PairTableRXKokkos*); }; @@ -183,79 +159,4 @@ class PairTableRXKokkos : public PairTableRX { /* ERROR/WARNING messages: -E: Pair distance < table inner cutoff - -Two atoms are closer together than the pairwise table allows. - -E: Pair distance > table outer cutoff - -Two atoms are further apart than the pairwise table allows. - -E: Illegal ... command - -Self-explanatory. Check the input script syntax and compare to the -documentation for the command. You can use -echo screen as a -command-line option when running LAMMPS to see the offending line. - -E: Unknown table style in pair_style command - -Style of table is invalid for use with pair_style table command. 
- -E: Illegal number of pair table entries - -There must be at least 2 table entries. - -E: Invalid pair table length - -Length of read-in pair table is invalid - -E: Invalid pair table cutoff - -Cutoffs in pair_coeff command are not valid with read-in pair table. - -E: Bitmapped table in file does not match requested table - -Setting for bitmapped table in pair_coeff command must match table -in file exactly. - -E: All pair coeffs are not set - -All pair coefficients must be set in the data file or by the -pair_coeff command before running a simulation. - -E: Cannot open file %s - -The specified file cannot be opened. Check that the path and name are -correct. If the file is a compressed file, also check that the gzip -executable can be found and run. - -E: Did not find keyword in table file - -Keyword used in pair_coeff command was not found in table file. - -E: Bitmapped table is incorrect length in table file - -Number of table entries is not a correct power of 2. - -E: Invalid keyword in pair table parameters - -Keyword used in list of table parameters is not recognized. - -E: Pair table parameters did not set N - -List of pair table parameters must include N setting. - -E: Pair table cutoffs must all be equal to use with KSpace - -When using pair style table with a long-range KSpace solver, the -cutoffs for all atom type pairs must all be the same, since the -long-range solver starts at that cutoff. - -E: Cannot use chosen neighbor list style with lj/cut/kk - -That style is not supported by Kokkos. 
- - - - -*/ \ No newline at end of file + */ From f995bb43355f412709cf3420d4b215f39d8bbf61 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Fri, 6 Jan 2017 16:00:35 -0700 Subject: [PATCH 059/267] starting to add getMixingWeights some compile errors to work out --- src/KOKKOS/pair_table_rx_kokkos.cpp | 82 +++++++++++++++++++++++++++++ src/KOKKOS/pair_table_rx_kokkos.h | 5 ++ 2 files changed, 87 insertions(+) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 2ccdefd05d..54882ec3ce 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -33,6 +33,15 @@ using namespace LAMMPS_NS; +#ifdef DBL_EPSILON + #define MY_EPSILON (10.0*DBL_EPSILON) +#else + #define MY_EPSILON (10.0*2.220446049250313e-16) +#endif + +#define OneFluidValue (-1) +#define isOneFluid(_site_) ( (_site_) == OneFluidValue ) + /* ---------------------------------------------------------------------- */ template @@ -516,6 +525,79 @@ void PairTableRXKokkos::cleanup_copy() { h_table=NULL; d_table=NULL; } +template +KOKKOS_INLINE_FUNCTION +void PairTableRXKokkos::getMixingWeights(typename DAT::t_float_2d_randomread dvector, int, double &, double &, double &, double &) { + double fractionOFAold, fractionOFA; + double fractionOld1, fraction1; + double fractionOld2, fraction2; + double nMoleculesOFAold, nMoleculesOFA; + double nMoleculesOld1, nMolecules1; + double nMoleculesOld2, nMolecules2; + double nTotal, nTotalOld; + + nTotal = 0.0; + nTotalOld = 0.0; + for (int ispecies = 0; ispecies < nspecies; ++ispecies){ + nTotal += dvector(ispecies,id); + nTotalOld += dvector(ispecies+nspecies,id); + } + if(nTotal < MY_EPSILON || nTotalOld < MY_EPSILON) + error->all(FLERR,"The number of molecules in CG particle is less than 10*DBL_EPSILON."); + + if (isOneFluid(isite1) == false){ + nMoleculesOld1 = dvector(isite1+nspecies,id); + nMolecules1 = dvector(isite1,id); + fractionOld1 = nMoleculesOld1/nTotalOld; + fraction1 = nMolecules1/nTotal; + 
} + if (isOneFluid(isite2) == false){ + nMoleculesOld2 = dvector(isite2+nspecies,id); + nMolecules2 = dvector(isite2,id); + fractionOld2 = nMoleculesOld2/nTotalOld; + fraction2 = nMolecules2/nTotal; + } + + if (isOneFluid(isite1) || isOneFluid(isite2)){ + nMoleculesOFAold = 0.0; + nMoleculesOFA = 0.0; + fractionOFAold = 0.0; + fractionOFA = 0.0; + + for (int ispecies = 0; ispecies < nspecies; ispecies++){ + if (isite1 == ispecies || isite2 == ispecies) continue; + nMoleculesOFAold += dvector(ispecies+nspecies,id); + nMoleculesOFA += dvector(ispecies,id); + fractionOFAold += dvector(ispecies+nspecies,id)/nTotalOld; + fractionOFA += dvector(ispecies,id)/nTotal; + } + if(isOneFluid(isite1)){ + nMoleculesOld1 = 1.0-(nTotalOld-nMoleculesOFAold); + nMolecules1 = 1.0-(nTotal-nMoleculesOFA); + fractionOld1 = fractionOFAold; + fraction1 = fractionOFA; + } + if(isOneFluid(isite2)){ + nMoleculesOld2 = 1.0-(nTotalOld-nMoleculesOFAold); + nMolecules2 = 1.0-(nTotal-nMoleculesOFA); + fractionOld2 = fractionOFAold; + fraction2 = fractionOFA; + } + } + + if(fractionalWeighting){ + mixWtSite1old = fractionOld1; + mixWtSite1 = fraction1; + mixWtSite2old = fractionOld2; + mixWtSite2 = fraction2; + } else { + mixWtSite1old = nMoleculesOld1; + mixWtSite1 = nMolecules1; + mixWtSite2old = nMoleculesOld2; + mixWtSite2 = nMolecules2; + } +} + namespace LAMMPS_NS { template class PairTableRXKokkos; #ifdef KOKKOS_HAVE_CUDA diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index c4e07d41d6..1878faf16c 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -30,6 +30,8 @@ template class PairTableRXKokkos : public PairTable { public: + using DAT = ArrayTypes; + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2}; enum {COUL_FLAG=0}; typedef DeviceType device_type; @@ -150,6 +152,9 @@ class PairTableRXKokkos : public PairTable { friend class PairComputeFunctor >; friend void pair_virial_fdotr_compute(PairTableRXKokkos*); + + 
KOKKOS_INLINE_FUNCTION + void getMixingWeights(typename DAT::t_float_2d_randomread dvector, int, double &, double &, double &, double &); }; } From 21cde6261aa476d59e32788b036195c7ebb98498 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Mon, 9 Jan 2017 12:29:15 -0700 Subject: [PATCH 060/267] add member variables from PairTableRX --- src/KOKKOS/pair_table_rx_kokkos.cpp | 5 ++++- src/KOKKOS/pair_table_rx_kokkos.h | 9 ++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 54882ec3ce..83fcb2ce1d 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -527,7 +527,10 @@ void PairTableRXKokkos::cleanup_copy() { template KOKKOS_INLINE_FUNCTION -void PairTableRXKokkos::getMixingWeights(typename DAT::t_float_2d_randomread dvector, int, double &, double &, double &, double &) { +void PairTableRXKokkos::getMixingWeights( + typename DAT::t_float_2d_randomread dvector, int id, + double &mixWtSite1old, double &mixWtSite2old, + double &mixWtSite1, double &mixWtSite2) { double fractionOFAold, fractionOFA; double fractionOld1, fraction1; double fractionOld2, fraction2; diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index 1878faf16c..0d8a8f151e 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -153,8 +153,15 @@ class PairTableRXKokkos : public PairTable { friend void pair_virial_fdotr_compute(PairTableRXKokkos*); + /* PairTableRX members */ + + int nspecies; + char *site1, *site2; + int isite1, isite2; + bool fractionalWeighting; + KOKKOS_INLINE_FUNCTION - void getMixingWeights(typename DAT::t_float_2d_randomread dvector, int, double &, double &, double &, double &); + void getMixingWeights(typename DAT::t_float_2d_randomread, int, double &, double &, double &, double &); }; } From afbc6fc628b68baae2286c1c953a29dd78f1779e Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Mon, 9 
Jan 2017 13:17:23 -0700 Subject: [PATCH 061/267] added coeff, settings, single, fix compile --- src/KOKKOS/pair_table_rx_kokkos.cpp | 228 +++++++++++++++++++++++++++- src/KOKKOS/pair_table_rx_kokkos.h | 6 +- 2 files changed, 232 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 83fcb2ce1d..5a71739b6d 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -30,9 +30,12 @@ #include "memory.h" #include "error.h" #include "atom_masks.h" +#include "fix.h" using namespace LAMMPS_NS; +enum{NONE,RLINEAR,RSQ,BMP}; + #ifdef DBL_EPSILON #define MY_EPSILON (10.0*DBL_EPSILON) #else @@ -54,6 +57,7 @@ PairTableRXKokkos::PairTableRXKokkos(LAMMPS *lmp) : PairTable(lmp) datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; h_table = new TableHost(); d_table = new TableDevice(); + fractionalWeighting = true; } /* ---------------------------------------------------------------------- */ @@ -435,6 +439,8 @@ void PairTableRXKokkos::settings(int narg, char **arg) else if (strcmp(arg[iarg],"msm") == 0) msmflag = 1; else if (strcmp(arg[iarg],"dispersion") == 0) dispersionflag = 1; else if (strcmp(arg[iarg],"tip4p") == 0) tip4pflag = 1; + else if (strcmp(arg[iarg],"fractional") == 0) fractionalWeighting = true; + else if (strcmp(arg[iarg],"molecular") == 0) fractionalWeighting = false; else error->all(FLERR,"Illegal pair_style command"); iarg++; } @@ -459,6 +465,148 @@ void PairTableRXKokkos::settings(int narg, char **arg) tables = NULL; } +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +template +void PairTableRXKokkos::coeff(int narg, char **arg) +{ + if (narg != 6 && narg != 7) error->all(FLERR,"Illegal pair_coeff command"); + if (!allocated) allocate(); + + bool rx_flag = false; + for (int i = 0; i < modify->nfix; i++) + if 
(strncmp(modify->fix[i]->style,"rx",2) == 0) rx_flag = true; + if (!rx_flag) error->all(FLERR,"PairTableRX requires a fix rx command."); + + int ilo,ihi,jlo,jhi; + force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi); + force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi); + + int me; + MPI_Comm_rank(world,&me); + tables = (Table *) + memory->srealloc(tables,(ntables+1)*sizeof(Table),"pair:tables"); + Table *tb = &tables[ntables]; + null_table(tb); + if (me == 0) read_table(tb,arg[2],arg[3]); + bcast_table(tb); + + nspecies = atom->nspecies_dpd; + if(nspecies==0) error->all(FLERR,"There are no rx species specified."); + int n; + n = strlen(arg[3]) + 1; + site1 = new char[n]; + strcpy(site1,arg[4]); + + int ispecies; + for (ispecies = 0; ispecies < nspecies; ispecies++){ + if (strcmp(site1,&atom->dname[ispecies][0]) == 0) break; + } + if (ispecies == nspecies && strcmp(site1,"1fluid") != 0) + error->all(FLERR,"Site1 name not recognized in pair coefficients"); + + n = strlen(arg[4]) + 1; + site2 = new char[n]; + strcpy(site2,arg[5]); + + for (ispecies = 0; ispecies < nspecies; ispecies++){ + if (strcmp(site2,&atom->dname[ispecies][0]) == 0) break; + } + if (ispecies == nspecies && strcmp(site2,"1fluid") != 0) + error->all(FLERR,"Site2 name not recognized in pair coefficients"); + + // set table cutoff + + if (narg == 7) tb->cut = force->numeric(FLERR,arg[6]); + else if (tb->rflag) tb->cut = tb->rhi; + else tb->cut = tb->rfile[tb->ninput-1]; + + // error check on table parameters + // insure cutoff is within table + // for BITMAP tables, file values can be in non-ascending order + + if (tb->ninput <= 1) error->one(FLERR,"Invalid pair table length"); + double rlo,rhi; + if (tb->rflag == 0) { + rlo = tb->rfile[0]; + rhi = tb->rfile[tb->ninput-1]; + } else { + rlo = tb->rlo; + rhi = tb->rhi; + } + if (tb->cut <= rlo || tb->cut > rhi) + error->all(FLERR,"Invalid pair table cutoff"); + if (rlo <= 0.0) error->all(FLERR,"Invalid pair table cutoff"); + + // match = 1 if don't need to 
spline read-in tables + // this is only the case if r values needed by final tables + // exactly match r values read from file + // for tabstyle SPLINE, always need to build spline tables + + tb->match = 0; + if (tabstyle == LINEAR && tb->ninput == tablength && + tb->rflag == RSQ && tb->rhi == tb->cut) tb->match = 1; + if (tabstyle == BITMAP && tb->ninput == 1 << tablength && + tb->rflag == BMP && tb->rhi == tb->cut) tb->match = 1; + if (tb->rflag == BMP && tb->match == 0) + error->all(FLERR,"Bitmapped table in file does not match requested table"); + + // spline read-in values and compute r,e,f vectors within table + + if (tb->match == 0) spline_table(tb); + compute_table(tb); + + // store ptr to table in tabindex + + int count = 0; + for (int i = ilo; i <= ihi; i++) { + for (int j = MAX(jlo,i); j <= jhi; j++) { + tabindex[i][j] = ntables; + setflag[i][j] = 1; + count++; + } + } + + if (count == 0) error->all(FLERR,"Illegal pair_coeff command"); + ntables++; + + { + if ( strcmp(site1,"1fluid") == 0 ) + isite1 = OneFluidValue; + else { + isite1 = nspecies; + + for (int k = 0; k < nspecies; k++){ + if (strcmp(site1, atom->dname[k]) == 0){ + isite1 = k; + break; + } + } + + if (isite1 == nspecies) error->all(FLERR,"isite1 == nspecies"); + } + + if ( strcmp(site2,"1fluid") == 0 ) + isite2 = OneFluidValue; + else { + isite2 = nspecies; + + for (int k = 0; k < nspecies; k++){ + if (strcmp(site2, atom->dname[k]) == 0){ + isite2 = ispecies; + break; + } + } + + if (isite2 == nspecies) + error->all(FLERR,"isite2 == nspecies"); + } + } + +} + /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ @@ -477,6 +625,82 @@ double PairTableRXKokkos::init_one(int i, int j) return tables[tabindex[i][j]].cut; } +/* ---------------------------------------------------------------------- */ + +template +double PairTableRXKokkos::single(int i, 
int j, int itype, int jtype, double rsq, + double factor_coul, double factor_lj, + double &fforce) +{ + int itable; + double fraction,value,a,b,phi; + int tlm1 = tablength - 1; + + Table *tb = &tables[tabindex[itype][jtype]]; + double mixWtSite1_i, mixWtSite1_j; + double mixWtSite2_i, mixWtSite2_j; + double mixWtSite1old_i, mixWtSite1old_j; + double mixWtSite2old_i, mixWtSite2old_j; + + fraction = 0.0; + a = 0.0; + b = 0.0; + + typename ArrayTypes::t_float_2d_randomread h_dvector = + atomKK->k_dvector.view(); + getMixingWeights(h_dvector,i,mixWtSite1old_i,mixWtSite2old_i, + mixWtSite1_i,mixWtSite2_i); + getMixingWeights(h_dvector,j,mixWtSite1old_j,mixWtSite2old_j, + mixWtSite1_j,mixWtSite2_j); + + if (rsq < tb->innersq) error->one(FLERR,"Pair distance < table inner cutoff"); + + if (tabstyle == LOOKUP) { + itable = static_cast ((rsq-tb->innersq) * tb->invdelta); + if (itable >= tlm1) error->one(FLERR,"Pair distance > table outer cutoff"); + fforce = factor_lj * tb->f[itable]; + } else if (tabstyle == LINEAR) { + itable = static_cast ((rsq-tb->innersq) * tb->invdelta); + if (itable >= tlm1) error->one(FLERR,"Pair distance > table outer cutoff"); + fraction = (rsq - tb->rsq[itable]) * tb->invdelta; + value = tb->f[itable] + fraction*tb->df[itable]; + fforce = factor_lj * value; + } else if (tabstyle == SPLINE) { + itable = static_cast ((rsq-tb->innersq) * tb->invdelta); + if (itable >= tlm1) error->one(FLERR,"Pair distance > table outer cutoff"); + b = (rsq - tb->rsq[itable]) * tb->invdelta; + a = 1.0 - b; + value = a * tb->f[itable] + b * tb->f[itable+1] + + ((a*a*a-a)*tb->f2[itable] + (b*b*b-b)*tb->f2[itable+1]) * + tb->deltasq6; + fforce = factor_lj * value; + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & tb->nmask; + itable >>= tb->nshiftbits; + fraction = (rsq_lookup.f - tb->rsq[itable]) * tb->drsq[itable]; + value = tb->f[itable] + fraction*tb->df[itable]; + fforce = factor_lj * value; + } + + if (isite1 == isite2) 
fforce = sqrt(mixWtSite1_i*mixWtSite2_j)*fforce; + else fforce = (sqrt(mixWtSite1_i*mixWtSite2_j) + sqrt(mixWtSite2_i*mixWtSite1_j))*fforce; + + if (tabstyle == LOOKUP) + phi = tb->e[itable]; + else if (tabstyle == LINEAR || tabstyle == BITMAP) + phi = tb->e[itable] + fraction*tb->de[itable]; + else + phi = a * tb->e[itable] + b * tb->e[itable+1] + + ((a*a*a-a)*tb->e2[itable] + (b*b*b-b)*tb->e2[itable+1]) * tb->deltasq6; + + if (isite1 == isite2) phi = sqrt(mixWtSite1_i*mixWtSite2_j)*phi; + else phi = (sqrt(mixWtSite1_i*mixWtSite2_j) + sqrt(mixWtSite2_i*mixWtSite1_j))*phi; + + return factor_lj*phi; +} + /* ---------------------------------------------------------------------- compute r,e,f vectors from splined values ------------------------------------------------------------------------- */ @@ -526,9 +750,11 @@ void PairTableRXKokkos::cleanup_copy() { } template +template KOKKOS_INLINE_FUNCTION void PairTableRXKokkos::getMixingWeights( - typename DAT::t_float_2d_randomread dvector, int id, + typename ArrayTypes::t_float_2d_randomread dvector, + int id, double &mixWtSite1old, double &mixWtSite2old, double &mixWtSite1, double &mixWtSite2) { double fractionOFAold, fractionOFA; diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index 0d8a8f151e..de6de61429 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -45,7 +45,9 @@ class PairTableRXKokkos : public PairTable { void compute_style(int, int); void settings(int, char **); + void coeff(int, char **); double init_one(int, int); + virtual double single(int, int, int, int, double, double, double, double &); void init_style(); @@ -160,8 +162,10 @@ class PairTableRXKokkos : public PairTable { int isite1, isite2; bool fractionalWeighting; + template KOKKOS_INLINE_FUNCTION - void getMixingWeights(typename DAT::t_float_2d_randomread, int, double &, double &, double &, double &); + void getMixingWeights(typename ArrayTypes::t_float_2d_randomread, + int, 
double &, double &, double &, double &); }; } From 4d5abe64d5cef0e1299bc7d43ac4482f25333d4f Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Mon, 9 Jan 2017 14:04:03 -0700 Subject: [PATCH 062/267] draft compute_fpair for PairTableRXKokkos --- src/KOKKOS/pair_table_rx_kokkos.cpp | 21 ++++++++++++++++++--- src/KOKKOS/pair_table_rx_kokkos.h | 5 +++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 5a71739b6d..bb6c034dc0 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -53,7 +53,7 @@ PairTableRXKokkos::PairTableRXKokkos(LAMMPS *lmp) : PairTable(lmp) update_table = 0; atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; - datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | DVECTOR_MASK; datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; h_table = new TableHost(); d_table = new TableDevice(); @@ -121,6 +121,19 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) d_cutsq = d_table->cutsq; // loop over neighbors of my atoms + const int ntotal = atom->nlocal + atom->nghost; + mixWtSite1old_ = Kokkos::View("PairTableRxKokkos::mixWtSite1old", ntotal); + mixWtSite2old_ = Kokkos::View("PairTableRxKokkos::mixWtSite2old", ntotal); + mixWtSite1_ = Kokkos::View("PairTableRxKokkos::mixWtSite1", ntotal); + mixWtSite2_ = Kokkos::View("PairTableRxKokkos::mixWtSite2", ntotal); + + typename DAT::t_float_2d_randomread d_dvector = atomKK->k_dvector.view(); + + Kokkos::parallel_for(ntotal, LAMMPS_LAMBDA(int i) { + getMixingWeights(d_dvector, i, mixWtSite1old_(i), mixWtSite2old_(i), + mixWtSite1_(i), mixWtSite2_(i)); + }); + EV_FLOAT ev; if(atom->ntypes > MAX_TYPES_STACKPARAMS) { if (neighflag == FULL) { @@ -186,8 +199,6 @@ template KOKKOS_INLINE_FUNCTION F_FLOAT PairTableRXKokkos:: compute_fpair(const F_FLOAT& 
rsq, const int& i, const int&j, const int& itype, const int& jtype) const { - (void) i; - (void) j; union_int_float_t rsq_lookup; double fpair; const int tidx = d_table_const.tabindex(itype,jtype); @@ -212,6 +223,9 @@ compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, c const double fraction = (rsq_lookup.f - d_table_const.rsq(tidx,itable)) * d_table_const.drsq(tidx,itable); fpair = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable); } + if (isite1 == isite2) fpair *= sqrt(mixWtSite1old_(i) * mixWtSite2old_(j)); + else fpair *= (sqrt(mixWtSite1old_(i) * mixWtSite2old_(j)) + + sqrt(mixWtSite2old_(i) * mixWtSite1old_(j))); return fpair; } @@ -646,6 +660,7 @@ double PairTableRXKokkos::single(int i, int j, int itype, int jtype, a = 0.0; b = 0.0; + atomKK->k_dvector.template sync(); typename ArrayTypes::t_float_2d_randomread h_dvector = atomKK->k_dvector.view(); getMixingWeights(h_dvector,i,mixWtSite1old_i,mixWtSite2old_i, diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index de6de61429..a0d937549f 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -166,6 +166,11 @@ class PairTableRXKokkos : public PairTable { KOKKOS_INLINE_FUNCTION void getMixingWeights(typename ArrayTypes::t_float_2d_randomread, int, double &, double &, double &, double &); + + Kokkos::View mixWtSite1old_; + Kokkos::View mixWtSite2old_; + Kokkos::View mixWtSite1_; + Kokkos::View mixWtSite2_; }; } From c877c07491e32160f42dbde04a7e34e5f6637b57 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Mon, 9 Jan 2017 16:21:32 -0700 Subject: [PATCH 063/267] progress towards custom compute functor which is needed to handle uCG contributions. 
--- src/KOKKOS/pair_table_rx_kokkos.cpp | 220 ++++++++++++++++++++++++++-- src/KOKKOS/pair_table_rx_kokkos.h | 76 +++++----- 2 files changed, 237 insertions(+), 59 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index bb6c034dc0..cc0a416ad9 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -90,6 +90,195 @@ void PairTableRXKokkos::compute(int eflag_in, int vflag_in) compute_style(eflag_in,vflag_in); } +template +template +PairTableRXKokkos::Full::Functor( + PairTableRXKokkos* c_ptr, NeighListKokkos* list_ptr): + c(*c_ptr),f(c.f),list(*list_ptr) +{} + +template +template +PairTableRXKokkos::Full::~Functor() { + c.cleanup_copy(); + list.clean_copy(); +} + +template +template +template +KOKKOS_INLINE_FUNCTION +EV_FLOAT +PairTableRXKokkos::Functor:: +compute_item(const int& ii) { + EV_FLOAT ev; + const int i = list.d_ilist[ii]; + const X_FLOAT xtmp = c.x(i,0); + const X_FLOAT ytmp = c.x(i,1); + const X_FLOAT ztmp = c.x(i,2); + const int itype = c.type(i); + + const AtomNeighborsConst jlist = list.get_neighbors_const(i); + const int jnum = list.d_numneigh[i]; + + double uCG_i = 0.0; + double uCGnew_i = 0.0; + double fx_i = 0.0, fy_i = 0.0, fz_i = 0.0; + + double mixWtSite1old_i = mixWtSite1old(i); + double mixWtSite2old_i = mixWtSite2old(i); + double mixWtSite1_i = mixWtSite1(i); + double mixWtSite2_i = mixWtSite2(i); + + for (int jj = 0; jj < jnum; jj++) { + int j = jlist(jj); + const F_FLOAT factor_lj = c.special_lj[sbmask(j)]; + j &= NEIGHMASK; + + const X_FLOAT delx = xtmp - c.x(j,0); + const X_FLOAT dely = ytmp - c.x(j,1); + const X_FLOAT delz = ztmp - c.x(j,2); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + const int jtype = c.type(j); + + if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) { + double mixWtSite1old_j = mixWtSite1old[j]; + double mixWtSite2old_j = mixWtSite2old[j]; + double mixWtSite1_j = mixWtSite1[j]; + double mixWtSite2_j 
= mixWtSite2[j]; + + const F_FLOAT fpair = factor_lj*c.template compute_fpair(rsq,i,j,itype,jtype); + + fx_i += delx*fpair; + fy_i += dely*fpair; + fz_i += delz*fpair; + + bool do_half = (NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && + (NEWTON_PAIR || j < c.nlocal); + if (do_half) { + f(j,0) -= delx*fpair; + f(j,1) -= dely*fpair; + f(j,2) -= delz*fpair; + } + + auto evdwl = c.template compute_evdwl(rsq,i,j,itype,jtype); + + double evdwlOld; + if (isite1 == isite2) { + evdwlOld = sqrt(mixWtSite1old_i*mixWtSite2old_j)*evdwl; + evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwl; + } else { + evdwlOld = (sqrt(mixWtSite1old_i*mixWtSite2old_j) + + sqrt(mixWtSite2old_i*mixWtSite1old_j))*evdwl; + evdwl = (sqrt(mixWtSite1_i*mixWtSite2_j) + + sqrt(mixWtSite2_i*mixWtSite1_j))*evdwl; + } + evdwlOld *= factor_lj; + evdwl *= factor_lj; + + uCG_i += 0.5*evdwlOld; + if (do_half) uCG(j) += 0.5*evdwlOld; + + uCGnew_i += 0.5*evdwl; + if (do_half) uCGnew(j) += 0.5*evdwl; + evdwl = evdwlOld; + + ev.evdwl += (do_half ? 
1.0 : 0.5)*evdwl; + + if (EVFLAG) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); + } + } + + uCG(i) += uCG_i; + uCGnew(i) += uCGnew_i; + + f(i,0) += fx_i; + f(i,1) += fy_i; + f(i,2) += fz_i; + + return ev; +} + +template +template +KOKKOS_INLINE_FUNCTION +void +PairTableRXKokkos::Functor:: +ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const +{ + const int EFLAG = c.eflag; + const int NEWTON_PAIR = c.newton_pair; + const int VFLAG = c.vflag_either; + + if (VFLAG) { + const E_FLOAT v0 = delx*delx*fpair; + const E_FLOAT v1 = dely*dely*fpair; + const E_FLOAT v2 = delz*delz*fpair; + const E_FLOAT v3 = delx*dely*fpair; + const E_FLOAT v4 = delx*delz*fpair; + const E_FLOAT v5 = dely*delz*fpair; + + if (c.vflag_global) { + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR) { + ev.v[0] += v0; + ev.v[1] += v1; + ev.v[2] += v2; + ev.v[3] += v3; + ev.v[4] += v4; + ev.v[5] += v5; + } else { + if (i < c.nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + if (j < c.nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + } else { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void +PairTableRXKokkos::Functor:: +operator()(const int i) const { + if (c.newton_pair) compute_item<0,1>(i); + else compute_item<0,0>(i); +} + +template +template +KOKKOS_INLINE_FUNCTION +void +PairTableRXKokkos::Functor:: +operator()(const int i, value_type &energy_virial) const { + if (c.newton_pair) energy_virial += compute_item<1,1>(i); + else energy_virial += compute_item<1,0>(i); +} + template template void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) @@ -102,9 
+291,10 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; + if (eflag_atom) error->all(FLERR, "pair table/rx/kk does not handle eflag_atom\n"); + if (vflag_atom) error->all(FLERR, "pair table/rx/kk does not handle vflag_atom\n"); + atomKK->sync(execution_space,datamask_read); - //k_cutsq.template sync(); - //k_params.template sync(); if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); else atomKK->modified(execution_space,F_MASK); @@ -122,10 +312,10 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) // loop over neighbors of my atoms const int ntotal = atom->nlocal + atom->nghost; - mixWtSite1old_ = Kokkos::View("PairTableRxKokkos::mixWtSite1old", ntotal); - mixWtSite2old_ = Kokkos::View("PairTableRxKokkos::mixWtSite2old", ntotal); - mixWtSite1_ = Kokkos::View("PairTableRxKokkos::mixWtSite1", ntotal); - mixWtSite2_ = Kokkos::View("PairTableRxKokkos::mixWtSite2", ntotal); + mixWtSite1old_ = Kokkos::View("PairTableRXKokkos::mixWtSite1old", ntotal); + mixWtSite2old_ = Kokkos::View("PairTableRXKokkos::mixWtSite2old", ntotal); + mixWtSite1_ = Kokkos::View("PairTableRXKokkos::mixWtSite1", ntotal); + mixWtSite2_ = Kokkos::View("PairTableRXKokkos::mixWtSite2", ntotal); typename DAT::t_float_2d_randomread d_dvector = atomKK->k_dvector.view(); @@ -195,21 +385,21 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) } template -template +template KOKKOS_INLINE_FUNCTION F_FLOAT PairTableRXKokkos:: compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const { union_int_float_t rsq_lookup; double fpair; const int tidx = d_table_const.tabindex(itype,jtype); - if (Specialisation::TabStyle == LOOKUP) { + if (TABSTYLE == LOOKUP) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); fpair = d_table_const.f(tidx,itable); - } else if 
(Specialisation::TabStyle == LINEAR) { + } else if (TABSTYLE == LINEAR) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); const double fraction = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); fpair = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable); - } else if (Specialisation::TabStyle == SPLINE) { + } else if (TABSTYLE == SPLINE) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); const double b = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); const double a = 1.0 - b; @@ -230,23 +420,21 @@ compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, c } template -template +template KOKKOS_INLINE_FUNCTION F_FLOAT PairTableRXKokkos:: compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const { - (void) i; - (void) j; double evdwl; union_int_float_t rsq_lookup; const int tidx = d_table_const.tabindex(itype,jtype); - if (Specialisation::TabStyle == LOOKUP) { + if (TABSTYLE == LOOKUP) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); evdwl = d_table_const.e(tidx,itable); - } else if (Specialisation::TabStyle == LINEAR) { + } else if (TABSTYLE == LINEAR) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); const double fraction = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable); - } else if (Specialisation::TabStyle == SPLINE) { + } else if (TABSTYLE == SPLINE) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); const double b = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); const double a = 1.0 - b; diff --git a/src/KOKKOS/pair_table_rx_kokkos.h 
b/src/KOKKOS/pair_table_rx_kokkos.h index a0d937549f..f717dc3f8a 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -33,7 +33,6 @@ class PairTableRXKokkos : public PairTable { using DAT = ArrayTypes; enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2}; - enum {COUL_FLAG=0}; typedef DeviceType device_type; PairTableRXKokkos(class LAMMPS *); @@ -111,48 +110,6 @@ class PairTableRXKokkos : public PairTable { KOKKOS_INLINE_FUNCTION F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const; - template - KOKKOS_INLINE_FUNCTION - F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const { - return 0; - } - - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend class PairComputeFunctor >; - friend void pair_virial_fdotr_compute(PairTableRXKokkos*); 
/* PairTableRX members */ @@ -171,6 +128,39 @@ class PairTableRXKokkos : public PairTable { Kokkos::View mixWtSite2old_; Kokkos::View mixWtSite1_; Kokkos::View mixWtSite2_; + + /* a duplicate of PairComputeFunctor to deal with uCG */ + template + struct Functor { + using device_type = DeviceType; + typedef EV_FLOAT value_type; + PairTableRXKokkos c; + // arrays are atomic for Half(Thread) neighbor style + Kokkos::View::value> > f; + Kokkos::View::value> > uCG; + Kokkos::View::value> > uCGnew; + NeighListKokkos list; + Functor(PairTableRXKokkos* c_ptr, NeighListKokkos* list_ptr); + ~Functor(); + KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const { + return j >> SBBITS & 3; + } + template + KOKKOS_INLINE_FUNCTION + EV_FLOAT compute_item(const int&, + const NeighListKokkos&, const NoCoulTag&) const; + KOKKOS_INLINE_FUNCTION + ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const + KOKKOS_INLINE_FUNCTION + void operator()(const int) const; + KOKKOS_INLINE_FUNCTION + void operator()(const int, value_type&) const; + }; }; } From e4673d7fa80b40dca606c4cf85f92e2f6d2b098b Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Mon, 9 Jan 2017 16:36:25 -0700 Subject: [PATCH 064/267] fix compilation --- src/KOKKOS/pair_table_rx_kokkos.cpp | 52 +++++++++++++---------------- src/KOKKOS/pair_table_rx_kokkos.h | 10 +++--- 2 files changed, 28 insertions(+), 34 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index cc0a416ad9..26e335fcff 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -92,14 +92,14 @@ void PairTableRXKokkos::compute(int eflag_in, int vflag_in) template template -PairTableRXKokkos::Full::Functor( +PairTableRXKokkos::Functor::Functor( PairTableRXKokkos* c_ptr, NeighListKokkos* list_ptr): c(*c_ptr),f(c.f),list(*list_ptr) {} template template 
-PairTableRXKokkos::Full::~Functor() { +PairTableRXKokkos::Functor::~Functor() { c.cleanup_copy(); list.clean_copy(); } @@ -110,7 +110,7 @@ template KOKKOS_INLINE_FUNCTION EV_FLOAT PairTableRXKokkos::Functor:: -compute_item(const int& ii) { +compute_item(const int& ii) const { EV_FLOAT ev; const int i = list.d_ilist[ii]; const X_FLOAT xtmp = c.x(i,0); @@ -125,10 +125,10 @@ compute_item(const int& ii) { double uCGnew_i = 0.0; double fx_i = 0.0, fy_i = 0.0, fz_i = 0.0; - double mixWtSite1old_i = mixWtSite1old(i); - double mixWtSite2old_i = mixWtSite2old(i); - double mixWtSite1_i = mixWtSite1(i); - double mixWtSite2_i = mixWtSite2(i); + double mixWtSite1old_i = c.mixWtSite1old_(i); + double mixWtSite2old_i = c.mixWtSite2old_(i); + double mixWtSite1_i = c.mixWtSite1_(i); + double mixWtSite2_i = c.mixWtSite2_(i); for (int jj = 0; jj < jnum; jj++) { int j = jlist(jj); @@ -142,12 +142,12 @@ compute_item(const int& ii) { const int jtype = c.type(j); if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) { - double mixWtSite1old_j = mixWtSite1old[j]; - double mixWtSite2old_j = mixWtSite2old[j]; - double mixWtSite1_j = mixWtSite1[j]; - double mixWtSite2_j = mixWtSite2[j]; + double mixWtSite1old_j = c.mixWtSite1old_(j); + double mixWtSite2old_j = c.mixWtSite2old_(j); + double mixWtSite1_j = c.mixWtSite1_(j); + double mixWtSite2_j = c.mixWtSite2_(j); - const F_FLOAT fpair = factor_lj*c.template compute_fpair(rsq,i,j,itype,jtype); + const F_FLOAT fpair = factor_lj*c.template compute_fpair(rsq,i,j,itype,jtype); fx_i += delx*fpair; fy_i += dely*fpair; @@ -164,7 +164,7 @@ compute_item(const int& ii) { auto evdwl = c.template compute_evdwl(rsq,i,j,itype,jtype); double evdwlOld; - if (isite1 == isite2) { + if (c.isite1 == c.isite2) { evdwlOld = sqrt(mixWtSite1old_i*mixWtSite2old_j)*evdwl; evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwl; } else { @@ -324,48 +324,42 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) mixWtSite1_(i), mixWtSite2_(i)); }); 
+ if (neighflag == N2) error->all(FLERR,"pair table/rx/kk can't handle N2 yet\n"); + EV_FLOAT ev; if(atom->ntypes > MAX_TYPES_STACKPARAMS) { if (neighflag == FULL) { - PairComputeFunctor,FULL,false,S_TableCompute > - ff(this,(NeighListKokkos*) list); + Functor ff(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev); else Kokkos::parallel_for(list->inum,ff); } else if (neighflag == HALFTHREAD) { - PairComputeFunctor,HALFTHREAD,false,S_TableCompute > - ff(this,(NeighListKokkos*) list); + Functor ff(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev); else Kokkos::parallel_for(list->inum,ff); } else if (neighflag == HALF) { - PairComputeFunctor,HALF,false,S_TableCompute > - f(this,(NeighListKokkos*) list); + Functor f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); else Kokkos::parallel_for(list->inum,f); } else if (neighflag == N2) { - PairComputeFunctor,N2,false,S_TableCompute > - f(this,(NeighListKokkos*) list); + Functor f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev); else Kokkos::parallel_for(nlocal,f); } } else { if (neighflag == FULL) { - PairComputeFunctor,FULL,true,S_TableCompute > - f(this,(NeighListKokkos*) list); + Functor f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); else Kokkos::parallel_for(list->inum,f); } else if (neighflag == HALFTHREAD) { - PairComputeFunctor,HALFTHREAD,true,S_TableCompute > - f(this,(NeighListKokkos*) list); + Functor f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); else Kokkos::parallel_for(list->inum,f); } else if (neighflag == HALF) { - PairComputeFunctor,HALF,true,S_TableCompute > - f(this,(NeighListKokkos*) list); + Functor f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); else Kokkos::parallel_for(list->inum,f); } else if 
(neighflag == N2) { - PairComputeFunctor,N2,true,S_TableCompute > - f(this,(NeighListKokkos*) list); + Functor f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev); else Kokkos::parallel_for(nlocal,f); } diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index f717dc3f8a..c468461263 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -102,11 +102,11 @@ class PairTableRXKokkos : public PairTable { void create_kokkos_tables(); void cleanup_copy(); - template + template KOKKOS_INLINE_FUNCTION F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const; - template + template KOKKOS_INLINE_FUNCTION F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const; @@ -150,12 +150,12 @@ class PairTableRXKokkos : public PairTable { } template KOKKOS_INLINE_FUNCTION - EV_FLOAT compute_item(const int&, - const NeighListKokkos&, const NoCoulTag&) const; + EV_FLOAT compute_item(const int&) const; KOKKOS_INLINE_FUNCTION + void ev_tally(EV_FLOAT &ev, const int &i, const int &j, const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, - const F_FLOAT &dely, const F_FLOAT &delz) const + const F_FLOAT &dely, const F_FLOAT &delz) const; KOKKOS_INLINE_FUNCTION void operator()(const int) const; KOKKOS_INLINE_FUNCTION From 5d5751be190b198c8e8c48526d40217e0c443255 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Tue, 10 Jan 2017 12:38:48 -0700 Subject: [PATCH 065/267] fix class name in PAIR_CLASS setup --- src/KOKKOS/pair_table_rx_kokkos.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index c468461263..de9ae20e35 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -13,9 +13,9 @@ #ifdef PAIR_CLASS -PairStyle(table/rx/kk,PairTableKokkos) 
-PairStyle(table/rx/kk/device,PairTableKokkos) -PairStyle(table/rx/kk/host,PairTableKokkos) +PairStyle(table/rx/kk,PairTableRXKokkos) +PairStyle(table/rx/kk/device,PairTableRXKokkos) +PairStyle(table/rx/kk/host,PairTableRXKokkos) #else From 55aa91be6b88670dc16658883e30bfe0710e695e Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Tue, 10 Jan 2017 13:07:22 -0700 Subject: [PATCH 066/267] copy uCG and uCGnew correctly --- src/KOKKOS/pair_table_rx_kokkos.cpp | 9 ++++++--- src/KOKKOS/pair_table_rx_kokkos.h | 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 26e335fcff..0cb2f11efc 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -53,8 +53,9 @@ PairTableRXKokkos::PairTableRXKokkos(LAMMPS *lmp) : PairTable(lmp) update_table = 0; atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; - datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | DVECTOR_MASK; - datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | + DVECTOR_MASK | UCG_MASK | UCGNEW_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK; h_table = new TableHost(); d_table = new TableDevice(); fractionalWeighting = true; @@ -94,7 +95,7 @@ template template PairTableRXKokkos::Functor::Functor( PairTableRXKokkos* c_ptr, NeighListKokkos* list_ptr): - c(*c_ptr),f(c.f),list(*list_ptr) + c(*c_ptr),f(c.f),uCG(c.uCG),uCGnew(c.uCGnew),list(*list_ptr) {} template @@ -301,6 +302,8 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) x = c_x = atomKK->k_x.view(); f = atomKK->k_f.view(); type = atomKK->k_type.view(); + uCG = atomKK->k_uCG.view(); + uCGnew = atomKK->k_uCGnew.view(); nlocal = atom->nlocal; nall = atom->nlocal + atom->nghost; special_lj[0] = force->special_lj[0]; diff --git a/src/KOKKOS/pair_table_rx_kokkos.h 
b/src/KOKKOS/pair_table_rx_kokkos.h index de9ae20e35..ad8071800f 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -92,6 +92,8 @@ class PairTableRXKokkos : public PairTable { typename ArrayTypes::t_x_array_const c_x; typename ArrayTypes::t_f_array f; typename ArrayTypes::t_int_1d_randomread type; + typename ArrayTypes::t_efloat_1d uCG; + typename ArrayTypes::t_efloat_1d uCGnew; typename ArrayTypes::t_efloat_1d d_eatom; typename ArrayTypes::t_virial_array d_vatom; From d65676e981bdb5d8a9cc2c4dad9c8dfb09ea1d74 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Tue, 10 Jan 2017 16:08:55 -0700 Subject: [PATCH 067/267] make everything public to appease NVCC --- src/KOKKOS/pair_table_rx_kokkos.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index ad8071800f..ed0f0c2eb2 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -50,9 +50,6 @@ class PairTableRXKokkos : public PairTable { void init_style(); - - protected: - struct TableDeviceConst { typename ArrayTypes::t_ffloat_2d cutsq; typename ArrayTypes::t_int_2d tabindex; @@ -97,7 +94,6 @@ class PairTableRXKokkos : public PairTable { typename ArrayTypes::t_efloat_1d d_eatom; typename ArrayTypes::t_virial_array d_vatom; - protected: int nlocal,nall,eflag,vflag,neighflag,newton_pair; int update_table; @@ -163,6 +159,7 @@ class PairTableRXKokkos : public PairTable { KOKKOS_INLINE_FUNCTION void operator()(const int, value_type&) const; }; + }; } From 6a9a0e8c334f60ccc8a6e4a8ff19308cac09a156 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Wed, 11 Jan 2017 09:25:13 -0700 Subject: [PATCH 068/267] tracking down some invalid reads... 
--- src/KOKKOS/Install.sh | 4 +- src/KOKKOS/pair_table_rx_kokkos.cpp | 252 ++++++++++++++-------------- src/KOKKOS/pair_table_rx_kokkos.h | 4 +- 3 files changed, 129 insertions(+), 131 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index cfda7dbf94..e76f62d65d 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -28,8 +28,8 @@ action () { # force rebuild of files with LMP_KOKKOS switch -touch ../accelerator_kokkos.h -touch ../memory.h +#touch ../accelerator_kokkos.h +#touch ../memory.h # list of files with optional dependcies diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 0cb2f11efc..6c7c7b0efe 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -94,15 +94,15 @@ void PairTableRXKokkos::compute(int eflag_in, int vflag_in) template template PairTableRXKokkos::Functor::Functor( - PairTableRXKokkos* c_ptr, NeighListKokkos* list_ptr): - c(*c_ptr),f(c.f),uCG(c.uCG),uCGnew(c.uCGnew),list(*list_ptr) + PairTableRXKokkos* c_ptr, NeighListKokkos* list_ptr)//: +//c(*c_ptr),f(c.f),uCG(c.uCG),uCGnew(c.uCGnew),list(*list_ptr) {} template template PairTableRXKokkos::Functor::~Functor() { - c.cleanup_copy(); - list.clean_copy(); +//c.cleanup_copy(); +//list.clean_copy(); } template @@ -113,89 +113,89 @@ EV_FLOAT PairTableRXKokkos::Functor:: compute_item(const int& ii) const { EV_FLOAT ev; - const int i = list.d_ilist[ii]; - const X_FLOAT xtmp = c.x(i,0); - const X_FLOAT ytmp = c.x(i,1); - const X_FLOAT ztmp = c.x(i,2); - const int itype = c.type(i); +//const int i = list.d_ilist[ii]; +//const X_FLOAT xtmp = c.x(i,0); +//const X_FLOAT ytmp = c.x(i,1); +//const X_FLOAT ztmp = c.x(i,2); +//const int itype = c.type(i); - const AtomNeighborsConst jlist = list.get_neighbors_const(i); - const int jnum = list.d_numneigh[i]; +//const AtomNeighborsConst jlist = list.get_neighbors_const(i); +//const int jnum = list.d_numneigh[i]; - double uCG_i = 0.0; - double uCGnew_i 
= 0.0; - double fx_i = 0.0, fy_i = 0.0, fz_i = 0.0; +//double uCG_i = 0.0; +//double uCGnew_i = 0.0; +//double fx_i = 0.0, fy_i = 0.0, fz_i = 0.0; - double mixWtSite1old_i = c.mixWtSite1old_(i); - double mixWtSite2old_i = c.mixWtSite2old_(i); - double mixWtSite1_i = c.mixWtSite1_(i); - double mixWtSite2_i = c.mixWtSite2_(i); +//double mixWtSite1old_i = c.mixWtSite1old_(i); +//double mixWtSite2old_i = c.mixWtSite2old_(i); +//double mixWtSite1_i = c.mixWtSite1_(i); +//double mixWtSite2_i = c.mixWtSite2_(i); - for (int jj = 0; jj < jnum; jj++) { - int j = jlist(jj); - const F_FLOAT factor_lj = c.special_lj[sbmask(j)]; - j &= NEIGHMASK; +//for (int jj = 0; jj < jnum; jj++) { +// int j = jlist(jj); +// const F_FLOAT factor_lj = c.special_lj[sbmask(j)]; +// j &= NEIGHMASK; - const X_FLOAT delx = xtmp - c.x(j,0); - const X_FLOAT dely = ytmp - c.x(j,1); - const X_FLOAT delz = ztmp - c.x(j,2); - const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; - const int jtype = c.type(j); +// const X_FLOAT delx = xtmp - c.x(j,0); +// const X_FLOAT dely = ytmp - c.x(j,1); +// const X_FLOAT delz = ztmp - c.x(j,2); +// const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; +// const int jtype = c.type(j); - if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) { - double mixWtSite1old_j = c.mixWtSite1old_(j); - double mixWtSite2old_j = c.mixWtSite2old_(j); - double mixWtSite1_j = c.mixWtSite1_(j); - double mixWtSite2_j = c.mixWtSite2_(j); +// if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) { +// double mixWtSite1old_j = c.mixWtSite1old_(j); +// double mixWtSite2old_j = c.mixWtSite2old_(j); +// double mixWtSite1_j = c.mixWtSite1_(j); +// double mixWtSite2_j = c.mixWtSite2_(j); - const F_FLOAT fpair = factor_lj*c.template compute_fpair(rsq,i,j,itype,jtype); +// const F_FLOAT fpair = factor_lj*c.template compute_fpair(rsq,i,j,itype,jtype); - fx_i += delx*fpair; - fy_i += dely*fpair; - fz_i += delz*fpair; +// fx_i += delx*fpair; +// fy_i += 
dely*fpair; +// fz_i += delz*fpair; - bool do_half = (NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && - (NEWTON_PAIR || j < c.nlocal); - if (do_half) { - f(j,0) -= delx*fpair; - f(j,1) -= dely*fpair; - f(j,2) -= delz*fpair; - } +// bool do_half = (NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && +// (NEWTON_PAIR || j < c.nlocal); +// if (do_half) { +// f(j,0) -= delx*fpair; +// f(j,1) -= dely*fpair; +// f(j,2) -= delz*fpair; +// } - auto evdwl = c.template compute_evdwl(rsq,i,j,itype,jtype); +// auto evdwl = c.template compute_evdwl(rsq,i,j,itype,jtype); - double evdwlOld; - if (c.isite1 == c.isite2) { - evdwlOld = sqrt(mixWtSite1old_i*mixWtSite2old_j)*evdwl; - evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwl; - } else { - evdwlOld = (sqrt(mixWtSite1old_i*mixWtSite2old_j) + - sqrt(mixWtSite2old_i*mixWtSite1old_j))*evdwl; - evdwl = (sqrt(mixWtSite1_i*mixWtSite2_j) + - sqrt(mixWtSite2_i*mixWtSite1_j))*evdwl; - } - evdwlOld *= factor_lj; - evdwl *= factor_lj; +// double evdwlOld; +// if (c.isite1 == c.isite2) { +// evdwlOld = sqrt(mixWtSite1old_i*mixWtSite2old_j)*evdwl; +// evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwl; +// } else { +// evdwlOld = (sqrt(mixWtSite1old_i*mixWtSite2old_j) + +// sqrt(mixWtSite2old_i*mixWtSite1old_j))*evdwl; +// evdwl = (sqrt(mixWtSite1_i*mixWtSite2_j) + +// sqrt(mixWtSite2_i*mixWtSite1_j))*evdwl; +// } +// evdwlOld *= factor_lj; +// evdwl *= factor_lj; - uCG_i += 0.5*evdwlOld; - if (do_half) uCG(j) += 0.5*evdwlOld; +// uCG_i += 0.5*evdwlOld; +// if (do_half) uCG(j) += 0.5*evdwlOld; - uCGnew_i += 0.5*evdwl; - if (do_half) uCGnew(j) += 0.5*evdwl; - evdwl = evdwlOld; +// uCGnew_i += 0.5*evdwl; +// if (do_half) uCGnew(j) += 0.5*evdwl; +// evdwl = evdwlOld; - ev.evdwl += (do_half ? 1.0 : 0.5)*evdwl; +// ev.evdwl += (do_half ? 
1.0 : 0.5)*evdwl; - if (EVFLAG) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); - } - } +// if (EVFLAG) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); +// } +//} - uCG(i) += uCG_i; - uCGnew(i) += uCGnew_i; +//uCG(i) += uCG_i; +//uCGnew(i) += uCGnew_i; - f(i,0) += fx_i; - f(i,1) += fy_i; - f(i,2) += fz_i; +//f(i,0) += fx_i; +//f(i,1) += fy_i; +//f(i,2) += fz_i; return ev; } @@ -209,55 +209,55 @@ ev_tally(EV_FLOAT &ev, const int &i, const int &j, const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const { - const int EFLAG = c.eflag; - const int NEWTON_PAIR = c.newton_pair; - const int VFLAG = c.vflag_either; +//const int EFLAG = c.eflag; +//const int NEWTON_PAIR = c.newton_pair; +//const int VFLAG = c.vflag_either; - if (VFLAG) { - const E_FLOAT v0 = delx*delx*fpair; - const E_FLOAT v1 = dely*dely*fpair; - const E_FLOAT v2 = delz*delz*fpair; - const E_FLOAT v3 = delx*dely*fpair; - const E_FLOAT v4 = delx*delz*fpair; - const E_FLOAT v5 = dely*delz*fpair; +//if (VFLAG) { +// const E_FLOAT v0 = delx*delx*fpair; +// const E_FLOAT v1 = dely*dely*fpair; +// const E_FLOAT v2 = delz*delz*fpair; +// const E_FLOAT v3 = delx*dely*fpair; +// const E_FLOAT v4 = delx*delz*fpair; +// const E_FLOAT v5 = dely*delz*fpair; - if (c.vflag_global) { - if (NEIGHFLAG!=FULL) { - if (NEWTON_PAIR) { - ev.v[0] += v0; - ev.v[1] += v1; - ev.v[2] += v2; - ev.v[3] += v3; - ev.v[4] += v4; - ev.v[5] += v5; - } else { - if (i < c.nlocal) { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; - } - if (j < c.nlocal) { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; - } - } - } else { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; - } - } - } +// if (c.vflag_global) { +// if (NEIGHFLAG!=FULL) { +// if (NEWTON_PAIR) { +// 
ev.v[0] += v0; +// ev.v[1] += v1; +// ev.v[2] += v2; +// ev.v[3] += v3; +// ev.v[4] += v4; +// ev.v[5] += v5; +// } else { +// if (i < c.nlocal) { +// ev.v[0] += 0.5*v0; +// ev.v[1] += 0.5*v1; +// ev.v[2] += 0.5*v2; +// ev.v[3] += 0.5*v3; +// ev.v[4] += 0.5*v4; +// ev.v[5] += 0.5*v5; +// } +// if (j < c.nlocal) { +// ev.v[0] += 0.5*v0; +// ev.v[1] += 0.5*v1; +// ev.v[2] += 0.5*v2; +// ev.v[3] += 0.5*v3; +// ev.v[4] += 0.5*v4; +// ev.v[5] += 0.5*v5; +// } +// } +// } else { +// ev.v[0] += 0.5*v0; +// ev.v[1] += 0.5*v1; +// ev.v[2] += 0.5*v2; +// ev.v[3] += 0.5*v3; +// ev.v[4] += 0.5*v4; +// ev.v[5] += 0.5*v5; +// } +// } +//} } template @@ -266,8 +266,8 @@ KOKKOS_INLINE_FUNCTION void PairTableRXKokkos::Functor:: operator()(const int i) const { - if (c.newton_pair) compute_item<0,1>(i); - else compute_item<0,0>(i); +//if (c.newton_pair) compute_item<0,1>(i); +//else compute_item<0,0>(i); } template @@ -276,8 +276,8 @@ KOKKOS_INLINE_FUNCTION void PairTableRXKokkos::Functor:: operator()(const int i, value_type &energy_virial) const { - if (c.newton_pair) energy_virial += compute_item<1,1>(i); - else energy_virial += compute_item<1,0>(i); +//if (c.newton_pair) energy_virial += compute_item<1,1>(i); +//else energy_virial += compute_item<1,0>(i); } template @@ -322,10 +322,10 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) typename DAT::t_float_2d_randomread d_dvector = atomKK->k_dvector.view(); - Kokkos::parallel_for(ntotal, LAMMPS_LAMBDA(int i) { - getMixingWeights(d_dvector, i, mixWtSite1old_(i), mixWtSite2old_(i), - mixWtSite1_(i), mixWtSite2_(i)); - }); +//Kokkos::parallel_for(ntotal, LAMMPS_LAMBDA(int i) { +// getMixingWeights(d_dvector, i, mixWtSite1old_(i), mixWtSite2old_(i), +// mixWtSite1_(i), mixWtSite2_(i)); +//}); if (neighflag == N2) error->all(FLERR,"pair table/rx/kk can't handle N2 yet\n"); @@ -971,8 +971,6 @@ void PairTableRXKokkos::getMixingWeights( nTotal += dvector(ispecies,id); nTotalOld += dvector(ispecies+nspecies,id); } - 
if(nTotal < MY_EPSILON || nTotalOld < MY_EPSILON) - error->all(FLERR,"The number of molecules in CG particle is less than 10*DBL_EPSILON."); if (isOneFluid(isite1) == false){ nMoleculesOld1 = dvector(isite1+nspecies,id); diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index ed0f0c2eb2..b71f57076d 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -132,7 +132,7 @@ class PairTableRXKokkos : public PairTable { struct Functor { using device_type = DeviceType; typedef EV_FLOAT value_type; - PairTableRXKokkos c; + //PairTableRXKokkos c; // arrays are atomic for Half(Thread) neighbor style Kokkos::View::value> > f; @@ -140,7 +140,7 @@ class PairTableRXKokkos : public PairTable { device_type,Kokkos::MemoryTraits::value> > uCG; Kokkos::View::value> > uCGnew; - NeighListKokkos list; + //NeighListKokkos list; Functor(PairTableRXKokkos* c_ptr, NeighListKokkos* list_ptr); ~Functor(); KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const { From b5ff41f5efedd0aea4674263c7f4d620308e3100 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Wed, 11 Jan 2017 11:10:33 -0700 Subject: [PATCH 069/267] made MixingWeights code non-member CUDA was simply giving too many errors dealing with captures of member variables. 
--- src/KOKKOS/pair_table_rx_kokkos.cpp | 199 ++++++++++++++++------------ 1 file changed, 114 insertions(+), 85 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 6c7c7b0efe..63db613538 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -31,6 +31,7 @@ #include "error.h" #include "atom_masks.h" #include "fix.h" +#include using namespace LAMMPS_NS; @@ -45,6 +46,92 @@ enum{NONE,RLINEAR,RSQ,BMP}; #define OneFluidValue (-1) #define isOneFluid(_site_) ( (_site_) == OneFluidValue ) +template +KOKKOS_INLINE_FUNCTION +void getMixingWeights( + typename ArrayTypes::t_float_2d_randomread dvector, + int nspecies, + int isite1, int isite2, + bool fractionalWeighting, + int id, + double &mixWtSite1old, double &mixWtSite2old, + double &mixWtSite1, double &mixWtSite2) { + double fractionOFAold, fractionOFA; + double fractionOld1, fraction1; + double fractionOld2, fraction2; + double nMoleculesOFAold, nMoleculesOFA; + double nMoleculesOld1, nMolecules1; + double nMoleculesOld2, nMolecules2; + double nTotal, nTotalOld; + + nTotal = 0.0; + nTotalOld = 0.0; + assert(id >= 0); + assert(id < dvector.dimension_1()); + for (int ispecies = 0; ispecies < nspecies; ++ispecies){ + assert(ispecies < dvector.dimension_0()); + nTotal += dvector(ispecies,id); + assert(ispecies+nspecies < dvector.dimension_0()); + nTotalOld += dvector(ispecies+nspecies,id); + } + + assert(isite1 >= 0); + assert(isite1 < nspecies); + assert(isite2 >= 0); + assert(isite2 < nspecies); + if (isOneFluid(isite1) == false){ + nMoleculesOld1 = dvector(isite1+nspecies,id); + nMolecules1 = dvector(isite1,id); + fractionOld1 = nMoleculesOld1/nTotalOld; + fraction1 = nMolecules1/nTotal; + } + if (isOneFluid(isite2) == false){ + nMoleculesOld2 = dvector(isite2+nspecies,id); + nMolecules2 = dvector(isite2,id); + fractionOld2 = nMoleculesOld2/nTotalOld; + fraction2 = nMolecules2/nTotal; + } + + if (isOneFluid(isite1) || 
isOneFluid(isite2)){ + nMoleculesOFAold = 0.0; + nMoleculesOFA = 0.0; + fractionOFAold = 0.0; + fractionOFA = 0.0; + + for (int ispecies = 0; ispecies < nspecies; ispecies++){ + if (isite1 == ispecies || isite2 == ispecies) continue; + nMoleculesOFAold += dvector(ispecies+nspecies,id); + nMoleculesOFA += dvector(ispecies,id); + fractionOFAold += dvector(ispecies+nspecies,id)/nTotalOld; + fractionOFA += dvector(ispecies,id)/nTotal; + } + if(isOneFluid(isite1)){ + nMoleculesOld1 = 1.0-(nTotalOld-nMoleculesOFAold); + nMolecules1 = 1.0-(nTotal-nMoleculesOFA); + fractionOld1 = fractionOFAold; + fraction1 = fractionOFA; + } + if(isOneFluid(isite2)){ + nMoleculesOld2 = 1.0-(nTotalOld-nMoleculesOFAold); + nMolecules2 = 1.0-(nTotal-nMoleculesOFA); + fractionOld2 = fractionOFAold; + fraction2 = fractionOFA; + } + } + + if(fractionalWeighting){ + mixWtSite1old = fractionOld1; + mixWtSite1 = fraction1; + mixWtSite2old = fractionOld2; + mixWtSite2 = fraction2; + } else { + mixWtSite1old = nMoleculesOld1; + mixWtSite1 = nMolecules1; + mixWtSite2old = nMoleculesOld2; + mixWtSite2 = nMolecules2; + } +} + /* ---------------------------------------------------------------------- */ template @@ -280,6 +367,24 @@ operator()(const int i, value_type &energy_virial) const { //else energy_virial += compute_item<1,0>(i); } +template +static void getAllMixingWeights( + int ntotal, + typename ArrayTypes::t_float_2d_randomread dvector, + int nspecies, + int isite1, int isite2, + bool fractionalWeighting, + Kokkos::View mixWtSite1old, + Kokkos::View mixWtSite2old, + Kokkos::View mixWtSite1, + Kokkos::View mixWtSite2) { + Kokkos::parallel_for(ntotal, + LAMMPS_LAMBDA(int i) { + getMixingWeights(dvector,nspecies,isite1,isite2,fractionalWeighting, + i, mixWtSite1old(i), mixWtSite2old(i), mixWtSite1(i), mixWtSite2(i)); + }); +} + template template void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) @@ -320,12 +425,9 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) 
mixWtSite1_ = Kokkos::View("PairTableRXKokkos::mixWtSite1", ntotal); mixWtSite2_ = Kokkos::View("PairTableRXKokkos::mixWtSite2", ntotal); - typename DAT::t_float_2d_randomread d_dvector = atomKK->k_dvector.view(); - -//Kokkos::parallel_for(ntotal, LAMMPS_LAMBDA(int i) { -// getMixingWeights(d_dvector, i, mixWtSite1old_(i), mixWtSite2old_(i), -// mixWtSite1_(i), mixWtSite2_(i)); -//}); + getAllMixingWeights(ntotal, atomKK->k_dvector.template view(), + nspecies, isite1, isite2, fractionalWeighting, + mixWtSite1old_, mixWtSite2old_, mixWtSite1_, mixWtSite2_); if (neighflag == N2) error->all(FLERR,"pair table/rx/kk can't handle N2 yet\n"); @@ -848,9 +950,13 @@ double PairTableRXKokkos::single(int i, int j, int itype, int jtype, atomKK->k_dvector.template sync(); typename ArrayTypes::t_float_2d_randomread h_dvector = atomKK->k_dvector.view(); - getMixingWeights(h_dvector,i,mixWtSite1old_i,mixWtSite2old_i, + getMixingWeights(h_dvector, + nspecies, isite1, isite2, fractionalWeighting, + i,mixWtSite1old_i,mixWtSite2old_i, mixWtSite1_i,mixWtSite2_i); - getMixingWeights(h_dvector,j,mixWtSite1old_j,mixWtSite2old_j, + getMixingWeights(h_dvector, + nspecies, isite1, isite2, fractionalWeighting, + j,mixWtSite1old_j,mixWtSite2old_j, mixWtSite1_j,mixWtSite2_j); if (rsq < tb->innersq) error->one(FLERR,"Pair distance < table inner cutoff"); @@ -948,83 +1054,6 @@ void PairTableRXKokkos::cleanup_copy() { vatom = NULL; h_table=NULL; d_table=NULL; } - -template -template -KOKKOS_INLINE_FUNCTION -void PairTableRXKokkos::getMixingWeights( - typename ArrayTypes::t_float_2d_randomread dvector, - int id, - double &mixWtSite1old, double &mixWtSite2old, - double &mixWtSite1, double &mixWtSite2) { - double fractionOFAold, fractionOFA; - double fractionOld1, fraction1; - double fractionOld2, fraction2; - double nMoleculesOFAold, nMoleculesOFA; - double nMoleculesOld1, nMolecules1; - double nMoleculesOld2, nMolecules2; - double nTotal, nTotalOld; - - nTotal = 0.0; - nTotalOld = 0.0; - for (int 
ispecies = 0; ispecies < nspecies; ++ispecies){ - nTotal += dvector(ispecies,id); - nTotalOld += dvector(ispecies+nspecies,id); - } - - if (isOneFluid(isite1) == false){ - nMoleculesOld1 = dvector(isite1+nspecies,id); - nMolecules1 = dvector(isite1,id); - fractionOld1 = nMoleculesOld1/nTotalOld; - fraction1 = nMolecules1/nTotal; - } - if (isOneFluid(isite2) == false){ - nMoleculesOld2 = dvector(isite2+nspecies,id); - nMolecules2 = dvector(isite2,id); - fractionOld2 = nMoleculesOld2/nTotalOld; - fraction2 = nMolecules2/nTotal; - } - - if (isOneFluid(isite1) || isOneFluid(isite2)){ - nMoleculesOFAold = 0.0; - nMoleculesOFA = 0.0; - fractionOFAold = 0.0; - fractionOFA = 0.0; - - for (int ispecies = 0; ispecies < nspecies; ispecies++){ - if (isite1 == ispecies || isite2 == ispecies) continue; - nMoleculesOFAold += dvector(ispecies+nspecies,id); - nMoleculesOFA += dvector(ispecies,id); - fractionOFAold += dvector(ispecies+nspecies,id)/nTotalOld; - fractionOFA += dvector(ispecies,id)/nTotal; - } - if(isOneFluid(isite1)){ - nMoleculesOld1 = 1.0-(nTotalOld-nMoleculesOFAold); - nMolecules1 = 1.0-(nTotal-nMoleculesOFA); - fractionOld1 = fractionOFAold; - fraction1 = fractionOFA; - } - if(isOneFluid(isite2)){ - nMoleculesOld2 = 1.0-(nTotalOld-nMoleculesOFAold); - nMolecules2 = 1.0-(nTotal-nMoleculesOFA); - fractionOld2 = fractionOFAold; - fraction2 = fractionOFA; - } - } - - if(fractionalWeighting){ - mixWtSite1old = fractionOld1; - mixWtSite1 = fraction1; - mixWtSite2old = fractionOld2; - mixWtSite2 = fraction2; - } else { - mixWtSite1old = nMoleculesOld1; - mixWtSite1 = nMolecules1; - mixWtSite2old = nMoleculesOld2; - mixWtSite2 = nMolecules2; - } -} - namespace LAMMPS_NS { template class PairTableRXKokkos; #ifdef KOKKOS_HAVE_CUDA From cb9fdf7801bed0d31e08286fee04655f13ea0435 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Wed, 11 Jan 2017 11:44:54 -0700 Subject: [PATCH 070/267] starting to separate compute_item from the class --- src/KOKKOS/neigh_list_kokkos.h | 10 +- 
src/KOKKOS/pair_table_rx_kokkos.cpp | 211 ++++++++++++++++++---------- src/KOKKOS/pair_table_rx_kokkos.h | 7 - 3 files changed, 148 insertions(+), 80 deletions(-) diff --git a/src/KOKKOS/neigh_list_kokkos.h b/src/KOKKOS/neigh_list_kokkos.h index 45e768927c..32e6e704ae 100644 --- a/src/KOKKOS/neigh_list_kokkos.h +++ b/src/KOKKOS/neigh_list_kokkos.h @@ -48,7 +48,7 @@ class AtomNeighborsConst const int num_neighs; KOKKOS_INLINE_FUNCTION - AtomNeighborsConst(int* const & firstneigh, const int & _num_neighs, + AtomNeighborsConst(const int* const & firstneigh, const int & _num_neighs, const int & stride): _firstneigh(firstneigh), num_neighs(_num_neighs), _stride(stride) {}; KOKKOS_INLINE_FUNCTION @@ -87,6 +87,14 @@ public: &d_neighbors(i,1)-&d_neighbors(i,0)); } + KOKKOS_INLINE_FUNCTION + static AtomNeighborsConst static_neighbors_const(int i, + typename ArrayTypes::t_neighbors_2d_const d_neighbors, + typename ArrayTypes::t_int_1d d_numneigh) { + return AtomNeighborsConst(&d_neighbors(i,0),d_numneigh(i), + &d_neighbors(i,1)-&d_neighbors(i,0)); + } + KOKKOS_INLINE_FUNCTION AtomNeighborsConst get_neighbors_const(const int &i) const { return AtomNeighborsConst(&d_neighbors(i,0),d_numneigh(i), diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 63db613538..c96da87d2f 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -31,6 +31,7 @@ #include "error.h" #include "atom_masks.h" #include "fix.h" +#include "kokkos_few.h" #include using namespace LAMMPS_NS; @@ -192,101 +193,167 @@ PairTableRXKokkos::Functor::~Functor //list.clean_copy(); } -template -template -template +KOKKOS_INLINE_FUNCTION static int sbmask(const int& j) const +{ + return j >> SBBITS & 3; +} + +template KOKKOS_INLINE_FUNCTION -EV_FLOAT -PairTableRXKokkos::Functor:: -compute_item(const int& ii) const { +static EV_FLOAT compute_item(int ii, + typename ArrayTypes::t_in_1d_const d_ilist, + typename ArrayTypes::t_neighbors_2d_const 
d_neighbors, + typename ArrayTypes::t_in_1d_const d_numneigh, + typename ArrayTypes::t_x_array_randomread x, + typename ArrayTypes::t_int_1d_randomread type, + Kokkos::View mixWtSite1old, + Kokkos::View mixWtSite2old, + Kokkos::View mixWtSite1, + Kokkos::View mixWtSite2, + Few special_lj, + Few, MAX_TYPES_STACKPARAMS+1> m_cutsq, + typename ArrayTypes::t_ffloat_2d d_cutsq, + Kokkos::View::t_f_array::array_layout, + DeviceType, + Kokkos::MemoryTraits::value> > f; + Kokkos::View::value> > uCG; + Kokkos::View::value> > uCGnew; + ) { EV_FLOAT ev; -//const int i = list.d_ilist[ii]; -//const X_FLOAT xtmp = c.x(i,0); -//const X_FLOAT ytmp = c.x(i,1); -//const X_FLOAT ztmp = c.x(i,2); -//const int itype = c.type(i); + auto i = d_ilist(ii); + auto xtmp = x(i,0); + auto ytmp = x(i,1); + auto ztmp = x(i,2); + auto itype = type(i); -//const AtomNeighborsConst jlist = list.get_neighbors_const(i); -//const int jnum = list.d_numneigh[i]; + auto jlist = NeighListKokkos::static_neighbors_const(i, + d_neighbors, d_numneigh); + auto jnum = d_numneigh(i); -//double uCG_i = 0.0; -//double uCGnew_i = 0.0; -//double fx_i = 0.0, fy_i = 0.0, fz_i = 0.0; + double uCG_i = 0.0; + double uCGnew_i = 0.0; + double fx_i = 0.0, fy_i = 0.0, fz_i = 0.0; -//double mixWtSite1old_i = c.mixWtSite1old_(i); -//double mixWtSite2old_i = c.mixWtSite2old_(i); -//double mixWtSite1_i = c.mixWtSite1_(i); -//double mixWtSite2_i = c.mixWtSite2_(i); + auto mixWtSite1old_i = mixWtSite1old(i); + auto mixWtSite2old_i = mixWtSite2old(i); + auto mixWtSite1_i = mixWtSite1(i); + auto mixWtSite2_i = mixWtSite2(i); -//for (int jj = 0; jj < jnum; jj++) { -// int j = jlist(jj); -// const F_FLOAT factor_lj = c.special_lj[sbmask(j)]; -// j &= NEIGHMASK; + for (int jj = 0; jj < jnum; jj++) { + auto j = jlist(jj); + const F_FLOAT factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; -// const X_FLOAT delx = xtmp - c.x(j,0); -// const X_FLOAT dely = ytmp - c.x(j,1); -// const X_FLOAT delz = ztmp - c.x(j,2); -// const F_FLOAT rsq = 
delx*delx + dely*dely + delz*delz; -// const int jtype = c.type(j); + auto delx = xtmp - x(j,0); + auto dely = ytmp - x(j,1); + auto delz = ztmp - x(j,2); + auto rsq = delx*delx + dely*dely + delz*delz; + auto jtype = type(j); -// if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) { -// double mixWtSite1old_j = c.mixWtSite1old_(j); -// double mixWtSite2old_j = c.mixWtSite2old_(j); -// double mixWtSite1_j = c.mixWtSite1_(j); -// double mixWtSite2_j = c.mixWtSite2_(j); + if(rsq < (STACKPARAMS ? m_cutsq[itype][jtype] : d_cutsq(itype,jtype))) { + auto mixWtSite1old_j = mixWtSite1old_(j); + auto mixWtSite2old_j = mixWtSite2old_(j); + auto mixWtSite1_j = mixWtSite1_(j); + auto mixWtSite2_j = mixWtSite2_(j); -// const F_FLOAT fpair = factor_lj*c.template compute_fpair(rsq,i,j,itype,jtype); + auto fpair = factor_lj * compute_fpair( + rsq,i,j,itype,jtype); -// fx_i += delx*fpair; -// fy_i += dely*fpair; -// fz_i += delz*fpair; + fx_i += delx*fpair; + fy_i += dely*fpair; + fz_i += delz*fpair; -// bool do_half = (NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && -// (NEWTON_PAIR || j < c.nlocal); -// if (do_half) { -// f(j,0) -= delx*fpair; -// f(j,1) -= dely*fpair; -// f(j,2) -= delz*fpair; -// } + auto do_half = (NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && + (NEWTON_PAIR || j < c.nlocal); + if (do_half) { + f(j,0) -= delx*fpair; + f(j,1) -= dely*fpair; + f(j,2) -= delz*fpair; + } -// auto evdwl = c.template compute_evdwl(rsq,i,j,itype,jtype); + auto evdwl = compute_evdwl( + rsq,i,j,itype,jtype); -// double evdwlOld; -// if (c.isite1 == c.isite2) { -// evdwlOld = sqrt(mixWtSite1old_i*mixWtSite2old_j)*evdwl; -// evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwl; -// } else { -// evdwlOld = (sqrt(mixWtSite1old_i*mixWtSite2old_j) + -// sqrt(mixWtSite2old_i*mixWtSite1old_j))*evdwl; -// evdwl = (sqrt(mixWtSite1_i*mixWtSite2_j) + -// sqrt(mixWtSite2_i*mixWtSite1_j))*evdwl; -// } -// evdwlOld *= factor_lj; -// evdwl *= factor_lj; + double evdwlOld; + if (c.isite1 == 
c.isite2) { + evdwlOld = sqrt(mixWtSite1old_i*mixWtSite2old_j)*evdwl; + evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwl; + } else { + evdwlOld = (sqrt(mixWtSite1old_i*mixWtSite2old_j) + + sqrt(mixWtSite2old_i*mixWtSite1old_j))*evdwl; + evdwl = (sqrt(mixWtSite1_i*mixWtSite2_j) + + sqrt(mixWtSite2_i*mixWtSite1_j))*evdwl; + } + evdwlOld *= factor_lj; + evdwl *= factor_lj; -// uCG_i += 0.5*evdwlOld; -// if (do_half) uCG(j) += 0.5*evdwlOld; + uCG_i += 0.5*evdwlOld; + if (do_half) uCG(j) += 0.5*evdwlOld; -// uCGnew_i += 0.5*evdwl; -// if (do_half) uCGnew(j) += 0.5*evdwl; -// evdwl = evdwlOld; + uCGnew_i += 0.5*evdwl; + if (do_half) uCGnew(j) += 0.5*evdwl; + evdwl = evdwlOld; -// ev.evdwl += (do_half ? 1.0 : 0.5)*evdwl; + ev.evdwl += (do_half ? 1.0 : 0.5)*evdwl; -// if (EVFLAG) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); -// } -//} + if (EVFLAG) { + ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); + } + } + } -//uCG(i) += uCG_i; -//uCGnew(i) += uCGnew_i; + uCG(i) += uCG_i; + uCGnew(i) += uCGnew_i; -//f(i,0) += fx_i; -//f(i,1) += fy_i; -//f(i,2) += fz_i; + f(i,0) += fx_i; + f(i,1) += fy_i; + f(i,2) += fz_i; return ev; } +template +static void compute_all_items( + int eflag, int vflag, + int newton_pair, + EV_FLOAT& ev, + Kokkos::View mixWtSite1old, + Kokkos::View mixWtSite2old, + Kokkos::View mixWtSite1, + Kokkos::View mixWtSite2, + int inum, + if (eflag || vflag) { + Kokkos::parallel_reduce(inum, + LAMMPS_LAMBDA(int i, EV_FLOAT& energy_virial) { + if (newton_pair) { + energy_virial += + compute_item( + ); + } else { + energy_virial += + compute_item( + ); + energy_virial += compute_item<1,0>(i); + } + }, ev); + } else { + Kokkos::parallel_for(inum, + LAMMPS_LAMBDA(int i) { + if (newton_pair) { + compute_item( + ); + } else { + compute_item( + ); + } + }, ev); + } +} + template template KOKKOS_INLINE_FUNCTION diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index b71f57076d..33f96d4c32 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ 
b/src/KOKKOS/pair_table_rx_kokkos.h @@ -85,10 +85,8 @@ class PairTableRXKokkos : public PairTable { virtual void allocate(); void compute_table(Table *); - typename ArrayTypes::t_x_array_randomread x; typename ArrayTypes::t_x_array_const c_x; typename ArrayTypes::t_f_array f; - typename ArrayTypes::t_int_1d_randomread type; typename ArrayTypes::t_efloat_1d uCG; typename ArrayTypes::t_efloat_1d uCGnew; typename ArrayTypes::t_efloat_1d d_eatom; @@ -117,11 +115,6 @@ class PairTableRXKokkos : public PairTable { int isite1, isite2; bool fractionalWeighting; - template - KOKKOS_INLINE_FUNCTION - void getMixingWeights(typename ArrayTypes::t_float_2d_randomread, - int, double &, double &, double &, double &); - Kokkos::View mixWtSite1old_; Kokkos::View mixWtSite2old_; Kokkos::View mixWtSite1_; From c2bb20e60f8396158f6f712386a54a75b7d5ac43 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Wed, 11 Jan 2017 11:54:01 -0700 Subject: [PATCH 071/267] made compute_fpair a free function as well --- src/KOKKOS/pair_table_rx_kokkos.cpp | 81 +++++++++++++++-------------- src/KOKKOS/pair_table_rx_kokkos.h | 4 -- 2 files changed, 43 insertions(+), 42 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index c96da87d2f..26c5de87e4 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -198,6 +198,40 @@ KOKKOS_INLINE_FUNCTION static int sbmask(const int& j) const return j >> SBBITS & 3; } +template +KOKKOS_INLINE_FUNCTION +static F_FLOAT +compute_fpair(F_FLOAT rsq, + int itype, int jtype, + PairTableRXKokkos::TableDeviceConst d_table_const, + ) { + union_int_float_t rsq_lookup; + double fpair; + const int tidx = d_table_const.tabindex(itype,jtype); + if (TABSTYLE == LOOKUP) { + const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + fpair = d_table_const.f(tidx,itable); + } else if (TABSTYLE == LINEAR) { + const int itable = static_cast ((rsq - 
d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + const double fraction = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); + fpair = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable); + } else if (TABSTYLE == SPLINE) { + const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + const double b = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); + const double a = 1.0 - b; + fpair = a * d_table_const.f(tidx,itable) + b * d_table_const.f(tidx,itable+1) + + ((a*a*a-a)*d_table_const.f2(tidx,itable) + (b*b*b-b)*d_table_const.f2(tidx,itable+1)) * + d_table_const.deltasq6(tidx); + } else { + rsq_lookup.f = rsq; + int itable = rsq_lookup.i & d_table_const.nmask(tidx); + itable >>= d_table_const.nshiftbits(tidx); + const double fraction = (rsq_lookup.f - d_table_const.rsq(tidx,itable)) * d_table_const.drsq(tidx,itable); + fpair = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable); + } + return fpair; +} + template KOKKOS_INLINE_FUNCTION @@ -222,6 +256,8 @@ static EV_FLOAT compute_item(int ii, device_type,Kokkos::MemoryTraits::value> > uCG; Kokkos::View::value> > uCGnew; + int isite1, int isite2, + PairTableRXKokkos::TableDeviceConst d_table_const, ) { EV_FLOAT ev; auto i = d_ilist(ii); @@ -260,8 +296,12 @@ static EV_FLOAT compute_item(int ii, auto mixWtSite1_j = mixWtSite1_(j); auto mixWtSite2_j = mixWtSite2_(j); - auto fpair = factor_lj * compute_fpair( - rsq,i,j,itype,jtype); + auto fpair = factor_lj * compute_fpair( + rsq,itype,jtype,d_table_const); + + if (isite1 == isite2) fpair *= sqrt(mixWtSite1old_i * mixWtSite2old_j); + else fpair *= (sqrt(mixWtSite1old_i * mixWtSite2old_j) + + sqrt(mixWtSite2old_i * mixWtSite1old_j)); fx_i += delx*fpair; fy_i += dely*fpair; @@ -279,7 +319,7 @@ static EV_FLOAT compute_item(int ii, rsq,i,j,itype,jtype); double evdwlOld; - if (c.isite1 == c.isite2) { + if (isite1 == isite2) { evdwlOld = 
sqrt(mixWtSite1old_i*mixWtSite2old_j)*evdwl; evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwl; } else { @@ -550,41 +590,6 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) if (vflag_fdotr) pair_virial_fdotr_compute(this); } -template -template -KOKKOS_INLINE_FUNCTION -F_FLOAT PairTableRXKokkos:: -compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const { - union_int_float_t rsq_lookup; - double fpair; - const int tidx = d_table_const.tabindex(itype,jtype); - if (TABSTYLE == LOOKUP) { - const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); - fpair = d_table_const.f(tidx,itable); - } else if (TABSTYLE == LINEAR) { - const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); - const double fraction = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); - fpair = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable); - } else if (TABSTYLE == SPLINE) { - const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); - const double b = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); - const double a = 1.0 - b; - fpair = a * d_table_const.f(tidx,itable) + b * d_table_const.f(tidx,itable+1) + - ((a*a*a-a)*d_table_const.f2(tidx,itable) + (b*b*b-b)*d_table_const.f2(tidx,itable+1)) * - d_table_const.deltasq6(tidx); - } else { - rsq_lookup.f = rsq; - int itable = rsq_lookup.i & d_table_const.nmask(tidx); - itable >>= d_table_const.nshiftbits(tidx); - const double fraction = (rsq_lookup.f - d_table_const.rsq(tidx,itable)) * d_table_const.drsq(tidx,itable); - fpair = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable); - } - if (isite1 == isite2) fpair *= sqrt(mixWtSite1old_(i) * mixWtSite2old_(j)); - else fpair *= (sqrt(mixWtSite1old_(i) * mixWtSite2old_(j)) + - sqrt(mixWtSite2old_(i) * mixWtSite1old_(j))); - return 
fpair; -} - template template KOKKOS_INLINE_FUNCTION diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index 33f96d4c32..b2814adcec 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -98,10 +98,6 @@ class PairTableRXKokkos : public PairTable { void create_kokkos_tables(); void cleanup_copy(); - template - KOKKOS_INLINE_FUNCTION - F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const; - template KOKKOS_INLINE_FUNCTION F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const; From 41804ff52464f6c60ae3a0b32ead2f0a5386d733 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Wed, 11 Jan 2017 12:42:05 -0700 Subject: [PATCH 072/267] progress converting compute_style --- src/KOKKOS/pair_table_rx_kokkos.cpp | 234 +++++++++++++++------------- src/KOKKOS/pair_table_rx_kokkos.h | 11 +- 2 files changed, 126 insertions(+), 119 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 26c5de87e4..a9703dd927 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -232,10 +232,48 @@ compute_fpair(F_FLOAT rsq, return fpair; } +template +KOKKOS_INLINE_FUNCTION +static F_FLOAT +compute_evdwl( + F_FLOAT rsq, + int itype, int jtype, + PairTableRXKokkos::TableDeviceConst d_table_const, + ) const { + double evdwl; + union_int_float_t rsq_lookup; + const int tidx = d_table_const.tabindex(itype,jtype); + if (TABSTYLE == LOOKUP) { + const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + evdwl = d_table_const.e(tidx,itable); + } else if (TABSTYLE == LINEAR) { + const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + const double fraction = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); + evdwl = 
d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable); + } else if (TABSTYLE == SPLINE) { + const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); + const double b = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); + const double a = 1.0 - b; + evdwl = a * d_table_const.e(tidx,itable) + b * d_table_const.e(tidx,itable+1) + + ((a*a*a-a)*d_table_const.e2(tidx,itable) + (b*b*b-b)*d_table_const.e2(tidx,itable+1)) * + d_table_const.deltasq6(tidx); + } else { + rsq_lookup.f = rsq; + int itable = rsq_lookup.i & d_table_const.nmask(tidx); + itable >>= d_table_const.nshiftbits(tidx); + const double fraction = (rsq_lookup.f - d_table_const.rsq(tidx,itable)) * d_table_const.drsq(tidx,itable); + evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable); + } + return evdwl; +} + template KOKKOS_INLINE_FUNCTION -static EV_FLOAT compute_item(int ii, +static EV_FLOAT +compute_item( + int ii, + int nlocal, typename ArrayTypes::t_in_1d_const d_ilist, typename ArrayTypes::t_neighbors_2d_const d_neighbors, typename ArrayTypes::t_in_1d_const d_numneigh, @@ -308,7 +346,7 @@ static EV_FLOAT compute_item(int ii, fz_i += delz*fpair; auto do_half = (NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && - (NEWTON_PAIR || j < c.nlocal); + (NEWTON_PAIR || j < nlocal); if (do_half) { f(j,0) -= delx*fpair; f(j,1) -= dely*fpair; @@ -316,7 +354,7 @@ static EV_FLOAT compute_item(int ii, } auto evdwl = compute_evdwl( - rsq,i,j,itype,jtype); + rsq,itype,jtype,d_table_const); double evdwlOld; if (isite1 == isite2) { @@ -361,22 +399,47 @@ static void compute_all_items( int eflag, int vflag, int newton_pair, EV_FLOAT& ev, + int nlocal, + int inum, + typename ArrayTypes::t_in_1d_const d_ilist, + typename ArrayTypes::t_neighbors_2d_const d_neighbors, + typename ArrayTypes::t_in_1d_const d_numneigh, + typename ArrayTypes::t_x_array_randomread x, + typename ArrayTypes::t_int_1d_randomread type, Kokkos::View 
mixWtSite1old, Kokkos::View mixWtSite2old, Kokkos::View mixWtSite1, Kokkos::View mixWtSite2, - int inum, + Few special_lj, + Few, MAX_TYPES_STACKPARAMS+1> m_cutsq, + typename ArrayTypes::t_ffloat_2d d_cutsq, + Kokkos::View::t_f_array::array_layout, + DeviceType, + Kokkos::MemoryTraits::value> > f; + Kokkos::View::value> > uCG; + Kokkos::View::value> > uCGnew; + int isite1, int isite2, + PairTableRXKokkos::TableDeviceConst d_table_const) { if (eflag || vflag) { Kokkos::parallel_reduce(inum, LAMMPS_LAMBDA(int i, EV_FLOAT& energy_virial) { if (newton_pair) { energy_virial += compute_item( - ); + i, nlocal, d_ilist, d_neighbors, d_numneigh, x, type, + mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, + special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + d_table_const); } else { energy_virial += compute_item( - ); + i, nlocal, d_ilist, d_neighbors, d_numneigh, x, type, + mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, + special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + d_table_const); energy_virial += compute_item<1,0>(i); } }, ev); @@ -385,10 +448,16 @@ static void compute_all_items( LAMMPS_LAMBDA(int i) { if (newton_pair) { compute_item( - ); + i, nlocal, d_ilist, d_neighbors, d_numneigh, x, type, + mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, + special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + d_table_const); } else { compute_item( - ); + i, nlocal, d_ilist, d_neighbors, d_numneigh, x, type, + mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, + special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + d_table_const); } }, ev); } @@ -454,26 +523,6 @@ ev_tally(EV_FLOAT &ev, const int &i, const int &j, //} } -template -template -KOKKOS_INLINE_FUNCTION -void -PairTableRXKokkos::Functor:: -operator()(const int i) const { -//if (c.newton_pair) compute_item<0,1>(i); -//else compute_item<0,0>(i); -} - -template -template -KOKKOS_INLINE_FUNCTION -void -PairTableRXKokkos::Functor:: -operator()(const int i, 
value_type &energy_virial) const { -//if (c.newton_pair) energy_virial += compute_item<1,1>(i); -//else energy_virial += compute_item<1,0>(i); -} - template static void getAllMixingWeights( int ntotal, @@ -496,8 +545,8 @@ template template void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) { - eflag = eflag_in; - vflag = vflag_in; + auto eflag = eflag_in; + auto vflag = vflag_in; if (neighflag == FULL) no_virial_fdotr_compute = 1; @@ -511,69 +560,68 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); else atomKK->modified(execution_space,F_MASK); - x = c_x = atomKK->k_x.view(); - f = atomKK->k_f.view(); - type = atomKK->k_type.view(); - uCG = atomKK->k_uCG.view(); - uCGnew = atomKK->k_uCGnew.view(); - nlocal = atom->nlocal; - nall = atom->nlocal + atom->nghost; - special_lj[0] = force->special_lj[0]; - special_lj[1] = force->special_lj[1]; - special_lj[2] = force->special_lj[2]; - special_lj[3] = force->special_lj[3]; - newton_pair = force->newton_pair; + auto x = atomKK->k_x.view(); + auto f = atomKK->k_f.view(); + auto type = atomKK->k_type.view(); + auto uCG = atomKK->k_uCG.view(); + auto uCGnew = atomKK->k_uCGnew.view(); + auto nlocal = atom->nlocal; + Few special_lj_local; + special_lj_local[0] = force->special_lj[0]; + special_lj_local[1] = force->special_lj[1]; + special_lj_local[2] = force->special_lj[2]; + special_lj_local[3] = force->special_lj[3]; + auto newton_pair = force->newton_pair; d_cutsq = d_table->cutsq; // loop over neighbors of my atoms const int ntotal = atom->nlocal + atom->nghost; - mixWtSite1old_ = Kokkos::View("PairTableRXKokkos::mixWtSite1old", ntotal); - mixWtSite2old_ = Kokkos::View("PairTableRXKokkos::mixWtSite2old", ntotal); - mixWtSite1_ = Kokkos::View("PairTableRXKokkos::mixWtSite1", ntotal); - mixWtSite2_ = Kokkos::View("PairTableRXKokkos::mixWtSite2", ntotal); + auto mixWtSite1old = 
Kokkos::View("PairTableRXKokkos::mixWtSite1old", ntotal); + auto mixWtSite2old = Kokkos::View("PairTableRXKokkos::mixWtSite2old", ntotal); + auto mixWtSite1 = Kokkos::View("PairTableRXKokkos::mixWtSite1", ntotal); + auto mixWtSite2 = Kokkos::View("PairTableRXKokkos::mixWtSite2", ntotal); getAllMixingWeights(ntotal, atomKK->k_dvector.template view(), nspecies, isite1, isite2, fractionalWeighting, - mixWtSite1old_, mixWtSite2old_, mixWtSite1_, mixWtSite2_); + mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2); if (neighflag == N2) error->all(FLERR,"pair table/rx/kk can't handle N2 yet\n"); + NeighListKokkos* l = + dynamic_cast*>(list); + EV_FLOAT ev; if(atom->ntypes > MAX_TYPES_STACKPARAMS) { - if (neighflag == FULL) { - Functor ff(this,(NeighListKokkos*) list); - if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev); - else Kokkos::parallel_for(list->inum,ff); - } else if (neighflag == HALFTHREAD) { - Functor ff(this,(NeighListKokkos*) list); - if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev); - else Kokkos::parallel_for(list->inum,ff); + if (neighflag == HALFTHREAD) { + compute_all_items( + eflag, vflag, newton_pair, ev, nlocal, + l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, + special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + d_table_const); } else if (neighflag == HALF) { - Functor f(this,(NeighListKokkos*) list); - if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); - else Kokkos::parallel_for(list->inum,f); - } else if (neighflag == N2) { - Functor f(this,(NeighListKokkos*) list); - if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev); - else Kokkos::parallel_for(nlocal,f); + compute_all_items( + eflag, vflag, newton_pair, ev, nlocal, + l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, + special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + d_table_const); } } else 
{ - if (neighflag == FULL) { - Functor f(this,(NeighListKokkos*) list); - if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); - else Kokkos::parallel_for(list->inum,f); - } else if (neighflag == HALFTHREAD) { - Functor f(this,(NeighListKokkos*) list); - if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); - else Kokkos::parallel_for(list->inum,f); + if (neighflag == HALFTHREAD) { + compute_all_items( + eflag, vflag, newton_pair, ev, nlocal, + l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, + special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + d_table_const); } else if (neighflag == HALF) { - Functor f(this,(NeighListKokkos*) list); - if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); - else Kokkos::parallel_for(list->inum,f); - } else if (neighflag == N2) { - Functor f(this,(NeighListKokkos*) list); - if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev); - else Kokkos::parallel_for(nlocal,f); + compute_all_items( + eflag, vflag, newton_pair, ev, nlocal, + l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, + special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + d_table_const); } } @@ -590,38 +638,6 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) if (vflag_fdotr) pair_virial_fdotr_compute(this); } -template -template -KOKKOS_INLINE_FUNCTION -F_FLOAT PairTableRXKokkos:: -compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const { - double evdwl; - union_int_float_t rsq_lookup; - const int tidx = d_table_const.tabindex(itype,jtype); - if (TABSTYLE == LOOKUP) { - const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); - evdwl = d_table_const.e(tidx,itable); - } else if (TABSTYLE == LINEAR) { - const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * 
d_table_const.invdelta(tidx)); - const double fraction = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); - evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable); - } else if (TABSTYLE == SPLINE) { - const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); - const double b = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); - const double a = 1.0 - b; - evdwl = a * d_table_const.e(tidx,itable) + b * d_table_const.e(tidx,itable+1) + - ((a*a*a-a)*d_table_const.e2(tidx,itable) + (b*b*b-b)*d_table_const.e2(tidx,itable+1)) * - d_table_const.deltasq6(tidx); - } else { - rsq_lookup.f = rsq; - int itable = rsq_lookup.i & d_table_const.nmask(tidx); - itable >>= d_table_const.nshiftbits(tidx); - const double fraction = (rsq_lookup.f - d_table_const.rsq(tidx,itable)) * d_table_const.drsq(tidx,itable); - evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable); - } - return evdwl; -} - template void PairTableRXKokkos::create_kokkos_tables() { diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index b2814adcec..36441f78b5 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -92,16 +92,12 @@ class PairTableRXKokkos : public PairTable { typename ArrayTypes::t_efloat_1d d_eatom; typename ArrayTypes::t_virial_array d_vatom; - int nlocal,nall,eflag,vflag,neighflag,newton_pair; + int neighflag; int update_table; void create_kokkos_tables(); void cleanup_copy(); - template - KOKKOS_INLINE_FUNCTION - F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const; - friend void pair_virial_fdotr_compute(PairTableRXKokkos*); /* PairTableRX members */ @@ -111,11 +107,6 @@ class PairTableRXKokkos : public PairTable { int isite1, isite2; bool fractionalWeighting; - Kokkos::View mixWtSite1old_; - Kokkos::View mixWtSite2old_; - Kokkos::View 
mixWtSite1_; - Kokkos::View mixWtSite2_; - /* a duplicate of PairComputeFunctor to deal with uCG */ template struct Functor { From fdb6b91e29166f0882169946718b1c2d69c114ac Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Wed, 11 Jan 2017 12:50:32 -0700 Subject: [PATCH 073/267] near trying to compile --- src/KOKKOS/pair_table_rx_kokkos.cpp | 144 ++++++++++++++-------------- 1 file changed, 71 insertions(+), 73 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index a9703dd927..bed69fa0a0 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -267,6 +267,62 @@ compute_evdwl( return evdwl; } +template +KOKKOS_INLINE_FUNCTION +void +ev_tally( + int vflag_global, + int nlocal, + EV_FLOAT& ev, + F_FLOAT epair, F_FLOAT fpair, + F_FLOAT delx, F_FLOAT dely, F_FLOAT delz) +{ + if (vflag_global) { + auto v0 = delx*delx*fpair; + auto v1 = dely*dely*fpair; + auto v2 = delz*delz*fpair; + auto v3 = delx*dely*fpair; + auto v4 = delx*delz*fpair; + auto v5 = dely*delz*fpair; + + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR) { + ev.v[0] += v0; + ev.v[1] += v1; + ev.v[2] += v2; + ev.v[3] += v3; + ev.v[4] += v4; + ev.v[5] += v5; + } else { + if (i < c.nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + if (j < c.nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + } else { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } +} +} + template KOKKOS_INLINE_FUNCTION @@ -296,6 +352,7 @@ compute_item( device_type,Kokkos::MemoryTraits::value> > uCGnew; int isite1, int isite2, PairTableRXKokkos::TableDeviceConst d_table_const, + int vflag_global ) { EV_FLOAT ev; auto i = d_ilist(ii); @@ -379,7 +436,8 @@ compute_item( ev.evdwl += (do_half ? 
1.0 : 0.5)*evdwl; if (EVFLAG) { - ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); + ev_tally( + vflag_global,nlocal,ev,evdwl,fpair,delx,dely,delz); } } } @@ -422,7 +480,8 @@ static void compute_all_items( Kokkos::View::value> > uCGnew; int isite1, int isite2, - PairTableRXKokkos::TableDeviceConst d_table_const) { + PairTableRXKokkos::TableDeviceConst d_table_const, + int vflag_global) { if (eflag || vflag) { Kokkos::parallel_reduce(inum, LAMMPS_LAMBDA(int i, EV_FLOAT& energy_virial) { @@ -432,97 +491,36 @@ static void compute_all_items( i, nlocal, d_ilist, d_neighbors, d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, - d_table_const); + d_table_const, vflag_global); } else { energy_virial += compute_item( i, nlocal, d_ilist, d_neighbors, d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, - d_table_const); - energy_virial += compute_item<1,0>(i); + d_table_const, vflag_global); } }, ev); } else { Kokkos::parallel_for(inum, LAMMPS_LAMBDA(int i) { if (newton_pair) { - compute_item( + compute_item( i, nlocal, d_ilist, d_neighbors, d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, - d_table_const); + d_table_const, vflag_global); } else { - compute_item( + compute_item( i, nlocal, d_ilist, d_neighbors, d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, - d_table_const); + d_table_const, vflag_global); } }, ev); } } -template -template -KOKKOS_INLINE_FUNCTION -void -PairTableRXKokkos::Functor:: -ev_tally(EV_FLOAT &ev, const int &i, const int &j, - const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, - const F_FLOAT &dely, const F_FLOAT &delz) const -{ -//const int EFLAG = c.eflag; -//const int NEWTON_PAIR = c.newton_pair; 
-//const int VFLAG = c.vflag_either; - -//if (VFLAG) { -// const E_FLOAT v0 = delx*delx*fpair; -// const E_FLOAT v1 = dely*dely*fpair; -// const E_FLOAT v2 = delz*delz*fpair; -// const E_FLOAT v3 = delx*dely*fpair; -// const E_FLOAT v4 = delx*delz*fpair; -// const E_FLOAT v5 = dely*delz*fpair; - -// if (c.vflag_global) { -// if (NEIGHFLAG!=FULL) { -// if (NEWTON_PAIR) { -// ev.v[0] += v0; -// ev.v[1] += v1; -// ev.v[2] += v2; -// ev.v[3] += v3; -// ev.v[4] += v4; -// ev.v[5] += v5; -// } else { -// if (i < c.nlocal) { -// ev.v[0] += 0.5*v0; -// ev.v[1] += 0.5*v1; -// ev.v[2] += 0.5*v2; -// ev.v[3] += 0.5*v3; -// ev.v[4] += 0.5*v4; -// ev.v[5] += 0.5*v5; -// } -// if (j < c.nlocal) { -// ev.v[0] += 0.5*v0; -// ev.v[1] += 0.5*v1; -// ev.v[2] += 0.5*v2; -// ev.v[3] += 0.5*v3; -// ev.v[4] += 0.5*v4; -// ev.v[5] += 0.5*v5; -// } -// } -// } else { -// ev.v[0] += 0.5*v0; -// ev.v[1] += 0.5*v1; -// ev.v[2] += 0.5*v2; -// ev.v[3] += 0.5*v3; -// ev.v[4] += 0.5*v4; -// ev.v[5] += 0.5*v5; -// } -// } -//} -} - template static void getAllMixingWeights( int ntotal, @@ -598,14 +596,14 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, - d_table_const); + d_table_const, vflag_global); } else if (neighflag == HALF) { compute_all_items( eflag, vflag, newton_pair, ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, - d_table_const); + d_table_const, vflag_global); } } else { if (neighflag == HALFTHREAD) { @@ -614,14 +612,14 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, 
isite2, - d_table_const); + d_table_const, vflag_global); } else if (neighflag == HALF) { compute_all_items( eflag, vflag, newton_pair, ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, - d_table_const); + d_table_const, vflag_global); } } From 5dcbbba4ce53654bc40dc363872878358bfc73b5 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Wed, 11 Jan 2017 13:15:01 -0700 Subject: [PATCH 074/267] lots of work towards compiling --- src/KOKKOS/pair_table_rx_kokkos.cpp | 88 ++++++++++++++--------------- src/KOKKOS/pair_table_rx_kokkos.h | 7 +-- src/pair.h | 2 + src/pair_table.h | 2 +- 4 files changed, 50 insertions(+), 49 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index bed69fa0a0..c6206b828b 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -182,7 +182,7 @@ void PairTableRXKokkos::compute(int eflag_in, int vflag_in) template template PairTableRXKokkos::Functor::Functor( - PairTableRXKokkos* c_ptr, NeighListKokkos* list_ptr)//: + PairTableRXKokkos* c_ptr, NeighListKokkos* list_ptr)//: //c(*c_ptr),f(c.f),uCG(c.uCG),uCGnew(c.uCGnew),list(*list_ptr) {} @@ -193,7 +193,7 @@ PairTableRXKokkos::Functor::~Functor //list.clean_copy(); } -KOKKOS_INLINE_FUNCTION static int sbmask(const int& j) const +KOKKOS_INLINE_FUNCTION static int sbmask(const int& j) { return j >> SBBITS & 3; } @@ -203,19 +203,19 @@ KOKKOS_INLINE_FUNCTION static F_FLOAT compute_fpair(F_FLOAT rsq, int itype, int jtype, - PairTableRXKokkos::TableDeviceConst d_table_const, + typename PairTableRXKokkos::TableDeviceConst d_table_const ) { - union_int_float_t rsq_lookup; + Pair::union_int_float_t rsq_lookup; double fpair; const int tidx = d_table_const.tabindex(itype,jtype); - if (TABSTYLE == LOOKUP) { + if (TABSTYLE == PairTable::LOOKUP) { const int itable = static_cast ((rsq - 
d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); fpair = d_table_const.f(tidx,itable); - } else if (TABSTYLE == LINEAR) { + } else if (TABSTYLE == PairTable::LINEAR) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); const double fraction = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); fpair = d_table_const.f(tidx,itable) + fraction*d_table_const.df(tidx,itable); - } else if (TABSTYLE == SPLINE) { + } else if (TABSTYLE == PairTable::SPLINE) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); const double b = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); const double a = 1.0 - b; @@ -238,19 +238,19 @@ static F_FLOAT compute_evdwl( F_FLOAT rsq, int itype, int jtype, - PairTableRXKokkos::TableDeviceConst d_table_const, - ) const { + typename PairTableRXKokkos::TableDeviceConst d_table_const + ) { double evdwl; - union_int_float_t rsq_lookup; + Pair::union_int_float_t rsq_lookup; const int tidx = d_table_const.tabindex(itype,jtype); - if (TABSTYLE == LOOKUP) { + if (TABSTYLE == PairTable::LOOKUP) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); evdwl = d_table_const.e(tidx,itable); - } else if (TABSTYLE == LINEAR) { + } else if (TABSTYLE == PairTable::LINEAR) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); const double fraction = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); evdwl = d_table_const.e(tidx,itable) + fraction*d_table_const.de(tidx,itable); - } else if (TABSTYLE == SPLINE) { + } else if (TABSTYLE == PairTable::SPLINE) { const int itable = static_cast ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); const double b = (rsq - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx); const double a = 1.0 - b; @@ -273,6 +273,7 @@ void 
ev_tally( int vflag_global, int nlocal, + int i, int j, EV_FLOAT& ev, F_FLOAT epair, F_FLOAT fpair, F_FLOAT delx, F_FLOAT dely, F_FLOAT delz) @@ -294,7 +295,7 @@ ev_tally( ev.v[4] += v4; ev.v[5] += v5; } else { - if (i < c.nlocal) { + if (i < nlocal) { ev.v[0] += 0.5*v0; ev.v[1] += 0.5*v1; ev.v[2] += 0.5*v2; @@ -302,7 +303,7 @@ ev_tally( ev.v[4] += 0.5*v4; ev.v[5] += 0.5*v5; } - if (j < c.nlocal) { + if (j < nlocal) { ev.v[0] += 0.5*v0; ev.v[1] += 0.5*v1; ev.v[2] += 0.5*v2; @@ -321,7 +322,6 @@ ev_tally( } } } -} template @@ -330,9 +330,9 @@ static EV_FLOAT compute_item( int ii, int nlocal, - typename ArrayTypes::t_in_1d_const d_ilist, - typename ArrayTypes::t_neighbors_2d_const d_neighbors, - typename ArrayTypes::t_in_1d_const d_numneigh, + typename ArrayTypes::t_int_1d_const d_ilist, + typename ArrayTypes::t_neighbors_2d_const d_neighbors, + typename ArrayTypes::t_int_1d_const d_numneigh, typename ArrayTypes::t_x_array_randomread x, typename ArrayTypes::t_int_1d_randomread type, Kokkos::View mixWtSite1old, @@ -345,13 +345,13 @@ compute_item( Kokkos::View::t_f_array::array_layout, DeviceType, - Kokkos::MemoryTraits::value> > f; + Kokkos::MemoryTraits::value> > f, Kokkos::View::value> > uCG; + DeviceType,Kokkos::MemoryTraits::value> > uCG, Kokkos::View::value> > uCGnew; + DeviceType,Kokkos::MemoryTraits::value> > uCGnew, int isite1, int isite2, - PairTableRXKokkos::TableDeviceConst d_table_const, + typename PairTableRXKokkos::TableDeviceConst d_table_const, int vflag_global ) { EV_FLOAT ev; @@ -386,10 +386,10 @@ compute_item( auto jtype = type(j); if(rsq < (STACKPARAMS ? 
m_cutsq[itype][jtype] : d_cutsq(itype,jtype))) { - auto mixWtSite1old_j = mixWtSite1old_(j); - auto mixWtSite2old_j = mixWtSite2old_(j); - auto mixWtSite1_j = mixWtSite1_(j); - auto mixWtSite2_j = mixWtSite2_(j); + auto mixWtSite1old_j = mixWtSite1old(j); + auto mixWtSite2old_j = mixWtSite2old(j); + auto mixWtSite1_j = mixWtSite1(j); + auto mixWtSite2_j = mixWtSite2(j); auto fpair = factor_lj * compute_fpair( rsq,itype,jtype,d_table_const); @@ -437,7 +437,7 @@ compute_item( if (EVFLAG) { ev_tally( - vflag_global,nlocal,ev,evdwl,fpair,delx,dely,delz); + vflag_global,nlocal,i,j,ev,evdwl,fpair,delx,dely,delz); } } } @@ -459,9 +459,9 @@ static void compute_all_items( EV_FLOAT& ev, int nlocal, int inum, - typename ArrayTypes::t_in_1d_const d_ilist, - typename ArrayTypes::t_neighbors_2d_const d_neighbors, - typename ArrayTypes::t_in_1d_const d_numneigh, + typename ArrayTypes::t_int_1d_const d_ilist, + typename ArrayTypes::t_neighbors_2d_const d_neighbors, + typename ArrayTypes::t_int_1d_const d_numneigh, typename ArrayTypes::t_x_array_randomread x, typename ArrayTypes::t_int_1d_randomread type, Kokkos::View mixWtSite1old, @@ -474,13 +474,13 @@ static void compute_all_items( Kokkos::View::t_f_array::array_layout, DeviceType, - Kokkos::MemoryTraits::value> > f; + Kokkos::MemoryTraits::value> > f, Kokkos::View::value> > uCG; + DeviceType,Kokkos::MemoryTraits::value> > uCG, Kokkos::View::value> > uCGnew; + DeviceType,Kokkos::MemoryTraits::value> > uCGnew, int isite1, int isite2, - PairTableRXKokkos::TableDeviceConst d_table_const, + typename PairTableRXKokkos::TableDeviceConst d_table_const, int vflag_global) { if (eflag || vflag) { Kokkos::parallel_reduce(inum, @@ -517,7 +517,7 @@ static void compute_all_items( special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, d_table_const, vflag_global); } - }, ev); + }); } } @@ -558,8 +558,8 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) if (eflag || vflag) 
atomKK->modified(execution_space,datamask_modify); else atomKK->modified(execution_space,F_MASK); - auto x = atomKK->k_x.view(); - auto f = atomKK->k_f.view(); + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); auto type = atomKK->k_type.view(); auto uCG = atomKK->k_uCG.view(); auto uCGnew = atomKK->k_uCGnew.view(); @@ -595,14 +595,14 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) eflag, vflag, newton_pair, ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, - special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, d_table_const, vflag_global); } else if (neighflag == HALF) { compute_all_items( eflag, vflag, newton_pair, ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, - special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, d_table_const, vflag_global); } } else { @@ -611,14 +611,14 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) eflag, vflag, newton_pair, ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, - special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, d_table_const, vflag_global); } else if (neighflag == HALF) { - compute_all_items( + compute_all_items( eflag, vflag, newton_pair, ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, - special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, d_table_const, vflag_global); } } @@ -1067,7 +1067,7 @@ double PairTableRXKokkos::single(int i, int j, int itype, int jtype, 
tb->deltasq6; fforce = factor_lj * value; } else { - union_int_float_t rsq_lookup; + Pair::union_int_float_t rsq_lookup; rsq_lookup.f = rsq; itable = rsq_lookup.i & tb->nmask; itable >>= tb->nshiftbits; diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index 36441f78b5..fdd863e4bc 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -23,6 +23,7 @@ PairStyle(table/rx/kk/host,PairTableRXKokkos) #define LMP_PAIR_TABLE_RX_KOKKOS_H #include "pair_table_kokkos.h" +#include "kokkos_few.h" namespace LAMMPS_NS { @@ -78,17 +79,15 @@ class PairTableRXKokkos : public PairTable { TableDevice* d_table; TableHost* h_table; - F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + Few, MAX_TYPES_STACKPARAMS+1> m_cutsq; typename ArrayTypes::t_ffloat_2d d_cutsq; virtual void allocate(); void compute_table(Table *); - typename ArrayTypes::t_x_array_const c_x; + typename ArrayTypes::t_x_array_randomread x; typename ArrayTypes::t_f_array f; - typename ArrayTypes::t_efloat_1d uCG; - typename ArrayTypes::t_efloat_1d uCGnew; typename ArrayTypes::t_efloat_1d d_eatom; typename ArrayTypes::t_virial_array d_vatom; diff --git a/src/pair.h b/src/pair.h index 3378115e49..ecb54bcf4d 100644 --- a/src/pair.h +++ b/src/pair.h @@ -211,10 +211,12 @@ class Pair : protected Pointers { double tabinner; // inner cutoff for Coulomb table double tabinner_disp; // inner cutoff for dispersion table + public: // custom data type for accessing Coulomb tables typedef union {int i; float f;} union_int_float_t; + protected: int vflag_fdotr; int maxeatom,maxvatom; diff --git a/src/pair_table.h b/src/pair_table.h index caffebdf31..b723fd2d98 100644 --- a/src/pair_table.h +++ b/src/pair_table.h @@ -40,9 +40,9 @@ class PairTable : public Pair { virtual double single(int, int, int, int, double, double, double, double &); void *extract(const char *, int &); - protected: enum{LOOKUP,LINEAR,SPLINE,BITMAP}; + protected: int 
tabstyle,tablength; struct Table { int ninput,rflag,fpflag,match,ntablebits; From 52761aee0dc0ab1a6319c9d8ab7baa1f6940351b Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Wed, 11 Jan 2017 13:18:13 -0700 Subject: [PATCH 075/267] it compiles. --- src/KOKKOS/neigh_list_kokkos.h | 2 +- src/KOKKOS/pair_table_rx_kokkos.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/neigh_list_kokkos.h b/src/KOKKOS/neigh_list_kokkos.h index 32e6e704ae..b43e1106f2 100644 --- a/src/KOKKOS/neigh_list_kokkos.h +++ b/src/KOKKOS/neigh_list_kokkos.h @@ -90,7 +90,7 @@ public: KOKKOS_INLINE_FUNCTION static AtomNeighborsConst static_neighbors_const(int i, typename ArrayTypes::t_neighbors_2d_const d_neighbors, - typename ArrayTypes::t_int_1d d_numneigh) { + typename ArrayTypes::t_int_1d_const d_numneigh) { return AtomNeighborsConst(&d_neighbors(i,0),d_numneigh(i), &d_neighbors(i,1)-&d_neighbors(i,0)); } diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index c6206b828b..2a9e1bb13b 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -410,7 +410,7 @@ compute_item( f(j,2) -= delz*fpair; } - auto evdwl = compute_evdwl( + auto evdwl = compute_evdwl( rsq,itype,jtype,d_table_const); double evdwlOld; From 3580e5409de58417370feae7eb7727ef9480fbde Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 12 Jan 2017 09:00:07 -0700 Subject: [PATCH 076/267] Fixing Kokkos CUDA compile error --- lib/kokkos/Makefile.kokkos | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 73a332ee11..94d0452428 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -20,7 +20,7 @@ KOKKOS_OPTIONS ?= "" #Default settings specific options #Options: force_uvm,use_ldg,rdc,enable_lambda -KOKKOS_CUDA_OPTIONS ?= "" +KOKKOS_CUDA_OPTIONS ?= "enable_lambda" # Check for general settings From 0c3b9426862c4da7730834a194a6e936c7593a7a Mon 
Sep 17 00:00:00 2001 From: Dan Ibanez Date: Thu, 12 Jan 2017 13:50:30 -0700 Subject: [PATCH 077/267] cleanup changes to Install.sh --- src/KOKKOS/Install.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index e76f62d65d..cf753ecee8 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -28,8 +28,8 @@ action () { # force rebuild of files with LMP_KOKKOS switch -#touch ../accelerator_kokkos.h -#touch ../memory.h +touch ../accelerator_kokkos.h +touch ../memory.h # list of files with optional dependcies @@ -196,7 +196,7 @@ action pair_vashishta_kokkos.h pair_vashishta.h action pair_table_kokkos.cpp action pair_table_kokkos.h action pair_table_rx_kokkos.cpp pair_table_rx.cpp -action pair_table_rx_kokkos.h pair_table_rx.h +action pair_table_rx_kokkos.h pair_table_rx.h action pair_tersoff_kokkos.cpp pair_tersoff.cpp action pair_tersoff_kokkos.h pair_tersoff.h action pair_tersoff_mod_kokkos.cpp pair_tersoff_mod.cpp From 4dab6737ba59e402bf9a7609e66e24a75c313699 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Thu, 12 Jan 2017 14:15:42 -0700 Subject: [PATCH 078/267] remove leftover code --- src/KOKKOS/pair_table_rx_kokkos.cpp | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 2a9e1bb13b..66089009a2 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -179,20 +179,6 @@ void PairTableRXKokkos::compute(int eflag_in, int vflag_in) compute_style(eflag_in,vflag_in); } -template -template -PairTableRXKokkos::Functor::Functor( - PairTableRXKokkos* c_ptr, NeighListKokkos* list_ptr)//: -//c(*c_ptr),f(c.f),uCG(c.uCG),uCGnew(c.uCGnew),list(*list_ptr) -{} - -template -template -PairTableRXKokkos::Functor::~Functor() { -//c.cleanup_copy(); -//list.clean_copy(); -} - KOKKOS_INLINE_FUNCTION static int sbmask(const int& j) { return j >> SBBITS & 3; From 
cce10f6dff0dc0c28729e5787e9bc998751692b5 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Thu, 12 Jan 2017 14:19:10 -0700 Subject: [PATCH 079/267] remove more leftover code --- src/KOKKOS/pair_table_rx_kokkos.h | 33 ------------------------------- 1 file changed, 33 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index fdd863e4bc..4e94802d72 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -106,39 +106,6 @@ class PairTableRXKokkos : public PairTable { int isite1, isite2; bool fractionalWeighting; - /* a duplicate of PairComputeFunctor to deal with uCG */ - template - struct Functor { - using device_type = DeviceType; - typedef EV_FLOAT value_type; - //PairTableRXKokkos c; - // arrays are atomic for Half(Thread) neighbor style - Kokkos::View::value> > f; - Kokkos::View::value> > uCG; - Kokkos::View::value> > uCGnew; - //NeighListKokkos list; - Functor(PairTableRXKokkos* c_ptr, NeighListKokkos* list_ptr); - ~Functor(); - KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const { - return j >> SBBITS & 3; - } - template - KOKKOS_INLINE_FUNCTION - EV_FLOAT compute_item(const int&) const; - KOKKOS_INLINE_FUNCTION - void - ev_tally(EV_FLOAT &ev, const int &i, const int &j, - const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, - const F_FLOAT &dely, const F_FLOAT &delz) const; - KOKKOS_INLINE_FUNCTION - void operator()(const int) const; - KOKKOS_INLINE_FUNCTION - void operator()(const int, value_type&) const; - }; - }; } From 0635151e2db0e53b3680f5bc8613c078e99cf901 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 12 Jan 2017 16:22:24 -0700 Subject: [PATCH 080/267] Fixing neighbor bug --- src/neighbor.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/neighbor.cpp b/src/neighbor.cpp index 59abc29f19..af59391209 100644 --- a/src/neighbor.cpp +++ b/src/neighbor.cpp @@ -909,9 +909,10 @@ void Neighbor::init_pair() done = 1; for (i = 0; i < 
npair_perpetual; i++) { ptr = NULL; - if (lists[plist[i]]->listcopy) ptr = lists[plist[i]]->listcopy; - if (lists[plist[i]]->listskip) ptr = lists[plist[i]]->listskip; if (lists[plist[i]]->listfull) ptr = lists[plist[i]]->listfull; + if (lists[plist[i]]->listcopy) ptr = lists[plist[i]]->listcopy; + // listskip check must be after listfull check + if (lists[plist[i]]->listskip) ptr = lists[plist[i]]->listskip; if (ptr == NULL) continue; for (m = 0; m < nrequest; m++) if (ptr == lists[m]) break; From 5b7ab135dd849b2dbc74118036e872a1e20d2c43 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 12 Jan 2017 16:22:38 -0700 Subject: [PATCH 081/267] Fixing Kokkos neighbor bug --- src/neigh_request.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/neigh_request.cpp b/src/neigh_request.cpp index a8ba8496cd..7f5d9a6195 100644 --- a/src/neigh_request.cpp +++ b/src/neigh_request.cpp @@ -138,6 +138,8 @@ int NeighRequest::identical(NeighRequest *other) if (ghost != other->ghost) same = 0; if (omp != other->omp) same = 0; if (intel != other->intel) same = 0; + if (kokkos_host != other->kokkos_host) same = 0; + if (kokkos_device != other->kokkos_device) same = 0; if (ssa != other->ssa) same = 0; if (copy != other->copy_original) same = 0; From c15d6580da4174c247b9dc6af108b02b6b3aa47c Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 13 Jan 2017 10:01:22 -0700 Subject: [PATCH 082/267] Fixing issue in pair_multi_lucy_rx_kokkos found by ibaned --- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index 24502f875c..fac1478e32 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -277,7 +277,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute::value> > a_f = f; - int i,j,jj,inum,jnum,itype,jtype,itable; + int i,jj,inum,jnum,itype,jtype,itable; double 
xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair; double rsq; @@ -431,7 +431,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute From 2a35fa7a4e253facc698e834d47a0ccb2cd2cace Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 13 Jan 2017 10:37:31 -0700 Subject: [PATCH 083/267] Adding initial versions of pair_hybrid_kokkos and pair_hybrid_overlay_kokkos --- src/KOKKOS/Install.sh | 4 + src/KOKKOS/pair_hybrid_kokkos.cpp | 147 ++++++++++++++++++++++ src/KOKKOS/pair_hybrid_kokkos.h | 109 ++++++++++++++++ src/KOKKOS/pair_hybrid_overlay_kokkos.cpp | 28 +++++ src/KOKKOS/pair_hybrid_overlay_kokkos.h | 48 +++++++ src/pair_hybrid.h | 6 +- src/pair_hybrid_overlay.h | 2 +- 7 files changed, 338 insertions(+), 6 deletions(-) create mode 100644 src/KOKKOS/pair_hybrid_kokkos.cpp create mode 100644 src/KOKKOS/pair_hybrid_kokkos.h create mode 100644 src/KOKKOS/pair_hybrid_overlay_kokkos.cpp create mode 100644 src/KOKKOS/pair_hybrid_overlay_kokkos.h diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index cf753ecee8..198946d9f0 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -154,6 +154,10 @@ action pair_eam_fs_kokkos.cpp pair_eam_fs.cpp action pair_eam_fs_kokkos.h pair_eam_fs.h action pair_exp6_rx_kokkos.cpp pair_exp6_rx.cpp action pair_exp6_rx_kokkos.h pair_exp6_rx.h +action pair_hybrid_kokkos.cpp +action pair_hybrid_kokkos.h +action pair_hybrid_overlay_kokkos.cpp +action pair_hybrid_overlay_kokkos.h action pair_kokkos.h action pair_lj_charmm_coul_charmm_implicit_kokkos.cpp pair_lj_charmm_coul_charmm_implicit.cpp action pair_lj_charmm_coul_charmm_implicit_kokkos.h pair_lj_charmm_coul_charmm_implicit.h diff --git a/src/KOKKOS/pair_hybrid_kokkos.cpp b/src/KOKKOS/pair_hybrid_kokkos.cpp new file mode 100644 index 0000000000..973d60348f --- /dev/null +++ b/src/KOKKOS/pair_hybrid_kokkos.cpp @@ -0,0 +1,147 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel 
Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include +#include +#include +#include +#include "pair_hybrid_kokkos.h" +#include "atom_kokkos.h" +#include "force.h" +#include "pair.h" +#include "neighbor.h" +#include "neigh_request.h" +#include "update.h" +#include "comm.h" +#include "memory.h" +#include "error.h" +#include "respa.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairHybridKokkos::PairHybridKokkos(LAMMPS *lmp) : PairHybrid(lmp) +{ + atomKK = (AtomKokkos *) atom; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; +} + +/* ---------------------------------------------------------------------- */ + +PairHybridKokkos::~PairHybridKokkos() +{ + +} + +/* ---------------------------------------------------------------------- + call each sub-style's compute() or compute_outer() function + accumulate sub-style global/peratom energy/virial in hybrid + for global vflag = 1: + each sub-style computes own virial[6] + sum sub-style virial[6] to hybrid's virial[6] + for global vflag = 2: + call sub-style with adjusted vflag to prevent it calling + virial_fdotr_compute() + hybrid calls virial_fdotr_compute() on final accumulated f +------------------------------------------------------------------------- */ + +void PairHybridKokkos::compute(int eflag, int vflag) +{ + int i,j,m,n; + + // if no_virial_fdotr_compute is set and global component of + // incoming vflag = 2, then + // reset vflag as if global component were 1 + // 
necessary since one or more sub-styles cannot compute virial as F dot r + + int neighflag = lmp->kokkos->neighflag; + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + if (no_virial_fdotr_compute && vflag % 4 == 2) vflag = 1 + vflag/4 * 4; + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + // check if global component of incoming vflag = 2 + // if so, reset vflag passed to substyle as if it were 0 + // necessary so substyle will not invoke virial_fdotr_compute() + + int vflag_substyle; + if (vflag % 4 == 2) vflag_substyle = vflag/4 * 4; + else vflag_substyle = vflag; + + double *saved_special = save_special(); + + // check if we are running with r-RESPA using the hybrid keyword + + Respa *respa = NULL; + respaflag = 0; + if (strstr(update->integrate_style,"respa")) { + respa = (Respa *) update->integrate; + if (respa->nhybrid_styles > 0) respaflag = 1; + } + + for (m = 0; m < nstyles; m++) { + + set_special(m); + + if (!respaflag || (respaflag && respa->hybrid_compute[m])) { + + // invoke compute() unless compute flag is turned off or + // outerflag is set and sub-style has a compute_outer() method + + if (styles[m]->compute_flag == 0) continue; + atomKK->sync(styles[m]->execution_space,styles[m]->datamask_read); + if (outerflag && styles[m]->respa_enable) + styles[m]->compute_outer(eflag,vflag_substyle); + else styles[m]->compute(eflag,vflag_substyle); + atomKK->modified(styles[m]->execution_space,styles[m]->datamask_modify); + } + + restore_special(saved_special); + + // jump to next sub-style if r-RESPA does not want global accumulated data + + if (respaflag && !respa->tally_global) continue; + + if (eflag_global) { + eng_vdwl += styles[m]->eng_vdwl; + eng_coul += styles[m]->eng_coul; + } + if (vflag_global) { + for (n = 0; n < 6; n++) virial[n] += styles[m]->virial[n]; + } + if (eflag_atom) { + n = atom->nlocal; + if (force->newton_pair) n += atom->nghost; + double 
*eatom_substyle = styles[m]->eatom; + for (i = 0; i < n; i++) eatom[i] += eatom_substyle[i]; + } + if (vflag_atom) { + n = atom->nlocal; + if (force->newton_pair) n += atom->nghost; + double **vatom_substyle = styles[m]->vatom; + for (i = 0; i < n; i++) + for (j = 0; j < 6; j++) + vatom[i][j] += vatom_substyle[i][j]; + } + } + + delete [] saved_special; + + if (vflag_fdotr) virial_fdotr_compute(); +} diff --git a/src/KOKKOS/pair_hybrid_kokkos.h b/src/KOKKOS/pair_hybrid_kokkos.h new file mode 100644 index 0000000000..cfcef7fb31 --- /dev/null +++ b/src/KOKKOS/pair_hybrid_kokkos.h @@ -0,0 +1,109 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(hybrid/kk,PairHybridKokkos) + +#else + +#ifndef LMP_PAIR_HYBRID_KOKKOS_H +#define LMP_PAIR_HYBRID_KOKKOS_H + +#include +#include "pair_hybrid.h" + +namespace LAMMPS_NS { + +class PairHybridKokkos : public PairHybrid { + friend class FixGPU; + friend class FixIntel; + friend class FixOMP; + friend class Force; + friend class Respa; + friend class Info; + public: + PairHybridKokkos(class LAMMPS *); + virtual ~PairHybridKokkos(); + void compute(int, int); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. 
+ +E: Pair style hybrid cannot have hybrid as an argument + +Self-explanatory. + +E: Pair style hybrid cannot have none as an argument + +Self-explanatory. + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. + +E: Pair coeff for hybrid has invalid style + +Style in pair coeff must have been listed in pair_style command. + +E: Pair hybrid sub-style is not used + +No pair_coeff command used a sub-style specified in the pair_style +command. + +E: Pair_modify special setting for pair hybrid incompatible with global special_bonds setting + +Cannot override a setting of 0.0 or 1.0 or change a setting between +0.0 and 1.0. + +E: All pair coeffs are not set + +All pair coefficients must be set in the data file or by the +pair_coeff command before running a simulation. + +E: Invoked pair single on pair style none + +A command (e.g. a dump) attempted to invoke the single() function on a +pair style none, which is illegal. You are probably attempting to +compute per-atom quantities with an undefined pair style. + +E: Pair hybrid sub-style does not support single call + +You are attempting to invoke a single() call on a pair style +that doesn't support it. + +E: Pair hybrid single calls do not support per sub-style special bond values + +Self-explanatory. + +E: Unknown pair_modify hybrid sub-style + +The choice of sub-style is unknown. + +E: Coulomb cutoffs of pair hybrid sub-styles do not match + +If using a Kspace solver, all Coulomb cutoffs of long pair styles must +be the same. 
+ +*/ diff --git a/src/KOKKOS/pair_hybrid_overlay_kokkos.cpp b/src/KOKKOS/pair_hybrid_overlay_kokkos.cpp new file mode 100644 index 0000000000..55fed33f96 --- /dev/null +++ b/src/KOKKOS/pair_hybrid_overlay_kokkos.cpp @@ -0,0 +1,28 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include +#include +#include +#include "pair_hybrid_overlay_kokkos.h" +#include "atom.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_request.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairHybridOverlayKokkos::PairHybridOverlayKokkos(LAMMPS *lmp) : PairHybridOverlay(lmp) {} diff --git a/src/KOKKOS/pair_hybrid_overlay_kokkos.h b/src/KOKKOS/pair_hybrid_overlay_kokkos.h new file mode 100644 index 0000000000..c9a50e3bb1 --- /dev/null +++ b/src/KOKKOS/pair_hybrid_overlay_kokkos.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. 
+ + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(hybrid/overlay/kk,PairHybridOverlayKokkos) + +#else + +#ifndef LMP_PAIR_HYBRID_OVERLAY_KOKKOS_H +#define LMP_PAIR_HYBRID_OVERLAY_KOKKOS_H + +#include "pair_hybrid_overlay.h" + +namespace LAMMPS_NS { + +class PairHybridOverlayKokkos : public PairHybridOverlay { + public: + PairHybridOverlayKokkos(class LAMMPS *); + virtual ~PairHybridOverlayKokkos() {} +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. + +E: Pair coeff for hybrid has invalid style + +Style in pair coeff must have been listed in pair_style command. + +*/ diff --git a/src/pair_hybrid.h b/src/pair_hybrid.h index 4d224dafc3..a7a236d269 100644 --- a/src/pair_hybrid.h +++ b/src/pair_hybrid.h @@ -35,7 +35,7 @@ class PairHybrid : public Pair { public: PairHybrid(class LAMMPS *); virtual ~PairHybrid(); - void compute(int, int); + virtual void compute(int, int); void settings(int, char **); virtual void coeff(int, char **); void init_style(); @@ -88,10 +88,6 @@ class PairHybrid : public Pair { /* ERROR/WARNING messages: -E: Cannot yet use pair hybrid with Kokkos - -This feature is not yet supported. - E: Illegal ... command Self-explanatory. 
Check the input script syntax and compare to the diff --git a/src/pair_hybrid_overlay.h b/src/pair_hybrid_overlay.h index 60cff45508..169583a48b 100644 --- a/src/pair_hybrid_overlay.h +++ b/src/pair_hybrid_overlay.h @@ -27,7 +27,7 @@ namespace LAMMPS_NS { class PairHybridOverlay : public PairHybrid { public: PairHybridOverlay(class LAMMPS *); - ~PairHybridOverlay() {} + virtual ~PairHybridOverlay() {} void coeff(int, char **); private: From a42a666142cdab572ee6bac9ef81559f9c02ceb8 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Fri, 13 Jan 2017 13:23:26 -0700 Subject: [PATCH 084/267] support for eatom and vatom in pair_table_rx_kokkos --- src/KOKKOS/pair_table_rx_kokkos.cpp | 214 +++++++++++++++++++++------- src/KOKKOS/pair_table_rx_kokkos.h | 6 +- 2 files changed, 166 insertions(+), 54 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 66089009a2..7402a00900 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -155,6 +155,10 @@ template PairTableRXKokkos::~PairTableRXKokkos() { if (copymode) return; + + memory->destroy_kokkos(k_eatom,eatom); + memory->destroy_kokkos(k_vatom,vatom); + delete h_table; h_table = nullptr; delete d_table; @@ -257,14 +261,38 @@ template KOKKOS_INLINE_FUNCTION void ev_tally( + int eflag, + int eflag_atom, + int vflag, int vflag_global, + int vflag_atom, int nlocal, int i, int j, EV_FLOAT& ev, F_FLOAT epair, F_FLOAT fpair, - F_FLOAT delx, F_FLOAT dely, F_FLOAT delz) + F_FLOAT delx, F_FLOAT dely, F_FLOAT delz, + Kokkos::View::value> > v_vatom, + Kokkos::View::value> > v_eatom) { - if (vflag_global) { + if (eflag) { + if (eflag_atom) { + auto epairhalf = 0.5 * epair; + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR || i < nlocal) v_eatom[i] += epairhalf; + if (NEWTON_PAIR || j < nlocal) v_eatom[j] += epairhalf; + } else { + v_eatom[i] += epairhalf; + } + } + } + + if (vflag) { auto v0 = delx*delx*fpair; auto v1 = dely*dely*fpair; auto v2 = 
delz*delz*fpair; @@ -272,39 +300,69 @@ ev_tally( auto v4 = delx*delz*fpair; auto v5 = dely*delz*fpair; - if (NEIGHFLAG!=FULL) { - if (NEWTON_PAIR) { - ev.v[0] += v0; - ev.v[1] += v1; - ev.v[2] += v2; - ev.v[3] += v3; - ev.v[4] += v4; - ev.v[5] += v5; + if (vflag_global) { + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR) { + ev.v[0] += v0; + ev.v[1] += v1; + ev.v[2] += v2; + ev.v[3] += v3; + ev.v[4] += v4; + ev.v[5] += v5; + } else { + if (i < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + if (j < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } } else { - if (i < nlocal) { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; - } - if (j < nlocal) { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; - } + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + + if (vflag_atom) { + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR || i < nlocal) { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + } + if (NEWTON_PAIR || j < nlocal) { + v_vatom(j,0) += 0.5*v0; + v_vatom(j,1) += 0.5*v1; + v_vatom(j,2) += 0.5*v2; + v_vatom(j,3) += 0.5*v3; + v_vatom(j,4) += 0.5*v4; + v_vatom(j,5) += 0.5*v5; + } + } else { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; } - } else { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; } } } @@ -338,8 +396,19 @@ compute_item( 
DeviceType,Kokkos::MemoryTraits::value> > uCGnew, int isite1, int isite2, typename PairTableRXKokkos::TableDeviceConst d_table_const, - int vflag_global - ) { + int eflag, + int eflag_atom, + int vflag, + int vflag_global, + int vflag_atom, + Kokkos::View::value> > v_vatom, + Kokkos::View::value> > v_eatom) { EV_FLOAT ev; auto i = d_ilist(ii); auto xtmp = x(i,0); @@ -423,7 +492,10 @@ compute_item( if (EVFLAG) { ev_tally( - vflag_global,nlocal,i,j,ev,evdwl,fpair,delx,dely,delz); + eflag,eflag_atom, + vflag,vflag_global,vflag_atom, + nlocal,i,j,ev,evdwl,fpair,delx,dely,delz, + v_vatom, v_eatom); } } } @@ -440,7 +512,6 @@ compute_item( template static void compute_all_items( - int eflag, int vflag, int newton_pair, EV_FLOAT& ev, int nlocal, @@ -467,7 +538,19 @@ static void compute_all_items( DeviceType,Kokkos::MemoryTraits::value> > uCGnew, int isite1, int isite2, typename PairTableRXKokkos::TableDeviceConst d_table_const, - int vflag_global) { + int eflag, + int eflag_atom, + int vflag, + int vflag_global, + int vflag_atom, + Kokkos::View::value> > v_vatom, + Kokkos::View::value> > v_eatom) { if (eflag || vflag) { Kokkos::parallel_reduce(inum, LAMMPS_LAMBDA(int i, EV_FLOAT& energy_virial) { @@ -477,14 +560,16 @@ static void compute_all_items( i, nlocal, d_ilist, d_neighbors, d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, - d_table_const, vflag_global); + d_table_const, eflag, eflag_atom, + vflag, vflag_global, vflag_atom, v_vatom, v_eatom); } else { energy_virial += compute_item( i, nlocal, d_ilist, d_neighbors, d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, - d_table_const, vflag_global); + d_table_const, eflag, eflag_atom, + vflag, vflag_global, vflag_atom, v_vatom, v_eatom); } }, ev); } else { @@ -495,13 +580,15 @@ static void compute_all_items( i, nlocal, d_ilist, d_neighbors, 
d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, - d_table_const, vflag_global); + d_table_const, eflag, eflag_atom, + vflag, vflag_global, vflag_atom, v_vatom, v_eatom); } else { compute_item( i, nlocal, d_ilist, d_neighbors, d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, - d_table_const, vflag_global); + d_table_const, eflag, eflag_atom, + vflag, vflag_global, vflag_atom, v_vatom, v_eatom); } }); } @@ -537,8 +624,16 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; - if (eflag_atom) error->all(FLERR, "pair table/rx/kk does not handle eflag_atom\n"); - if (vflag_atom) error->all(FLERR, "pair table/rx/kk does not handle vflag_atom\n"); + if (eflag_atom) { + memory->destroy_kokkos(k_eatom,eatom); + memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.d_view; + } + if (vflag_atom) { + memory->destroy_kokkos(k_vatom,vatom); + memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + d_vatom = k_vatom.d_view; + } atomKK->sync(execution_space,datamask_read); if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); @@ -578,34 +673,38 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) if(atom->ntypes > MAX_TYPES_STACKPARAMS) { if (neighflag == HALFTHREAD) { compute_all_items( - eflag, vflag, newton_pair, ev, nlocal, + newton_pair, ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, - d_table_const, vflag_global); + d_table_const, eflag, eflag_atom, + vflag, vflag_global, vflag_atom, d_vatom, d_eatom); } else if (neighflag == HALF) { compute_all_items( - eflag, vflag, newton_pair, ev, nlocal, + 
newton_pair, ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, - d_table_const, vflag_global); + d_table_const, eflag, eflag_atom, + vflag, vflag_global, vflag_atom, d_vatom, d_eatom); } } else { if (neighflag == HALFTHREAD) { compute_all_items( - eflag, vflag, newton_pair, ev, nlocal, + newton_pair, ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, - d_table_const, vflag_global); + d_table_const, eflag, eflag_atom, + vflag, vflag_global, vflag_atom, d_vatom, d_eatom); } else if (neighflag == HALF) { compute_all_items( - eflag, vflag, newton_pair, ev, nlocal, + newton_pair, ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, - d_table_const, vflag_global); + d_table_const, eflag, eflag_atom, + vflag, vflag_global, vflag_atom, d_vatom, d_eatom); } } @@ -620,6 +719,16 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) } if (vflag_fdotr) pair_virial_fdotr_compute(this); + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } } template @@ -1126,6 +1235,7 @@ void PairTableRXKokkos::cleanup_copy() { vatom = NULL; h_table=NULL; d_table=NULL; } + namespace LAMMPS_NS { template class PairTableRXKokkos; #ifdef KOKKOS_HAVE_CUDA diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index 4e94802d72..c7ecd370a4 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -88,8 +88,6 @@ class PairTableRXKokkos : public PairTable { typename ArrayTypes::t_x_array_randomread 
x; typename ArrayTypes::t_f_array f; - typename ArrayTypes::t_efloat_1d d_eatom; - typename ArrayTypes::t_virial_array d_vatom; int neighflag; @@ -106,6 +104,10 @@ class PairTableRXKokkos : public PairTable { int isite1, isite2; bool fractionalWeighting; + typename ArrayTypes::tdual_efloat_1d k_eatom; + typename ArrayTypes::tdual_virial_array k_vatom; + typename ArrayTypes::t_efloat_1d d_eatom; + typename ArrayTypes::t_virial_array d_vatom; }; } From 2b2998052c567719d6076435b640186105dda2ce Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 13 Jan 2017 13:50:21 -0700 Subject: [PATCH 085/267] Fixing inheritance issue in pair_hybrid_overlay_kokkos --- src/KOKKOS/pair_hybrid_overlay_kokkos.cpp | 116 +++++++++++++++++++++- src/KOKKOS/pair_hybrid_overlay_kokkos.h | 8 +- 2 files changed, 121 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/pair_hybrid_overlay_kokkos.cpp b/src/KOKKOS/pair_hybrid_overlay_kokkos.cpp index 55fed33f96..79d9c63221 100644 --- a/src/KOKKOS/pair_hybrid_overlay_kokkos.cpp +++ b/src/KOKKOS/pair_hybrid_overlay_kokkos.cpp @@ -25,4 +25,118 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -PairHybridOverlayKokkos::PairHybridOverlayKokkos(LAMMPS *lmp) : PairHybridOverlay(lmp) {} +PairHybridOverlayKokkos::PairHybridOverlayKokkos(LAMMPS *lmp) : PairHybridKokkos(lmp) {} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +void PairHybridOverlayKokkos::coeff(int narg, char **arg) +{ + if (narg < 3) error->all(FLERR,"Incorrect args for pair coefficients"); + if (!allocated) allocate(); + + int ilo,ihi,jlo,jhi; + force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi); + force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi); + + // 3rd arg = pair sub-style name + // 4th arg = pair sub-style index if name used multiple times + // allow for "none" as valid sub-style 
name + + int multflag; + int m; + + for (m = 0; m < nstyles; m++) { + multflag = 0; + if (strcmp(arg[2],keywords[m]) == 0) { + if (multiple[m]) { + multflag = 1; + if (narg < 4) error->all(FLERR,"Incorrect args for pair coefficients"); + if (!isdigit(arg[3][0])) + error->all(FLERR,"Incorrect args for pair coefficients"); + int index = force->inumeric(FLERR,arg[3]); + if (index == multiple[m]) break; + else continue; + } else break; + } + } + + int none = 0; + if (m == nstyles) { + if (strcmp(arg[2],"none") == 0) none = 1; + else error->all(FLERR,"Pair coeff for hybrid has invalid style"); + } + + // move 1st/2nd args to 2nd/3rd args + // if multflag: move 1st/2nd args to 3rd/4th args + // just copy ptrs, since arg[] points into original input line + + arg[2+multflag] = arg[1]; + arg[1+multflag] = arg[0]; + + // invoke sub-style coeff() starting with 1st remaining arg + + if (!none) styles[m]->coeff(narg-1-multflag,&arg[1+multflag]); + + // set setflag and which type pairs map to which sub-style + // if sub-style is none: set hybrid subflag, wipe out map + // else: set hybrid setflag & map only if substyle setflag is set + // if sub-style is new for type pair, add as multiple mapping + // if sub-style exists for type pair, don't add, just update coeffs + + int count = 0; + for (int i = ilo; i <= ihi; i++) { + for (int j = MAX(jlo,i); j <= jhi; j++) { + if (none) { + setflag[i][j] = 1; + nmap[i][j] = 0; + count++; + } else if (styles[m]->setflag[i][j]) { + int k; + for (k = 0; k < nmap[i][j]; k++) + if (map[i][j][k] == m) break; + if (k == nmap[i][j]) map[i][j][nmap[i][j]++] = m; + setflag[i][j] = 1; + count++; + } + } + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); +} + +/* ---------------------------------------------------------------------- + combine sub-style neigh list requests and create new ones if needed +------------------------------------------------------------------------- */ + +void 
PairHybridOverlayKokkos::modify_requests() +{ + int i,j; + NeighRequest *irq,*jrq; + + // loop over pair requests only + // if a previous list is same kind with same skip attributes + // then make this one a copy list of that one + // works whether both lists are no-skip or yes-skip + // will not point a list at a copy list, but at copy list's parent + + for (i = 0; i < neighbor->nrequest; i++) { + if (!neighbor->requests[i]->pair) continue; + + irq = neighbor->requests[i]; + for (j = 0; j < i; j++) { + if (!neighbor->requests[j]->pair) continue; + jrq = neighbor->requests[j]; + if (irq->same_kind(jrq) && irq->same_skip(jrq)) { + irq->copy = 1; + irq->otherlist = j; + break; + } + } + } + + // perform same operations on skip lists as pair style = hybrid + + PairHybrid::modify_requests(); +} diff --git a/src/KOKKOS/pair_hybrid_overlay_kokkos.h b/src/KOKKOS/pair_hybrid_overlay_kokkos.h index c9a50e3bb1..2e4899a1f3 100644 --- a/src/KOKKOS/pair_hybrid_overlay_kokkos.h +++ b/src/KOKKOS/pair_hybrid_overlay_kokkos.h @@ -20,14 +20,18 @@ PairStyle(hybrid/overlay/kk,PairHybridOverlayKokkos) #ifndef LMP_PAIR_HYBRID_OVERLAY_KOKKOS_H #define LMP_PAIR_HYBRID_OVERLAY_KOKKOS_H -#include "pair_hybrid_overlay.h" +#include "pair_hybrid_kokkos.h" namespace LAMMPS_NS { -class PairHybridOverlayKokkos : public PairHybridOverlay { +class PairHybridOverlayKokkos : public PairHybridKokkos { public: PairHybridOverlayKokkos(class LAMMPS *); virtual ~PairHybridOverlayKokkos() {} + void coeff(int, char **); + + private: + void modify_requests(); }; } From 688df1c2542cef4461b99e99632cce54dd0eb51d Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Fri, 13 Jan 2017 14:40:36 -0700 Subject: [PATCH 086/267] fix CUDA type issues in pair_table_rx_kokkos stop using the global DAT, use the pair's DeviceType for all the relevant types. 
--- src/KOKKOS/pair_table_rx_kokkos.cpp | 34 +++++++++++++++++------------ src/KOKKOS/pair_table_rx_kokkos.h | 3 --- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 7402a00900..58108c9308 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -272,11 +272,11 @@ ev_tally( F_FLOAT epair, F_FLOAT fpair, F_FLOAT delx, F_FLOAT dely, F_FLOAT delz, Kokkos::View::t_virial_array::array_layout, DeviceType, Kokkos::MemoryTraits::value> > v_vatom, Kokkos::View::t_efloat_1d::array_layout, DeviceType, Kokkos::MemoryTraits::value> > v_eatom) { @@ -390,10 +390,14 @@ compute_item( typename ArrayTypes::t_f_array::array_layout, DeviceType, Kokkos::MemoryTraits::value> > f, - Kokkos::View::value> > uCG, - Kokkos::View::value> > uCGnew, + Kokkos::View::t_efloat_1d::array_layout, + DeviceType, + Kokkos::MemoryTraits::value> > uCG, + Kokkos::View::t_efloat_1d::array_layout, + DeviceType, + Kokkos::MemoryTraits::value> > uCGnew, int isite1, int isite2, typename PairTableRXKokkos::TableDeviceConst d_table_const, int eflag, @@ -402,11 +406,11 @@ compute_item( int vflag_global, int vflag_atom, Kokkos::View::t_virial_array::array_layout, DeviceType, Kokkos::MemoryTraits::value> > v_vatom, Kokkos::View::t_efloat_1d::array_layout, DeviceType, Kokkos::MemoryTraits::value> > v_eatom) { EV_FLOAT ev; @@ -532,9 +536,11 @@ static void compute_all_items( typename ArrayTypes::t_f_array::array_layout, DeviceType, Kokkos::MemoryTraits::value> > f, - Kokkos::View::t_efloat_1d::array_layout, DeviceType,Kokkos::MemoryTraits::value> > uCG, - Kokkos::View::t_efloat_1d::array_layout, DeviceType,Kokkos::MemoryTraits::value> > uCGnew, int isite1, int isite2, typename PairTableRXKokkos::TableDeviceConst d_table_const, @@ -544,11 +550,11 @@ static void compute_all_items( int vflag_global, int vflag_atom, Kokkos::View::t_virial_array::array_layout, DeviceType, 
Kokkos::MemoryTraits::value> > v_vatom, Kokkos::View::t_efloat_1d::array_layout, DeviceType, Kokkos::MemoryTraits::value> > v_eatom) { if (eflag || vflag) { @@ -627,12 +633,12 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) if (eflag_atom) { memory->destroy_kokkos(k_eatom,eatom); memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); - d_eatom = k_eatom.d_view; + d_eatom = k_eatom.template view(); } if (vflag_atom) { memory->destroy_kokkos(k_vatom,vatom); memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); - d_vatom = k_vatom.d_view; + d_vatom = k_vatom.template view(); } atomKK->sync(execution_space,datamask_read); diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index c7ecd370a4..54c114a433 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -30,9 +30,6 @@ namespace LAMMPS_NS { template class PairTableRXKokkos : public PairTable { public: - - using DAT = ArrayTypes; - enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2}; typedef DeviceType device_type; From 91d68e26eff86b7e1fe50bb3786b13b7f6a07b30 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 17 Jan 2017 12:26:00 -0700 Subject: [PATCH 087/267] Prevent overlapping host/device computation in pair_hybrid_kokkos --- src/KOKKOS/pair_hybrid_kokkos.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/KOKKOS/pair_hybrid_kokkos.cpp b/src/KOKKOS/pair_hybrid_kokkos.cpp index 973d60348f..9c0948b7d4 100644 --- a/src/KOKKOS/pair_hybrid_kokkos.cpp +++ b/src/KOKKOS/pair_hybrid_kokkos.cpp @@ -35,6 +35,11 @@ using namespace LAMMPS_NS; PairHybridKokkos::PairHybridKokkos(LAMMPS *lmp) : PairHybrid(lmp) { atomKK = (AtomKokkos *) atom; + + // prevent overlapping host/device computation, which isn't + // yet supported by pair_hybrid_kokkos + execution_space = Device; + datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; } From 8b4130c0cbbbf6bfb69e01d51f5ba47c94ecd3ed Mon Sep 17 00:00:00 2001 From: Stan Moore 
Date: Tue, 17 Jan 2017 13:28:55 -0700 Subject: [PATCH 088/267] Fixing issue with pressure in pair_hybrid_kokkos --- src/KOKKOS/pair_hybrid_kokkos.cpp | 9 ++++++++- src/KOKKOS/pair_hybrid_kokkos.h | 9 +++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/pair_hybrid_kokkos.cpp b/src/KOKKOS/pair_hybrid_kokkos.cpp index 9c0948b7d4..337b56c6ce 100644 --- a/src/KOKKOS/pair_hybrid_kokkos.cpp +++ b/src/KOKKOS/pair_hybrid_kokkos.cpp @@ -148,5 +148,12 @@ void PairHybridKokkos::compute(int eflag, int vflag) delete [] saved_special; - if (vflag_fdotr) virial_fdotr_compute(); + // perform virial_fdotr on device + + atomKK->sync(Device,X_MASK|F_MASK); + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + + if (vflag_fdotr) + pair_virial_fdotr_compute(this); } diff --git a/src/KOKKOS/pair_hybrid_kokkos.h b/src/KOKKOS/pair_hybrid_kokkos.h index cfcef7fb31..62d325925b 100644 --- a/src/KOKKOS/pair_hybrid_kokkos.h +++ b/src/KOKKOS/pair_hybrid_kokkos.h @@ -22,6 +22,8 @@ PairStyle(hybrid/kk,PairHybridKokkos) #include #include "pair_hybrid.h" +#include "pair_kokkos.h" +#include "kokkos_type.h" namespace LAMMPS_NS { @@ -33,9 +35,16 @@ class PairHybridKokkos : public PairHybrid { friend class Respa; friend class Info; public: + typedef LMPDeviceType device_type; + PairHybridKokkos(class LAMMPS *); virtual ~PairHybridKokkos(); void compute(int, int); + + private: + DAT::t_x_array_randomread x; + DAT::t_f_array f; + friend void pair_virial_fdotr_compute(PairHybridKokkos*); }; } From 5569c4c130c08656ddf4313127effcd039185bf6 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 17 Jan 2017 16:19:25 -0700 Subject: [PATCH 089/267] Fixing GPU memory issue with fix_property_atom_kokkos --- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 58fc9c46c3..82d45dfcd4 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ 
b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -86,12 +86,12 @@ void AtomVecDPDKokkos::grow(int n) memory->grow_kokkos(atomKK->k_uCGnew,atomKK->uCGnew,nmax,"atom:uCGnew"); memory->grow_kokkos(atomKK->k_duChem,atomKK->duChem,nmax,"atom:duChem"); - grow_reset(); - sync(Host,ALL_MASK); - if (atom->nextra_grow) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); + + grow_reset(); + sync(Host,ALL_MASK); } /* ---------------------------------------------------------------------- From 96636c7514a8fa9f978e3bbb42972a986e458285 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 17 Jan 2017 16:43:55 -0700 Subject: [PATCH 090/267] Fixing warnings in pair_exp6_rx_kokkos --- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 4 ++-- src/KOKKOS/pair_exp6_rx_kokkos.h | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 3ce6b78e57..9be44666aa 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -284,7 +284,7 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxCompute::value> > a_uCG = uCG; Kokkos::View::value> > a_uCGnew = uCGnew; - int i,j,jj,jnum,itype,jtype; + int i,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair; double rsq,r2inv,r6inv,forceExp6,factor_lj; double rCut,rCutInv,rCut2inv,rCut6inv,rCutExp,urc,durc; @@ -508,7 +508,7 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxComputeall(FLERR,"alpha_ij is 6.0 in pair exp6"); + k_error_flag.d_view() = 1; // A3. 
Compute some convenient quantities for evaluating the force rminv = 1.0/rm12_ij; diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.h b/src/KOKKOS/pair_exp6_rx_kokkos.h index 488c9d0039..1f2172471b 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.h +++ b/src/KOKKOS/pair_exp6_rx_kokkos.h @@ -150,6 +150,24 @@ class PairExp6rxKokkos : public PairExp6rx { friend void pair_virial_fdotr_compute(PairExp6rxKokkos*); }; + +// optimized version of pow(x,n) with n being integer +// up to 10x faster than pow(x,y) + +KOKKOS_INLINE_FUNCTION +static double powint(const double &x, const int n) { + double yy,ww; + + if (x == 0.0) return 0.0; + int nn = (n > 0) ? n : -n; + ww = x; + + for (yy = 1.0; nn != 0; nn >>= 1, ww *=ww) + if (nn & 1) yy *= ww; + + return (n > 0) ? yy : 1.0/yy; +}; + } #endif From b38733e5a2b73e6f1a3d6ec37958bc68251f2bca Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 18 Jan 2017 10:15:06 -0700 Subject: [PATCH 091/267] Fixing GPU memory issue in pair_exp6_rx_kokkos --- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 9be44666aa..bde3a32b4b 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -659,6 +659,10 @@ void PairExp6rxKokkos::coeff(int narg, char **arg) s_coeffEps[i] = coeffEps[i]; s_coeffRm[i] = coeffRm[i]; } + + k_params.template modify(); + k_params.template sync(); + d_params = k_params.template view(); } /* ---------------------------------------------------------------------- */ @@ -776,10 +780,6 @@ void PairExp6rxKokkos::read_file(char *file) } delete [] words; - - k_params.template modify(); - k_params.template sync(); - d_params = k_params.template view(); } /* ---------------------------------------------------------------------- */ From 2d32fa8ccb046159155212e36b09b663608525d5 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 18 Jan 2017 12:53:40 -0700 Subject: [PATCH 
092/267] Fixing GPU memory issues in atom_vec_dpd_kokkos --- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 45 +++++++++++++++++++----------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 82d45dfcd4..699ea61c9d 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -256,7 +256,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n, // Choose correct forward PackComm kernel if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); + sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, @@ -292,7 +292,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n, } LMPHostType::fence(); } else { - sync(Device,X_MASK); + sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, @@ -400,8 +400,8 @@ struct AtomVecDPDKokkos_PackCommSelf { int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, const int nfirst, const int &pbc_flag, const int* const pbc) { if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); + sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, @@ -437,8 +437,8 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list } LMPHostType::fence(); } else { - sync(Device,X_MASK); - modified(Device,X_MASK); + sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, @@ -520,16 +520,16 @@ struct 
AtomVecDPDKokkos_UnpackComm { void AtomVecDPDKokkos::unpack_comm_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf ) { if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); + sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); struct AtomVecDPDKokkos_UnpackComm f(atomKK->k_x, atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, buf,first); Kokkos::parallel_for(n,f); LMPDeviceType::fence(); } else { - sync(Device,X_MASK); - modified(Device,X_MASK); + sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); struct AtomVecDPDKokkos_UnpackComm f(atomKK->k_x, atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, buf,first); @@ -1107,9 +1107,13 @@ struct AtomVecDPDKokkos_UnpackBorder { void AtomVecDPDKokkos::unpack_border_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) { - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| + UCG_MASK|UCGNEW_MASK); while (first+n >= nmax) grow(0); - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| + UCG_MASK|UCGNEW_MASK); if(space==Host) { struct AtomVecDPDKokkos_UnpackBorder f(buf.view(), h_x,h_tag,h_type,h_mask, @@ -1137,7 +1141,9 @@ void AtomVecDPDKokkos::unpack_border(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| + UCG_MASK|UCGNEW_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1168,7 +1174,9 @@ 
void AtomVecDPDKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| + UCG_MASK|UCGNEW_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1489,7 +1497,8 @@ int AtomVecDPDKokkos::unpack_exchange(double *buf) int nlocal = atom->nlocal; if (nlocal == nmax) grow(0); modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK); + MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK | + UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK); int m = 1; h_x(nlocal,0) = buf[m++]; @@ -1547,7 +1556,8 @@ int AtomVecDPDKokkos::size_restart() int AtomVecDPDKokkos::pack_restart(int i, double *buf) { sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK ); + MASK_MASK | IMAGE_MASK | DPDTHETA_MASK | + UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK); int m = 1; buf[m++] = h_x(i,0); @@ -1586,7 +1596,8 @@ int AtomVecDPDKokkos::unpack_restart(double *buf) memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); } modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK ); + MASK_MASK | IMAGE_MASK | DPDTHETA_MASK | + UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK); int m = 1; h_x(nlocal,0) = buf[m++]; From e05b1322895337ed3653b74adebfc54208db3649 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 18 Jan 2017 14:18:35 -0700 Subject: [PATCH 093/267] Fixing error check in fix_eos_table_rx_kokkos --- src/KOKKOS/fix_eos_table_rx_kokkos.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp index aff2cdfa2d..40b44d6744 100644 --- a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp @@ -404,7 +404,8 @@ void 
FixEOStableRXKokkos::temperature_lookup(int id, double ui, doub if(it==maxit){ if(isnan(f1) || isnan(f2) || isnan(ui) || isnan(thetai) || isnan(t1) || isnan(t2)) k_error_flag.d_view() = 2; - k_error_flag.d_view() = 3; + else + k_error_flag.d_view() = 3; } thetai = temp; } From 116ae9d0c42aa949f9478a95ba20654804442381 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 18 Jan 2017 14:51:35 -0700 Subject: [PATCH 094/267] Fixing copy bug in pair_exp6_rx_kokkos --- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index bde3a32b4b..acba9e473b 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -1095,7 +1095,7 @@ void PairExp6rxKokkos::polynomialScaling(double phi, double &alpha, alpha = (s_coeffAlpha[0]*phi5 + s_coeffAlpha[1]*phi4 + s_coeffAlpha[2]*phi3 + s_coeffAlpha[3]*phi2 + s_coeffAlpha[4]*phi + s_coeffAlpha[5]); epsilon *= (s_coeffEps[0]*phi5 + s_coeffEps[1]*phi4 + s_coeffEps[2]*phi3 + s_coeffEps[3]*phi2 + s_coeffEps[4]*phi + s_coeffEps[5]); - rm *= (s_coeffEps[0]*phi5 + s_coeffEps[1]*phi4 + s_coeffEps[2]*phi3 + s_coeffEps[3]*phi2 + s_coeffEps[4]*phi + s_coeffEps[5]); + rm *= (s_coeffRm[0]*phi5 + s_coeffRm[1]*phi4 + s_coeffRm[2]*phi3 + s_coeffRm[3]*phi2 + s_coeffRm[4]*phi + s_coeffRm[5]); } /* ---------------------------------------------------------------------- */ From cf83ce454369b365efff7210382b7f2a3a246cf1 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 19 Jan 2017 08:44:30 -0700 Subject: [PATCH 095/267] Adding zero compute to pair_dpd_fdt_energy_kokkos --- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 32 ++++++++++++----------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index 5de2b38ed0..ec807a0e08 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ 
b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -170,21 +170,23 @@ void PairDPDfdtEnergyKokkos::compute(int eflag_in, int vflag_in) EV_FLOAT ev; if (splitFDT_flag) { - if (neighflag == HALF) { - if (newton_pair) { - if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); - else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); - } else { - if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); - else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); - } - } else if (neighflag == HALFTHREAD) { - if (newton_pair) { - if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); - else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); - } else { - if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); - else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + if (!a0_is_zero) { + if (neighflag == HALF) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } } } } else { From 917ca19b340dec624890201ebb7280c2a64fef0a Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 19 Jan 2017 09:54:15 -0700 Subject: [PATCH 096/267] Fixing GPU memory issue in modify_kokkos, need to cherry pick back to Master --- src/KOKKOS/modify_kokkos.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/KOKKOS/modify_kokkos.cpp 
b/src/KOKKOS/modify_kokkos.cpp index ec3831dff8..b4a89c8e39 100644 --- a/src/KOKKOS/modify_kokkos.cpp +++ b/src/KOKKOS/modify_kokkos.cpp @@ -360,9 +360,7 @@ void ModifyKokkos::post_run() for (int i = 0; i < nfix; i++) { atomKK->sync(fix[i]->execution_space, fix[i]->datamask_read); - if (!fix[i]->kokkosable) lmp->kokkos->auto_sync = 1; fix[i]->post_run(); - lmp->kokkos->auto_sync = 0; atomKK->modified(fix[i]->execution_space, fix[i]->datamask_modify); } From de6442d8450cbebc62267dcc6872c58e68947766 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 19 Jan 2017 11:55:22 -0700 Subject: [PATCH 097/267] Fixing GPU memory issues in Kokkos --- src/KOKKOS/domain_kokkos.cpp | 4 ++-- src/KOKKOS/verlet_kokkos.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/domain_kokkos.cpp b/src/KOKKOS/domain_kokkos.cpp index cf65316ec9..5c1f1a60b9 100644 --- a/src/KOKKOS/domain_kokkos.cpp +++ b/src/KOKKOS/domain_kokkos.cpp @@ -354,7 +354,6 @@ void DomainKokkos::pbc() } atomKK->sync(Device,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK); - atomKK->modified(Device,X_MASK|V_MASK|IMAGE_MASK); if (xperiodic || yperiodic || zperiodic) { if (deform_vremap) { @@ -385,8 +384,9 @@ void DomainKokkos::pbc() Kokkos::parallel_for(nlocal,f); } } - LMPDeviceType::fence(); + + atomKK->modified(Device,X_MASK|V_MASK|IMAGE_MASK); } /* ---------------------------------------------------------------------- diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index 20c4035276..53b4042376 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -170,7 +170,7 @@ void VerletKokkos::setup() modify->setup(vflag); output->setup(); - lmp->kokkos->auto_sync = 0; + lmp->kokkos->auto_sync = 1; update->setupflag = 1; } From 521f3df3d5939fe2d61b3fb9f2e756200822ba6e Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 19 Jan 2017 16:54:50 -0700 Subject: [PATCH 098/267] Initialize variables in pair_exp6_rx_kokkos --- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 3 +++ 
1 file changed, 3 insertions(+) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index acba9e473b..dd3228efc4 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -311,6 +311,9 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxCompute Date: Tue, 24 Jan 2017 11:24:47 -0700 Subject: [PATCH 099/267] Fixing GPU memory issue in fix_eos_table_rx_kokkos --- src/KOKKOS/fix_eos_table_rx_kokkos.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp index 40b44d6744..38222e6dd7 100644 --- a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp @@ -45,6 +45,8 @@ template FixEOStableRXKokkos::FixEOStableRXKokkos(LAMMPS *lmp, int narg, char **arg) : FixEOStableRX(lmp, narg, arg) { + int kokkosable = 1; + atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; datamask_read = EMPTY_MASK; @@ -181,7 +183,7 @@ void FixEOStableRXKokkos::init() } else { atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | DVECTOR_MASK); Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); - atomKK->modified(execution_space,UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK); + atomKK->modified(execution_space,UCOND_MASK | UMECH_MASK | UCHEM_MASK); } error_check(); @@ -223,9 +225,8 @@ void FixEOStableRXKokkos::post_integrate() dvector = atomKK->k_dvector.view(); atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | DVECTOR_MASK); - atomKK->modified(execution_space,DPDTHETA_MASK); - Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + atomKK->modified(execution_space,DPDTHETA_MASK); error_check(); From 8e808f6c6b861cd46329b3c4d58e97631661896d Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 24 Jan 2017 11:45:27 -0700 Subject: [PATCH 100/267] Zeroing variables in 
pair_exp6_rx_kokkos to match pull request --- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 66 ++++++++++++++++-------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index dd3228efc4..23c217ef6e 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -311,9 +311,6 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxCompute::operator()(TagPairExp6rxCompute::operator()(TagPairExp6rxComputetemplate ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); } + + // + // Apply Mixing Rule to get the overall force for the CG pair + // + if (isite1 == isite2) fpair = sqrt(mixWtSite1old_i*mixWtSite2old_j)*fpairOldEXP6_12; + else fpair = sqrt(mixWtSite1old_i*mixWtSite2old_j)*fpairOldEXP6_12 + sqrt(mixWtSite2old_i*mixWtSite1old_j)*fpairOldEXP6_21; + + fx_i += delx*fpair; + fy_i += dely*fpair; + fz_i += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + a_f(j,0) -= delx*fpair; + a_f(j,1) -= dely*fpair; + a_f(j,2) -= delz*fpair; + } + + if (isite1 == isite2) evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwlEXP6_12; + else evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwlEXP6_12 + sqrt(mixWtSite2_i*mixWtSite1_j)*evdwlEXP6_21; + evdwl *= factor_lj; + + uCGnew_i += 0.5*evdwl; + if (NEWTON_PAIR || j < nlocal) + a_uCGnew[j] += 0.5*evdwl; + evdwl = evdwlOld; + if (EVFLAG) + ev.evdwl += ((NEWTON_PAIR||(jtemplate ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); } } From c617bc180afd1295fd49ffa71fdf779e8bf67603 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 26 Jan 2017 08:52:17 -0700 Subject: [PATCH 101/267] Adding sync/modify to pair_multi_lucy_rx_kokkos --- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index fac1478e32..8399fccc64 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp 
@@ -183,8 +183,6 @@ void PairMultiLucyRXKokkos::compute_style(int eflag_in, int vflag_in dvector = atomKK->k_dvector.view(); atomKK->sync(execution_space,X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | DPDRHO_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); - if (evflag) atomKK->modified(execution_space,F_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK); - else atomKK->modified(execution_space,F_MASK | UCG_MASK | UCGNEW_MASK); k_cutsq.template sync(); nlocal = atom->nlocal; @@ -231,6 +229,9 @@ void PairMultiLucyRXKokkos::compute_style(int eflag_in, int vflag_in } } + if (evflag) atomKK->modified(execution_space,F_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK); + else atomKK->modified(execution_space,F_MASK | UCG_MASK | UCGNEW_MASK); + k_error_flag.template modify(); k_error_flag.template sync(); if (k_error_flag.h_view() == 1) @@ -454,7 +455,6 @@ void PairMultiLucyRXKokkos::computeLocalDensity() nlocal = atom->nlocal; atomKK->sync(execution_space,X_MASK | TYPE_MASK | DPDRHO_MASK); - atomKK->modified(execution_space,DPDRHO_MASK); const int inum = list->inum; NeighListKokkos* k_list = static_cast*>(list); @@ -492,14 +492,14 @@ void PairMultiLucyRXKokkos::computeLocalDensity() Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); } + atomKK->modified(execution_space,DPDRHO_MASK); + // communicate and sum densities (on the host) if (newton_pair) { - atomKK->modified(execution_space,DPDRHO_MASK); atomKK->sync(Host,DPDRHO_MASK); comm->reverse_comm_pair(this); atomKK->modified(Host,DPDRHO_MASK); - atomKK->sync(execution_space,DPDRHO_MASK); } comm->forward_comm_pair(this); @@ -687,6 +687,8 @@ int PairMultiLucyRXKokkos::pack_forward_comm(int n, int *list, doubl { int i,j,m; + atomKK->sync(Host,DPDRHO_MASK); + m = 0; for (i = 0; i < n; i++) { j = list[i]; @@ -705,6 +707,8 @@ void PairMultiLucyRXKokkos::unpack_forward_comm(int n, int first, do m = 0; last = first + n; for (i = first; i < last; i++) h_rho[i] = buf[m++]; + + 
atomKK->modified(Host,DPDRHO_MASK); } /* ---------------------------------------------------------------------- */ From 8050eb3aa85c95fd55433208f185ff7f9bc74e02 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 26 Jan 2017 09:17:59 -0700 Subject: [PATCH 102/267] Another tweak to sync/modify in pair_multi_lucy_rx_kokkos --- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index 8399fccc64..2e6d48227f 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -496,11 +496,8 @@ void PairMultiLucyRXKokkos::computeLocalDensity() // communicate and sum densities (on the host) - if (newton_pair) { - atomKK->sync(Host,DPDRHO_MASK); + if (newton_pair) comm->reverse_comm_pair(this); - atomKK->modified(Host,DPDRHO_MASK); - } comm->forward_comm_pair(this); } @@ -648,6 +645,8 @@ template int PairMultiLucyRXKokkos::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf, int pbc_flag, int *pbc) { + atomKK->sync(execution_space,DPDRHO_MASK); + d_sendlist = k_sendlist.view(); iswap = iswap_in; v_buf = buf.view(); @@ -672,6 +671,8 @@ void PairMultiLucyRXKokkos::unpack_forward_comm_kokkos(int n, int fi v_buf = buf.view(); Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); DeviceType::fence(); + + atomKK->modified(execution_space,DPDRHO_MASK); } template @@ -718,6 +719,8 @@ int PairMultiLucyRXKokkos::pack_reverse_comm(int n, int first, doubl { int i,m,last; + atomKK->sync(Host,DPDRHO_MASK); + m = 0; last = first + n; for (i = first; i < last; i++) buf[m++] = h_rho[i]; @@ -736,6 +739,8 @@ void PairMultiLucyRXKokkos::unpack_reverse_comm(int n, int *list, do j = list[i]; h_rho[j] += buf[m++]; } + + atomKK->modified(Host,DPDRHO_MASK); } /* ---------------------------------------------------------------------- */ From 
6cc969db9282e5712ce659b951615b14366a7e78 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 26 Jan 2017 09:24:13 -0700 Subject: [PATCH 103/267] Fixing warnings in Kokkos --- src/KOKKOS/fix_eos_table_rx_kokkos.cpp | 2 +- src/KOKKOS/rand_pool_wrap_kokkos.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp index 38222e6dd7..8487fd4c4f 100644 --- a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp @@ -45,7 +45,7 @@ template FixEOStableRXKokkos::FixEOStableRXKokkos(LAMMPS *lmp, int narg, char **arg) : FixEOStableRX(lmp, narg, arg) { - int kokkosable = 1; + kokkosable = 1; atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; diff --git a/src/KOKKOS/rand_pool_wrap_kokkos.h b/src/KOKKOS/rand_pool_wrap_kokkos.h index 349896ee9a..ce134e5215 100644 --- a/src/KOKKOS/rand_pool_wrap_kokkos.h +++ b/src/KOKKOS/rand_pool_wrap_kokkos.h @@ -24,6 +24,7 @@ namespace LAMMPS_NS { struct RandWrap { class RanMars* rng; + KOKKOS_INLINE_FUNCTION RandWrap() { rng = NULL; } From be13ecfa17cf837b0b1b4a69f8b0e733b9c5dae3 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 26 Jan 2017 10:03:43 -0700 Subject: [PATCH 104/267] Fixing Kokkos warnings --- src/KOKKOS/pair_dpd_fdt_energy_kokkos.h | 2 ++ src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 13 +------------ 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h index 2c2b78ac57..7d1749eb94 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h @@ -80,7 +80,9 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { int sbmask(const int& j) const; struct params_dpd { + KOKKOS_INLINE_FUNCTION params_dpd(){cut=0;a0=0;sigma=0;kappa=0;}; + KOKKOS_INLINE_FUNCTION params_dpd(int i){cut=0;a0=0;sigma=0;kappa=0;}; F_FLOAT cut,a0,sigma,kappa; }; diff --git 
a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index 2e6d48227f..30b49a8e8d 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -54,17 +54,6 @@ enum{NONE,RLINEAR,RSQ}; #define oneFluidParameter (-1) #define isOneFluid(_site) ( (_site) == oneFluidParameter ) -static const char cite_pair_multi_lucy_rx[] = - "pair_style multi/lucy/rx command:\n\n" - "@Article{Moore16,\n" - " author = {J.D. Moore, B.C. Barnes, S. Izvekov, M. Lisal, M.S. Sellers, D.E. Taylor and J. K. Brennan},\n" - " title = {A coarse-grain force field for RDX: Density dependent and energy conserving},\n" - " journal = {J. Chem. Phys.},\n" - " year = 2016,\n" - " volume = 144\n" - " pages = {104501}\n" - "}\n\n"; - /* ---------------------------------------------------------------------- */ template @@ -278,7 +267,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute::value> > a_f = f; - int i,jj,inum,jnum,itype,jtype,itable; + int i,jj,jnum,itype,jtype,itable; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair; double rsq; From 85c8db5f86c7becfdb6c2d6831368abebabae0d4 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 26 Jan 2017 10:09:45 -0700 Subject: [PATCH 105/267] Fixing warning in pair_dpd_fdt_energy_kokkos --- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index ec807a0e08..84a489bcc3 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -277,7 +277,7 @@ void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeSp // The f array is atomic for Half/Thread neighbor style Kokkos::View::value> > a_f = f; - int i,j,jj,inum,jnum,itype,jtype; + int i,j,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; double rsq,r,rinv,wd,wr,factor_dpd; From 
ebe27c65e18645f6aded43a039c8c3af2337afac Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 26 Jan 2017 10:33:03 -0700 Subject: [PATCH 106/267] Removing duplicate code in pair_exp6_rx_kokkos --- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 4 ++-- src/KOKKOS/pair_exp6_rx_kokkos.h | 18 ------------------ 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 23c217ef6e..962dcfd031 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -25,7 +25,7 @@ #include "force.h" #include "neigh_list.h" #include "math_const.h" -#include "math_special.h" +#include "math_special_kokkos.h" #include "memory.h" #include "error.h" #include "modify.h" @@ -36,7 +36,7 @@ using namespace LAMMPS_NS; using namespace MathConst; -using namespace MathSpecial; +using namespace MathSpecialKokkos; #define MAXLINE 1024 #define DELTA 4 diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.h b/src/KOKKOS/pair_exp6_rx_kokkos.h index 1f2172471b..488c9d0039 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.h +++ b/src/KOKKOS/pair_exp6_rx_kokkos.h @@ -150,24 +150,6 @@ class PairExp6rxKokkos : public PairExp6rx { friend void pair_virial_fdotr_compute(PairExp6rxKokkos*); }; - -// optimized version of pow(x,n) with n being integer -// up to 10x faster than pow(x,y) - -KOKKOS_INLINE_FUNCTION -static double powint(const double &x, const int n) { - double yy,ww; - - if (x == 0.0) return 0.0; - int nn = (n > 0) ? n : -n; - ww = x; - - for (yy = 1.0; nn != 0; nn >>= 1, ww *=ww) - if (nn & 1) yy *= ww; - - return (n > 0) ? 
yy : 1.0/yy; -}; - } #endif From a1f4551ac20e6660d7903f779ad67b4e56d7069d Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 27 Jan 2017 10:18:41 -0700 Subject: [PATCH 107/267] Adding missing sync/modified in atom_vec_dpd_kokkos --- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 101 ++++++++++++++++++++++++----- 1 file changed, 83 insertions(+), 18 deletions(-) diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 699ea61c9d..820f11c215 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -156,6 +156,10 @@ void AtomVecDPDKokkos::grow_reset() void AtomVecDPDKokkos::copy(int i, int j, int delflag) { + sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | DPDTHETA_MASK | + UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK); + h_tag[j] = h_tag[i]; h_type[j] = h_type[i]; mask[j] = mask[i]; @@ -176,6 +180,10 @@ void AtomVecDPDKokkos::copy(int i, int j, int delflag) if (atom->nextra_grow) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag); + + modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | DPDTHETA_MASK | + UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK); } /* ---------------------------------------------------------------------- */ @@ -546,6 +554,8 @@ int AtomVecDPDKokkos::pack_comm(int n, int *list, double *buf, int i,j,m; double dx,dy,dz; + sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + m = 0; if (pbc_flag == 0) { for (i = 0; i < n; i++) { @@ -590,6 +600,8 @@ int AtomVecDPDKokkos::pack_comm_vel(int n, int *list, double *buf, int i,j,m; double dx,dy,dz,dvx,dvy,dvz; + sync(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + m = 0; if (pbc_flag == 0) { for (i = 0; i < n; i++) { @@ -674,6 +686,8 @@ void AtomVecDPDKokkos::unpack_comm(int n, int first, double *buf) h_uMech[i] = buf[m++]; h_uChem[i] = buf[m++]; } + + 
modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); } /* ---------------------------------------------------------------------- */ @@ -696,6 +710,8 @@ void AtomVecDPDKokkos::unpack_comm_vel(int n, int first, double *buf) h_uMech[i] = buf[m++]; h_uChem[i] = buf[m++]; } + + modified(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); } /* ---------------------------------------------------------------------- */ @@ -805,6 +821,8 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA { X_FLOAT dx,dy,dz; + sync(space,ALL_MASK); + if (pbc_flag != 0) { if (domain->triclinic == 0) { dx = pbc[0]*domain->xprd; @@ -864,6 +882,8 @@ int AtomVecDPDKokkos::pack_border(int n, int *list, double *buf, int i,j,m; double dx,dy,dz; + sync(Host,ALL_MASK); + m = 0; if (pbc_flag == 0) { for (i = 0; i < n; i++) { @@ -923,6 +943,8 @@ int AtomVecDPDKokkos::pack_border_vel(int n, int *list, double *buf, int i,j,m; double dx,dy,dz,dvx,dvy,dvz; + sync(Host,ALL_MASK); + m = 0; if (pbc_flag == 0) { for (i = 0; i < n; i++) { @@ -1016,6 +1038,9 @@ int AtomVecDPDKokkos::pack_comm_hybrid(int n, int *list, double *buf) { int i,j,m; + sync(Host,DPDTHETA_MASK | UCOND_MASK | + UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK); + m = 0; for (i = 0; i < n; i++) { j = list[i]; @@ -1035,6 +1060,9 @@ int AtomVecDPDKokkos::pack_border_hybrid(int n, int *list, double *buf) { int i,j,m; + sync(Host,DPDTHETA_MASK | UCOND_MASK | + UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK); + m = 0; for (i = 0; i < n; i++) { j = list[i]; @@ -1113,7 +1141,7 @@ void AtomVecDPDKokkos::unpack_border_kokkos(const int &n, const int &first, while (first+n >= nmax) grow(0); modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| - UCG_MASK|UCGNEW_MASK); + UCG_MASK|UCGNEW_MASK|DVECTOR_MASK); if(space==Host) { struct AtomVecDPDKokkos_UnpackBorder f(buf.view(), h_x,h_tag,h_type,h_mask, @@ -1141,9 +1169,7 @@ void 
AtomVecDPDKokkos::unpack_border(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| - DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| - UCG_MASK|UCGNEW_MASK); + h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1162,6 +1188,10 @@ void AtomVecDPDKokkos::unpack_border(int n, int first, double *buf) for (int iextra = 0; iextra < atom->nextra_border; iextra++) m += modify->fix[atom->extra_border[iextra]]-> unpack_border(n,first,&buf[m]); + + modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| + UCG_MASK|UCGNEW_MASK|DVECTOR_MASK); } /* ---------------------------------------------------------------------- */ @@ -1174,9 +1204,7 @@ void AtomVecDPDKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| - DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| - UCG_MASK|UCGNEW_MASK); + h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1198,6 +1226,10 @@ void AtomVecDPDKokkos::unpack_border_vel(int n, int first, double *buf) for (int iextra = 0; iextra < atom->nextra_border; iextra++) m += modify->fix[atom->extra_border[iextra]]-> unpack_border(n,first,&buf[m]); + + modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| + UCG_MASK|UCGNEW_MASK|DVECTOR_MASK); } /* ---------------------------------------------------------------------- */ @@ -1216,6 +1248,10 @@ int AtomVecDPDKokkos::unpack_comm_hybrid(int n, int first, double *buf) h_uCG(i) = buf[m++]; h_uCGnew(i) = buf[m++]; } + + modified(Host,DPDTHETA_MASK | UCOND_MASK | + UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK); + return m; } @@ -1235,6 +1271,10 @@ int AtomVecDPDKokkos::unpack_border_hybrid(int n, int first, double *buf) h_uCG(i) = buf[m++]; 
h_uCGnew(i) = buf[m++]; } + + modified(Host,DPDTHETA_MASK | UCOND_MASK | + UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK); + return m; } @@ -1356,23 +1396,31 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d int newsize = nsend*17/k_buf.view().dimension_1()+1; k_buf.resize(newsize,k_buf.view().dimension_1()); } + sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK | + UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK | + DVECTOR_MASK); if(space == Host) { AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); Kokkos::parallel_for(nsend,f); LMPHostType::fence(); - return nsend*17; } else { AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); Kokkos::parallel_for(nsend,f); LMPDeviceType::fence(); - return nsend*17; } + return nsend*17; } /* ---------------------------------------------------------------------- */ int AtomVecDPDKokkos::pack_exchange(int i, double *buf) { + sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK | + UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK | + DVECTOR_MASK); + int m = 1; buf[m++] = h_x(i,0); buf[m++] = h_x(i,1); @@ -1475,7 +1523,6 @@ int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nre AtomVecDPDKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); Kokkos::parallel_for(nrecv/17,f); LMPHostType::fence(); - return k_count.h_view(0); } else { k_count.h_view(0) = nlocal; k_count.modify(); @@ -1485,9 +1532,14 @@ int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nre LMPDeviceType::fence(); k_count.modify(); k_count.sync(); - - return k_count.h_view(0); } + + modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK | + UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK | + DVECTOR_MASK); + + return 
k_count.h_view(0); } /* ---------------------------------------------------------------------- */ @@ -1496,9 +1548,6 @@ int AtomVecDPDKokkos::unpack_exchange(double *buf) { int nlocal = atom->nlocal; if (nlocal == nmax) grow(0); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK | - UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK); int m = 1; h_x(nlocal,0) = buf[m++]; @@ -1523,6 +1572,11 @@ int AtomVecDPDKokkos::unpack_exchange(double *buf) m += modify->fix[atom->extra_grow[iextra]]-> unpack_exchange(nlocal,&buf[m]); + modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK | + UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK | + DVECTOR_MASK); + atom->nlocal++; return m; } @@ -1595,9 +1649,6 @@ int AtomVecDPDKokkos::unpack_restart(double *buf) if (atom->nextra_store) memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); } - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | - MASK_MASK | IMAGE_MASK | DPDTHETA_MASK | - UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK); int m = 1; h_x(nlocal,0) = buf[m++]; @@ -1621,6 +1672,10 @@ int AtomVecDPDKokkos::unpack_restart(double *buf) for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++]; } + modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + MASK_MASK | IMAGE_MASK | DPDTHETA_MASK | + UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK); + atom->nlocal++; return m; } @@ -1661,6 +1716,10 @@ void AtomVecDPDKokkos::create_atom(int itype, double *coord) h_uCGnew[nlocal] = 0.0; h_duChem[nlocal] = 0.0; + //atomKK->modified(Host,TAG_MASK|TYPE_MASK|DPDTHETA_MASK|X_MASK|IMAGE_MASK| + // MASK_MASK|V_MASK|DPDRHO_MASK|UCOND_MASK|UMECH_MASK| + // UCHEM_MASK|UCG_MASK|UCGNEW_MASK); + atom->nlocal++; } @@ -1716,6 +1775,8 @@ int AtomVecDPDKokkos::data_atom_hybrid(int nlocal, char **values) { h_dpdTheta(nlocal) = atof(values[0]); + atomKK->modified(Host,DPDTHETA_MASK); + return 1; } @@ -1725,6 +1786,8 
@@ int AtomVecDPDKokkos::data_atom_hybrid(int nlocal, char **values) void AtomVecDPDKokkos::pack_data(double **buf) { + atomKK->sync(Host,TAG_MASK|TYPE_MASK|DPDTHETA_MASK|X_MASK|IMAGE_MASK); + int nlocal = atom->nlocal; for (int i = 0; i < nlocal; i++) { buf[i][0] = ubuf(h_tag(i)).d; @@ -1745,6 +1808,8 @@ void AtomVecDPDKokkos::pack_data(double **buf) int AtomVecDPDKokkos::pack_data_hybrid(int i, double *buf) { + atomKK->sync(Host,DPDTHETA_MASK); + buf[0] = h_dpdTheta(i); return 1; } From 43d61f313f566b53fd00112b594395a3c40b2145 Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Sun, 22 Jan 2017 15:03:45 -0500 Subject: [PATCH 108/267] Initial bare-bones port of FixRX to Kokkos. Initial port of USER-DPD/fix_rx.cpp to KOKKOS/fix_rx_kokkos.cpp. Using parallel_reduce(...) but still using host-only data. TODO: 1. Switch to KOKKOS datatypes for sparse-kinetics data; dense is finished. 2. Switch to using KOKKOS data for dvector. 3. Remove dependencies in rhs(...) on atom. Store those consts in UserData{} or as member constants. 4. Port ComputeLocalTemp(...) to Kokkos (needs pairing algorithm). --- src/KOKKOS/fix_rx_kokkos.cpp | 887 +++++++++++++++++++++++++++++++++++ src/KOKKOS/fix_rx_kokkos.h | 124 +++++ 2 files changed, 1011 insertions(+) create mode 100644 src/KOKKOS/fix_rx_kokkos.cpp create mode 100644 src/KOKKOS/fix_rx_kokkos.h diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp new file mode 100644 index 0000000000..f8a10dff93 --- /dev/null +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -0,0 +1,887 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. 
This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include +#include +#include "fix_rx_kokkos.h" +#include "atom_masks.h" +#include "atom_kokkos.h" +#include "force.h" +#include "memory.h" +#include "update.h" +#include "respa.h" +#include "modify.h" +#include "error.h" +#include "math_special.h" + +#include // DBL_EPSILON + +using namespace LAMMPS_NS; +using namespace FixConst; +using namespace MathSpecial; + +#ifdef DBL_EPSILON + #define MY_EPSILON (10.0*DBL_EPSILON) +#else + #define MY_EPSILON (10.0*2.220446049250313e-16) +#endif + +#define SparseKinetics_enableIntegralReactions (true) +#define SparseKinetics_invalidIndex (-1) + +namespace /* anonymous */ +{ + +typedef double TimerType; +TimerType getTimeStamp(void) { return MPI_Wtime(); } +double getElapsedTime( const TimerType &t0, const TimerType &t1) { return t1-t0; } + +} // end namespace + +/* ---------------------------------------------------------------------- */ + +template +FixRxKokkos::FixRxKokkos(LAMMPS *lmp, int narg, char **arg) : + FixRX(lmp, narg, arg), + pairDPDEKK(NULL), + update_kinetics_data(true) +{ + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; + + printf("Inside FixRxKokkos::FixRxKokkos\n"); +} + +template +FixRxKokkos::~FixRxKokkos() +{ + printf("Inside FixRxKokkos::~FixRxKokkos\n"); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixRxKokkos::init() +{ + printf("Inside FixRxKokkos::init\n"); + + // Call the parent's version. 
+ FixRX::init(); + + pairDPDEKK = dynamic_cast(pairDPDE); + if (pairDPDEKK == NULL) + error->all(FLERR,"Must use pair_style dpd/fdt/energy/kk with fix rx/kk"); + + if (update_kinetics_data) + create_kinetics_data(); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixRxKokkos::rk4(const double t_stop, double *y, double *rwork, void* v_params) const +{ + double *k1 = rwork; + double *k2 = k1 + nspecies; + double *k3 = k2 + nspecies; + double *k4 = k3 + nspecies; + double *yp = k4 + nspecies; + + const int numSteps = minSteps; + + const double h = t_stop / double(numSteps); + + // Run the requested steps with h. + for (int step = 0; step < numSteps; step++) + { + // k1 + rhs(0.0,y,k1,v_params); + + // k2 + for (int ispecies = 0; ispecies < nspecies; ispecies++) + yp[ispecies] = y[ispecies] + 0.5*h*k1[ispecies]; + + rhs(0.0,yp,k2,v_params); + + // k3 + for (int ispecies = 0; ispecies < nspecies; ispecies++) + yp[ispecies] = y[ispecies] + 0.5*h*k2[ispecies]; + + rhs(0.0,yp,k3,v_params); + + // k4 + for (int ispecies = 0; ispecies < nspecies; ispecies++) + yp[ispecies] = y[ispecies] + h*k3[ispecies]; + + rhs(0.0,yp,k4,v_params); + + for (int ispecies = 0; ispecies < nspecies; ispecies++) + y[ispecies] += h*(k1[ispecies]/6.0 + k2[ispecies]/3.0 + k3[ispecies]/3.0 + k4[ispecies]/6.0); + + } // end for (int step... 
+ +} + +/* ---------------------------------------------------------------------- */ + +// f1 = dt*f(t,x) +// f2 = dt*f(t+ c20*dt,x + c21*f1) +// f3 = dt*f(t+ c30*dt,x + c31*f1 + c32*f2) +// f4 = dt*f(t+ c40*dt,x + c41*f1 + c42*f2 + c43*f3) +// f5 = dt*f(t+dt,x + c51*f1 + c52*f2 + c53*f3 + c54*f4) +// f6 = dt*f(t+ c60*dt,x + c61*f1 + c62*f2 + c63*f3 + c64*f4 + c65*f5) +// +// fifth-order runge-kutta integration +// x5 = x + b1*f1 + b3*f3 + b4*f4 + b5*f5 + b6*f6 +// fourth-order runge-kutta integration +// x = x + a1*f1 + a3*f3 + a4*f4 + a5*f5 + +template +void FixRxKokkos::rkf45_step (const int neq, const double h, double y[], double y_out[], double rwk[], void* v_param) const +{ + const double c21=0.25; + const double c31=0.09375; + const double c32=0.28125; + const double c41=0.87938097405553; + const double c42=-3.2771961766045; + const double c43=3.3208921256258; + const double c51=2.0324074074074; + const double c52=-8.0; + const double c53=7.1734892787524; + const double c54=-0.20589668615984; + const double c61=-0.2962962962963; + const double c62=2.0; + const double c63=-1.3816764132554; + const double c64=0.45297270955166; + const double c65=-0.275; + const double a1=0.11574074074074; + const double a3=0.54892787524366; + const double a4=0.5353313840156; + const double a5=-0.2; + const double b1=0.11851851851852; + const double b3=0.51898635477583; + const double b4=0.50613149034201; + const double b5=-0.18; + const double b6=0.036363636363636; + + // local dependent variables (5 total) + double* f1 = &rwk[ 0]; + double* f2 = &rwk[ neq]; + double* f3 = &rwk[2*neq]; + double* f4 = &rwk[3*neq]; + double* f5 = &rwk[4*neq]; + double* f6 = &rwk[5*neq]; + + // scratch for the intermediate solution. 
+ //double* ytmp = &rwk[6*neq]; + double* ytmp = y_out; + + // 1) + rhs (0.0, y, f1, v_param); + + for (int k = 0; k < neq; k++){ + f1[k] *= h; + ytmp[k] = y[k] + c21 * f1[k]; + } + + // 2) + rhs(0.0, ytmp, f2, v_param); + + for (int k = 0; k < neq; k++){ + f2[k] *= h; + ytmp[k] = y[k] + c31 * f1[k] + c32 * f2[k]; + } + + // 3) + rhs(0.0, ytmp, f3, v_param); + + for (int k = 0; k < neq; k++) { + f3[k] *= h; + ytmp[k] = y[k] + c41 * f1[k] + c42 * f2[k] + c43 * f3[k]; + } + + // 4) + rhs(0.0, ytmp, f4, v_param); + + for (int k = 0; k < neq; k++) { + f4[k] *= h; + ytmp[k] = y[k] + c51 * f1[k] + c52 * f2[k] + c53 * f3[k] + c54 * f4[k]; + } + + // 5) + rhs(0.0, ytmp, f5, v_param); + + for (int k = 0; k < neq; k++) { + f5[k] *= h; + ytmp[k] = y[k] + c61*f1[k] + c62*f2[k] + c63*f3[k] + c64*f4[k] + c65*f5[k]; + } + + // 6) + rhs(0.0, ytmp, f6, v_param); + + for (int k = 0; k < neq; k++) + { + //const double f6 = h * ydot[k]; + f6[k] *= h; + + // 5th-order solution. + const double r5 = b1*f1[k] + b3*f3[k] + b4*f4[k] + b5*f5[k] + b6*f6[k]; + + // 4th-order solution. + const double r4 = a1*f1[k] + a3*f3[k] + a4*f4[k] + a5*f5[k]; + + // Truncation error: difference between 4th and 5th-order solutions. + rwk[k] = fabs(r5 - r4); + + // Update solution. + //y_out[k] = y[k] + r5; // Local extrapolation + y_out[k] = y[k] + r4; + } + + return; +} + +template +int FixRxKokkos::rkf45_h0 + (const int neq, const double t, const double t_stop, + const double hmin, const double hmax, + double& h0, double y[], double rwk[], void* v_params) const +{ + // Set lower and upper bounds on h0, and take geometric mean as first trial value. + // Exit with this value if the bounds cross each other. + + // Adjust upper bound based on ydot ... + double hg = sqrt(hmin*hmax); + + //if (hmax < hmin) + //{ + // h0 = hg; + // return; + //} + + // Start iteration to find solution to ... 
{WRMS norm of (h0^2 y'' / 2)} = 1 + + double *ydot = rwk; + double *y1 = ydot + neq; + double *ydot1 = y1 + neq; + + const int max_iters = 10; + bool hnew_is_ok = false; + double hnew = hg; + int iter = 0; + + // compute ydot at t=t0 + rhs (t, y, ydot, v_params); + + while(1) + { + // Estimate y'' with finite-difference ... + + for (int k = 0; k < neq; k++) + y1[k] = y[k] + hg * ydot[k]; + + // compute y' at t1 + rhs (t + hg, y1, ydot1, v_params); + + // Compute WRMS norm of y'' + double yddnrm = 0.0; + for (int k = 0; k < neq; k++){ + double ydd = (ydot1[k] - ydot[k]) / hg; + double wterr = ydd / (relTol * fabs( y[k] ) + absTol); + yddnrm += wterr * wterr; + } + + yddnrm = sqrt( yddnrm / double(neq) ); + + //std::cout << "iter " << _iter << " hg " << hg << " y'' " << yddnrm << std::endl; + //std::cout << "ydot " << ydot[neq-1] << std::endl; + + // should we accept this? + if (hnew_is_ok || iter == max_iters){ + hnew = hg; + if (iter == max_iters) + fprintf(stderr, "ERROR_HIN_MAX_ITERS\n"); + break; + } + + // Get the new value of h ... + hnew = (yddnrm*hmax*hmax > 2.0) ? sqrt(2.0 / yddnrm) : sqrt(hg * hmax); + + // test the stopping conditions. + double hrat = hnew / hg; + + // Accept this value ... the bias factor should bring it within range. + if ( (hrat > 0.5) && (hrat < 2.0) ) + hnew_is_ok = true; + + // If y'' is still bad after a few iterations, just accept h and give up. + if ( (iter > 1) && hrat > 2.0 ) { + hnew = hg; + hnew_is_ok = true; + } + + //printf("iter=%d, yddnrw=%e, hnew=%e, hmin=%e, hmax=%e\n", iter, yddnrm, hnew, hmin, hmax); + + hg = hnew; + iter ++; + } + + // bound and bias estimate + h0 = hnew * 0.5; + h0 = fmax(h0, hmin); + h0 = fmin(h0, hmax); + //printf("h0=%e, hmin=%e, hmax=%e\n", h0, hmin, hmax); + + return (iter + 1); +} + +template +void FixRxKokkos::rkf45(const int neq, const double t_stop, double *y, double *rwork, void *v_param, CounterType& counter) const +{ + // Rounding coefficient. 
+ const double uround = DBL_EPSILON; + + // Adaption limit (shrink or grow) + const double adaption_limit = 4.0; + + // Safety factor on the adaption. very specific but not necessary .. 0.9 is common. + const double hsafe = 0.840896415; + + // Time rounding factor. + const double tround = t_stop * uround; + + // Counters for diagnostics. + int nst = 0; // # of steps (accepted) + int nit = 0; // # of iterations total + int nfe = 0; // # of RHS evaluations + + // Min/Max step-size limits. + const double h_min = 100.0 * tround; + const double h_max = (minSteps > 0) ? t_stop / double(minSteps) : t_stop; + + // Set the initial step-size. 0 forces an internal estimate ... stable Euler step size. + double h = (minSteps > 0) ? t_stop / double(minSteps) : 0.0; + + double t = 0.0; + + if (h < h_min){ + //fprintf(stderr,"hin not implemented yet\n"); + //exit(-1); + nfe = rkf45_h0 (neq, t, t_stop, h_min, h_max, h, y, rwork, v_param); + } + + //printf("t= %e t_stop= %e h= %e\n", t, t_stop, h); + + // Integrate until we reach the end time. + while (fabs(t - t_stop) > tround){ + double *yout = rwork; + double *eout = yout + neq; + + // Take a trial step. + rkf45_step (neq, h, y, yout, eout, v_param); + + // Estimate the solution error. + // ... weighted 2-norm of the error. + double err2 = 0.0; + for (int k = 0; k < neq; k++){ + const double wterr = eout[k] / (relTol * fabs( y[k] ) + absTol); + err2 += wterr * wterr; + } + + double err = fmax( uround, sqrt( err2 / double(nspecies) )); + + // Accept the solution? + if (err <= 1.0 || h <= h_min){ + t += h; + nst++; + + for (int k = 0; k < neq; k++) + y[k] = yout[k]; + } + + // Adjust h for the next step. + double hfac = hsafe * sqrt( sqrt( 1.0 / err ) ); + + // Limit the adaption. + hfac = fmax( hfac, 1.0 / adaption_limit ); + hfac = fmin( hfac, adaption_limit ); + + // Apply the adaption factor... + h *= hfac; + + // Limit h. + h = fmin( h, h_max ); + h = fmax( h, h_min ); + + // Stretch h if we're within 5% ... 
and we didn't just fail. + if (err <= 1.0 && (t + 1.05*h) > t_stop) + h = t_stop - t; + + // And don't overshoot the end. + if (t + h > t_stop) + h = t_stop - t; + + nit++; + nfe += 6; + + if (maxIters && nit > maxIters){ + //fprintf(stderr,"atom[%d] took too many iterations in rkf45 %d %e %e\n", id, nit, t, t_stop); + counter.nFails ++; + break; + // We should set an error here so that the solution is not used! + } + + } // end while + + counter.nSteps += nst; + counter.nIters += nit; + counter.nFuncs += nfe; + + //printf("id= %d nst= %d nit= %d\n", id, nst, nit); +} + +/* ---------------------------------------------------------------------- */ + +template +int FixRxKokkos::rhs(double t, const double *y, double *dydt, void *params) const +{ + // Use the sparse format instead. + if (useSparseKinetics) + return this->rhs_sparse( t, y, dydt, params); + else + return this->rhs_dense ( t, y, dydt, params); +} + +/* ---------------------------------------------------------------------- */ + +template +int FixRxKokkos::rhs_dense(double t, const double *y, double *dydt, void *params) const +{ + UserRHSData *userData = (UserRHSData *) params; + + double *rxnRateLaw = userData->rxnRateLaw; + double *kFor = userData->kFor; + + const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms; + const int nspecies = atom->nspecies_dpd; + + for(int ispecies=0; ispecies +int FixRxKokkos::rhs_sparse(double t, const double *y, double *dydt, void *v_params) const +{ + UserRHSData *userData = (UserRHSData *) v_params; + + const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms; + + #define kFor (userData->kFor) + #define kRev (NULL) + #define rxnRateLaw (userData->rxnRateLaw) + #define conc (dydt) + #define maxReactants (this->sparseKinetics_maxReactants) + #define maxSpecies (this->sparseKinetics_maxSpecies) + #define nuk (this->sparseKinetics_nuk) + #define nu (this->sparseKinetics_nu) + #define inu (this->sparseKinetics_inu) + #define 
isIntegral(idx) (SparseKinetics_enableIntegralReactions \ + && this->sparseKinetics_isIntegralReaction[idx]) + + for (int k = 0; k < nspecies; ++k) + conc[k] = y[k] / VDPD; + + // Construct the reaction rate laws + for (int i = 0; i < nreactions; ++i) + { + double rxnRateLawForward; + if (isIntegral(i)){ + rxnRateLawForward = kFor[i] * powint( conc[ nuk[i][0] ], inu[i][0]); + for (int kk = 1; kk < maxReactants; ++kk){ + const int k = nuk[i][kk]; + if (k == SparseKinetics_invalidIndex) break; + //if (k != SparseKinetics_invalidIndex) + rxnRateLawForward *= powint( conc[k], inu[i][kk] ); + } + } else { + rxnRateLawForward = kFor[i] * pow( conc[ nuk[i][0] ], nu[i][0]); + for (int kk = 1; kk < maxReactants; ++kk){ + const int k = nuk[i][kk]; + if (k == SparseKinetics_invalidIndex) break; + //if (k != SparseKinetics_invalidIndex) + rxnRateLawForward *= pow( conc[k], nu[i][kk] ); + } + } + + rxnRateLaw[i] = rxnRateLawForward; + } + + // Construct the reaction rates for each species from the + // Stoichiometric matrix and ROP vector. + for (int k = 0; k < nspecies; ++k) + dydt[k] = 0.0; + + for (int i = 0; i < nreactions; ++i){ + // Reactants ... + dydt[ nuk[i][0] ] -= nu[i][0] * rxnRateLaw[i]; + for (int kk = 1; kk < maxReactants; ++kk){ + const int k = nuk[i][kk]; + if (k == SparseKinetics_invalidIndex) break; + //if (k != SparseKinetics_invalidIndex) + dydt[k] -= nu[i][kk] * rxnRateLaw[i]; + } + + // Products ... + dydt[ nuk[i][maxReactants] ] += nu[i][maxReactants] * rxnRateLaw[i]; + for (int kk = maxReactants+1; kk < maxSpecies; ++kk){ + const int k = nuk[i][kk]; + if (k == SparseKinetics_invalidIndex) break; + //if (k != SparseKinetics_invalidIndex) + dydt[k] += nu[i][kk] * rxnRateLaw[i]; + } + } + + // Add in the volume factor to convert to the proper units. 
+ for (int k = 0; k < nspecies; ++k) + dydt[k] *= VDPD; + + #undef kFor + #undef kRev + #undef rxnRateLaw + #undef conc + #undef maxReactants + #undef maxSpecies + #undef nuk + #undef nu + #undef inu + #undef isIntegral + //#undef invalidIndex + + return 0; +} + +/* ---------------------------------------------------------------------- */ + +/*template + template + KOKKOS_INLINE_FUNCTION +void FixRxKokkos::operator()(SolverType, const int &i) const +{ + if (atom->mask[i] & groupbit) + { + double *rwork = new double[8*nspecies]; + + UserRHSData userData; + userData.kFor = new double[nreactions]; + userData.rxnRateLaw = new double[nreactions]; + + int ode_counter[4] = { 0 }; + + const double theta = (localTempFlag) ? dpdThetaLocal[i] : atom->dpdTheta[i]; + + //Compute the reaction rate constants + for (int irxn = 0; irxn < nreactions; irxn++) + { + if (SolverType::setToZero) + userData.kFor[irxn] = 0.0; + else + userData.kFor[irxn] = Arr[irxn]*pow(theta,nArr[irxn])*exp(-Ea[irxn]/force->boltz/theta); + } + + if (odeIntegrationFlag == ODE_LAMMPS_RK4) + rk4(i, rwork, &userData); + else if (odeIntegrationFlag == ODE_LAMMPS_RKF45) + rkf45(i, rwork, &userData, ode_counter); + + delete [] rwork; + delete [] userData.kFor; + delete [] userData.rxnRateLaw; + } +} */ + +/* ---------------------------------------------------------------------- */ + +template +void FixRxKokkos::solve_reactions(void) +{ +/* int nlocal = atom->nlocal; + if (igroup == atom->firstgroup) nlocal = atom->nfirst; + + using AT = ArrayTypes; + + atomKK->sync(execution_space, UCOND_MASK); + typename AT::t_efloat_1d uCond = atomKK->k_uCond.view(); + atomKK->sync(execution_space, UMECH_MASK); + typename AT::t_efloat_1d uMech = atomKK->k_uMech.view(); + + pairDPDEKK->k_duCond.template sync(); + typename AT::t_efloat_1d_const duCond = pairDPDEKK->k_duCond.template view(); + pairDPDEKK->k_duMech.template sync(); + typename AT::t_efloat_1d_const duMech = pairDPDEKK->k_duMech.template view(); + + auto dt = 
update->dt; + + Kokkos::parallel_for(nlocal, LAMMPS_LAMBDA(int i) { + uCond(i) += 0.5*dt*duCond(i); + uMech(i) += 0.5*dt*duMech(i); + }); + + atomKK->modified(execution_space, UCOND_MASK); + atomKK->modified(execution_space, UMECH_MASK); */ +} + +/* ---------------------------------------------------------------------- */ + +template +void FixRxKokkos::create_kinetics_data(void) +{ + printf("Inside FixRxKokkos::create_kinetics_data\n"); + + memory->create_kokkos( d_kinetics_data.Arr, h_kinetics_data.Arr, nreactions, "KineticsType::Arr"); + memory->create_kokkos( d_kinetics_data.nArr, h_kinetics_data.nArr, nreactions, "KineticsType::nArr"); + memory->create_kokkos( d_kinetics_data.Ea, h_kinetics_data.Ea, nreactions, "KineticsType::Ea"); + + memory->create_kokkos( d_kinetics_data.stoich, h_kinetics_data.stoich, nreactions, nspecies, "KineticsType::stoich"); + memory->create_kokkos( d_kinetics_data.stoichReactants, h_kinetics_data.stoichReactants, nreactions, nspecies, "KineticsType::stoichReactants"); + memory->create_kokkos( d_kinetics_data.stoichProducts, h_kinetics_data.stoichProducts, nreactions, nspecies, "KineticsType::stoichProducts"); + + for (int i = 0; i < nreactions; ++i) + { + h_kinetics_data.Arr[i] = Arr[i]; + h_kinetics_data.nArr[i] = nArr[i]; + h_kinetics_data.Ea[i] = Ea[i]; + + for (int k = 0; k < nspecies; ++k) + { + h_kinetics_data.stoich(i,k) = stoich[i][k]; + h_kinetics_data.stoichReactants(i,k) = stoichReactants[i][k]; + h_kinetics_data.stoichProducts(i,k) = stoichProducts[i][k]; + } + } + + Kokkos::deep_copy( d_kinetics_data.Arr, h_kinetics_data.Arr ); + Kokkos::deep_copy( d_kinetics_data.nArr, h_kinetics_data.nArr ); + Kokkos::deep_copy( d_kinetics_data.Ea, h_kinetics_data.Ea ); + Kokkos::deep_copy( d_kinetics_data.stoich, h_kinetics_data.stoich ); + Kokkos::deep_copy( d_kinetics_data.stoichReactants, h_kinetics_data.stoichReactants ); + Kokkos::deep_copy( d_kinetics_data.stoichProducts, h_kinetics_data.stoichProducts ); + + 
update_kinetics_data = false; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixRxKokkos::pre_force(int vflag) +{ + printf("Inside FixRxKokkos::pre_force localTempFlag= %d\n", localTempFlag); + + if (update_kinetics_data) + create_kinetics_data(); + + TimerType timer_start = getTimeStamp(); + + int nlocal = atom->nlocal; + int nghost = atom->nghost; + int newton_pair = force->newton_pair; + + const bool setToZero = false; // don't set the forward rates to zero. + + if(localTempFlag){ + int count = nlocal + (newton_pair ? nghost : 0); + dpdThetaLocal = new double[count]; + memset(dpdThetaLocal, 0, sizeof(double)*count); + computeLocalTemperature(); + } + + TimerType timer_localTemperature = getTimeStamp(); + + // Total counters from the ODE solvers. + CounterType Counters; + + // Set data needed in the operators. + int *mask = atom->mask; + double *dpdTheta = atom->dpdTheta; + + const double boltz = force->boltz; + const double t_stop = update->dt; // DPD time-step and integration length. + + /*if (odeIntegrationFlag == ODE_LAMMPS_RKF45 && diagnosticFrequency == 1) + { + memory->create( diagnosticCounterPerODE[StepSum], nlocal, "FixRX::diagnosticCounterPerODE"); + memory->create( diagnosticCounterPerODE[FuncSum], nlocal, "FixRX::diagnosticCounterPerODE"); + }*/ + + Kokkos::parallel_reduce( nlocal, LAMMPS_LAMBDA(int i, CounterType &counter) + { + if (mask[i] & groupbit) + { + double *y = new double[8*nspecies]; + double *rwork = y + nspecies; + + UserRHSData userData; + userData.kFor = new double[nreactions]; + userData.rxnRateLaw = new double[nreactions]; + + CounterType counter_i; + + const double theta = (localTempFlag) ? 
dpdThetaLocal[i] : dpdTheta[i]; + + //Compute the reaction rate constants + for (int irxn = 0; irxn < nreactions; irxn++) + { + if (setToZero) + userData.kFor[irxn] = 0.0; + else + { + userData.kFor[irxn] = d_kinetics_data.Arr(irxn) * + pow(theta, d_kinetics_data.nArr(irxn)) * + exp(-d_kinetics_data.Ea(irxn) / boltz / theta); + //userData.kFor[irxn] = Arr[irxn]*pow(theta,nArr[irxn])*exp(-Ea[irxn]/boltz/theta); + } + } + + // Update ConcOld and initialize the ODE solution vector y[]. + for (int ispecies = 0; ispecies < nspecies; ispecies++){ + const double tmp = atom->dvector[ispecies][i]; + atom->dvector[ispecies+nspecies][i] = tmp; + y[ispecies] = tmp; + } + + // Solver the ODE system. + if (odeIntegrationFlag == ODE_LAMMPS_RK4) + { + rk4(t_stop, y, rwork, &userData); + + /* This should be a duplicate of the copy-out in the + rkf45 block but for the MY_EPSILON v. -1e-10 (literal) + difference. Can these be merged? */ + + // Store the solution back in atom->dvector. + for (int ispecies = 0; ispecies < nspecies; ispecies++){ + if(y[ispecies] < -MY_EPSILON) + error->one(FLERR,"Computed concentration in RK4 solver is < -10*DBL_EPSILON"); + else if(y[ispecies] < MY_EPSILON) + y[ispecies] = 0.0; + atom->dvector[ispecies][i] = y[ispecies]; + } + } + else if (odeIntegrationFlag == ODE_LAMMPS_RKF45) + { + rkf45(nspecies, t_stop, y, rwork, &userData, counter_i); + + // Store the solution back in atom->dvector. 
+ for (int ispecies = 0; ispecies < nspecies; ispecies++){ + if(y[ispecies] < -1.0e-10) + error->one(FLERR,"Computed concentration in RKF45 solver is < -1.0e-10"); + else if(y[ispecies] < MY_EPSILON) + y[ispecies] = 0.0; + atom->dvector[ispecies][i] = y[ispecies]; + } + + //if (diagnosticFrequency == 1 && diagnosticCounterPerODE[StepSum] != NULL) + if (diagnosticCounterPerODE[StepSum] != NULL) + { + diagnosticCounterPerODE[StepSum][i] = counter_i.nSteps; + diagnosticCounterPerODE[FuncSum][i] = counter_i.nFuncs; + } + } + + delete [] y; + delete [] userData.kFor; + delete [] userData.rxnRateLaw; + + counter += counter_i; + } // if + } // parallel_for lambda-body + + , Counters // reduction value + ); + + TimerType timer_ODE = getTimeStamp(); + + // Communicate the updated momenta and velocities to all nodes + comm->forward_comm_fix(this); + if(localTempFlag) delete [] dpdThetaLocal; + + TimerType timer_stop = getTimeStamp(); + + double time_ODE = getElapsedTime(timer_localTemperature, timer_ODE); + + printf("me= %d kokkos total= %g temp= %g ode= %g comm= %g nlocal= %d nfc= %d %d\n", comm->me, + getElapsedTime(timer_start, timer_stop), + getElapsedTime(timer_start, timer_localTemperature), + getElapsedTime(timer_localTemperature, timer_ODE), + getElapsedTime(timer_ODE, timer_stop), nlocal, Counters.nFuncs, Counters.nSteps); + + // Warn the user if a failure was detected in the ODE solver. + if (Counters.nFails > 0){ + char sbuf[128]; + sprintf(sbuf,"in FixRX::pre_force, ODE solver failed for %d atoms.", Counters.nFails); + error->warning(FLERR, sbuf); + } + +/* + // Compute and report ODE diagnostics, if requested. + if (odeIntegrationFlag == ODE_LAMMPS_RKF45 && diagnosticFrequency != 0){ + // Update the counters. 
+ diagnosticCounter[StepSum] += nSteps; + diagnosticCounter[FuncSum] += nFuncs; + diagnosticCounter[TimeSum] += time_ODE; + diagnosticCounter[AtomSum] += nlocal; + diagnosticCounter[numDiagnosticCounters-1] ++; + + if ( (diagnosticFrequency > 0 && + ((update->ntimestep - update->firststep) % diagnosticFrequency) == 0) || + (diagnosticFrequency < 0 && update->ntimestep == update->laststep) ) + this->odeDiagnostics(); + + for (int i = 0; i < numDiagnosticCounters; ++i) + if (diagnosticCounterPerODE[i]) + memory->destroy( diagnosticCounterPerODE[i] ); + } */ +} + +namespace LAMMPS_NS { +template class FixRxKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class FixRxKokkos; +#endif +} diff --git a/src/KOKKOS/fix_rx_kokkos.h b/src/KOKKOS/fix_rx_kokkos.h new file mode 100644 index 0000000000..4a41644257 --- /dev/null +++ b/src/KOKKOS/fix_rx_kokkos.h @@ -0,0 +1,124 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(rx/kk,FixRxKokkos) +FixStyle(rx/kk/device,FixRxKokkos) +FixStyle(rx/kk/host,FixRxKokkos) + +#else + +#ifndef LMP_FIX_RX_KOKKOS_H +#define LMP_FIX_RX_KOKKOS_H + +#include "fix_rx.h" +#include "pair_dpd_fdt_energy_kokkos.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +struct TagFixRxKokkosSolver +{ + enum { setToZero = (_setToZero == true) ? 
1 : 0 }; +}; + +template +class FixRxKokkos : public FixRX { + public: + FixRxKokkos(class LAMMPS *, int, char **); + virtual ~FixRxKokkos(); + virtual void init(); + virtual void pre_force(int); + + //template + // KOKKOS_INLINE_FUNCTION + //void operator()(SolverTag, const int&) const; + + struct CounterType + { + int nSteps, nIters, nFuncs, nFails; + + CounterType() : nSteps(0), nIters(0), nFuncs(0), nFails(0) {}; + + KOKKOS_INLINE_FUNCTION + CounterType& operator+=(const CounterType &rhs) + { + nSteps += rhs.nSteps; + nIters += rhs.nIters; + nFuncs += rhs.nFuncs; + nFails += rhs.nFails; + return *this; + } + + KOKKOS_INLINE_FUNCTION + volatile CounterType& operator+=(const volatile CounterType &rhs) volatile + { + nSteps += rhs.nSteps; + nIters += rhs.nIters; + nFuncs += rhs.nFuncs; + nFails += rhs.nFails; + return *this; + } + }; + + protected: + PairDPDfdtEnergyKokkos* pairDPDEKK; + + void solve_reactions(void); + + int rhs(double, const double *, double *, void *) const; + int rhs_dense (double, const double *, double *, void *) const; + int rhs_sparse(double, const double *, double *, void *) const; + + //!< Classic Runge-Kutta 4th-order stepper. + void rk4(const double t_stop, double *y, double *rwork, void *v_params) const; + + //!< Runge-Kutta-Fehlberg ODE Solver. + void rkf45(const int neq, const double t_stop, double *y, double *rwork, void *v_params, CounterType& counter) const; + + //!< Runge-Kutta-Fehlberg ODE stepper function. + void rkf45_step (const int neq, const double h, double y[], double y_out[], + double rwk[], void *) const; + + //!< Initial step size estimation for the Runge-Kutta-Fehlberg ODE solver. 
+ int rkf45_h0 (const int neq, const double t, const double t_stop, + const double hmin, const double hmax, + double& h0, double y[], double rwk[], void *v_params) const; + + template + struct KineticsType + { + typename ArrayTypes::t_float_1d Arr, nArr, Ea; + typename ArrayTypes::t_float_2d stoich, stoichReactants, stoichProducts; + }; + + //!< Kokkos versions of the kinetics data. + KineticsType h_kinetics_data; + KineticsType d_kinetics_data; + + bool update_kinetics_data; + + void create_kinetics_data(void); + +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +*/ From 41d3903f5a7226ef4b30d3fd6b818123354300d9 Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Sun, 22 Jan 2017 22:49:21 -0500 Subject: [PATCH 109/267] Added kokkos-managed parameters for FixRxKokkos. - Added kokkos-managed parameter data for the kinetics equations. - Removed dependencies in rhs() on atom and domain objects. TODO: 1. Switch to using KOKKOS data for dvector. 2. Port ComputeLocalTemp(...) to Kokkos (needs pairing algorithm). 
--- src/KOKKOS/fix_rx_kokkos.cpp | 135 +++++++++++++++++++++++------------ src/KOKKOS/fix_rx_kokkos.h | 13 +++- 2 files changed, 101 insertions(+), 47 deletions(-) diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index f8a10dff93..b989d6b2d4 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -475,8 +475,8 @@ int FixRxKokkos::rhs_dense(double t, const double *y, double *dydt, double *rxnRateLaw = userData->rxnRateLaw; double *kFor = userData->kFor; - const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms; - const int nspecies = atom->nspecies_dpd; + //const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms; + //const int nspecies = atom->nspecies_dpd; for(int ispecies=0; ispecies::rhs_dense(double t, const double *y, double *dydt, for(int ispecies=0; ispecies::rhs_dense(double t, const double *y, double *dydt, for(int ispecies=0; ispecies::rhs_sparse(double t, const double *y, double *dydt, { UserRHSData *userData = (UserRHSData *) v_params; - const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms; + //const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms; #define kFor (userData->kFor) #define kRev (NULL) @@ -519,11 +519,11 @@ int FixRxKokkos::rhs_sparse(double t, const double *y, double *dydt, #define conc (dydt) #define maxReactants (this->sparseKinetics_maxReactants) #define maxSpecies (this->sparseKinetics_maxSpecies) - #define nuk (this->sparseKinetics_nuk) - #define nu (this->sparseKinetics_nu) - #define inu (this->sparseKinetics_inu) - #define isIntegral(idx) (SparseKinetics_enableIntegralReactions \ - && this->sparseKinetics_isIntegralReaction[idx]) + #define nuk (this->d_kineticsData.nuk) + #define nu (this->d_kineticsData.nu) + #define inu (this->d_kineticsData.inu) + #define isIntegral(idx) ( SparseKinetics_enableIntegralReactions \ + && this->d_kineticsData.isIntegral(idx) ) for (int k = 0; k < nspecies; ++k) 
conc[k] = y[k] / VDPD; @@ -533,20 +533,20 @@ int FixRxKokkos::rhs_sparse(double t, const double *y, double *dydt, { double rxnRateLawForward; if (isIntegral(i)){ - rxnRateLawForward = kFor[i] * powint( conc[ nuk[i][0] ], inu[i][0]); + rxnRateLawForward = kFor[i] * powint( conc[ nuk(i,0) ], inu(i,0) ); for (int kk = 1; kk < maxReactants; ++kk){ - const int k = nuk[i][kk]; + const int k = nuk(i,kk); if (k == SparseKinetics_invalidIndex) break; //if (k != SparseKinetics_invalidIndex) - rxnRateLawForward *= powint( conc[k], inu[i][kk] ); + rxnRateLawForward *= powint( conc[k], inu(i,kk) ); } } else { - rxnRateLawForward = kFor[i] * pow( conc[ nuk[i][0] ], nu[i][0]); + rxnRateLawForward = kFor[i] * pow( conc[ nuk(i,0) ], nu(i,0) ); for (int kk = 1; kk < maxReactants; ++kk){ - const int k = nuk[i][kk]; + const int k = nuk(i,kk); if (k == SparseKinetics_invalidIndex) break; //if (k != SparseKinetics_invalidIndex) - rxnRateLawForward *= pow( conc[k], nu[i][kk] ); + rxnRateLawForward *= pow( conc[k], nu(i,kk) ); } } @@ -560,21 +560,21 @@ int FixRxKokkos::rhs_sparse(double t, const double *y, double *dydt, for (int i = 0; i < nreactions; ++i){ // Reactants ... - dydt[ nuk[i][0] ] -= nu[i][0] * rxnRateLaw[i]; + dydt[ nuk(i,0) ] -= nu(i,0) * rxnRateLaw[i]; for (int kk = 1; kk < maxReactants; ++kk){ - const int k = nuk[i][kk]; + const int k = nuk(i,kk); if (k == SparseKinetics_invalidIndex) break; //if (k != SparseKinetics_invalidIndex) - dydt[k] -= nu[i][kk] * rxnRateLaw[i]; + dydt[k] -= nu(i,kk) * rxnRateLaw[i]; } // Products ... 
- dydt[ nuk[i][maxReactants] ] += nu[i][maxReactants] * rxnRateLaw[i]; + dydt[ nuk(i,maxReactants) ] += nu(i,maxReactants) * rxnRateLaw[i]; for (int kk = maxReactants+1; kk < maxSpecies; ++kk){ - const int k = nuk[i][kk]; + const int k = nuk(i,kk); if (k == SparseKinetics_invalidIndex) break; //if (k != SparseKinetics_invalidIndex) - dydt[k] += nu[i][kk] * rxnRateLaw[i]; + dydt[k] += nu(i,kk) * rxnRateLaw[i]; } } @@ -674,34 +674,76 @@ void FixRxKokkos::create_kinetics_data(void) { printf("Inside FixRxKokkos::create_kinetics_data\n"); - memory->create_kokkos( d_kinetics_data.Arr, h_kinetics_data.Arr, nreactions, "KineticsType::Arr"); - memory->create_kokkos( d_kinetics_data.nArr, h_kinetics_data.nArr, nreactions, "KineticsType::nArr"); - memory->create_kokkos( d_kinetics_data.Ea, h_kinetics_data.Ea, nreactions, "KineticsType::Ea"); - - memory->create_kokkos( d_kinetics_data.stoich, h_kinetics_data.stoich, nreactions, nspecies, "KineticsType::stoich"); - memory->create_kokkos( d_kinetics_data.stoichReactants, h_kinetics_data.stoichReactants, nreactions, nspecies, "KineticsType::stoichReactants"); - memory->create_kokkos( d_kinetics_data.stoichProducts, h_kinetics_data.stoichProducts, nreactions, nspecies, "KineticsType::stoichProducts"); + memory->create_kokkos( d_kineticsData.Arr, h_kineticsData.Arr, nreactions, "KineticsType::Arr"); + memory->create_kokkos( d_kineticsData.nArr, h_kineticsData.nArr, nreactions, "KineticsType::nArr"); + memory->create_kokkos( d_kineticsData.Ea, h_kineticsData.Ea, nreactions, "KineticsType::Ea"); for (int i = 0; i < nreactions; ++i) { - h_kinetics_data.Arr[i] = Arr[i]; - h_kinetics_data.nArr[i] = nArr[i]; - h_kinetics_data.Ea[i] = Ea[i]; + h_kineticsData.Arr[i] = Arr[i]; + h_kineticsData.nArr[i] = nArr[i]; + h_kineticsData.Ea[i] = Ea[i]; + } - for (int k = 0; k < nspecies; ++k) + Kokkos::deep_copy( d_kineticsData.Arr, h_kineticsData.Arr ); + Kokkos::deep_copy( d_kineticsData.nArr, h_kineticsData.nArr ); + Kokkos::deep_copy( 
d_kineticsData.Ea, h_kineticsData.Ea ); + + if (useSparseKinetics) + { + + memory->create_kokkos( d_kineticsData.nu , h_kineticsData.nu , nreactions, sparseKinetics_maxSpecies, "KineticsType::nu"); + memory->create_kokkos( d_kineticsData.nuk, h_kineticsData.nuk, nreactions, sparseKinetics_maxSpecies, "KineticsType::nuk"); + + for (int i = 0; i < nreactions; ++i) + for (int k = 0; k < sparseKinetics_maxSpecies; ++k) + { + h_kineticsData.nu (i,k) = sparseKinetics_nu [i][k]; + h_kineticsData.nuk(i,k) = sparseKinetics_nuk[i][k]; + } + + Kokkos::deep_copy( d_kineticsData.nu, h_kineticsData.nu ); + Kokkos::deep_copy( d_kineticsData.nuk, h_kineticsData.nuk ); + + if (SparseKinetics_enableIntegralReactions) { - h_kinetics_data.stoich(i,k) = stoich[i][k]; - h_kinetics_data.stoichReactants(i,k) = stoichReactants[i][k]; - h_kinetics_data.stoichProducts(i,k) = stoichProducts[i][k]; + memory->create_kokkos( d_kineticsData.inu, h_kineticsData.inu, nreactions, sparseKinetics_maxSpecies, "KineticsType::inu"); + memory->create_kokkos( d_kineticsData.isIntegral, h_kineticsData.isIntegral, nreactions, "KineticsType::isIntegral"); + + for (int i = 0; i < nreactions; ++i) + { + h_kineticsData.isIntegral(i) = sparseKinetics_isIntegralReaction[i]; + + for (int k = 0; k < sparseKinetics_maxSpecies; ++k) + h_kineticsData.inu(i,k) = sparseKinetics_inu[i][k]; + } + + Kokkos::deep_copy( d_kineticsData.inu, h_kineticsData.inu ); + Kokkos::deep_copy( d_kineticsData.isIntegral, h_kineticsData.isIntegral ); } } - Kokkos::deep_copy( d_kinetics_data.Arr, h_kinetics_data.Arr ); - Kokkos::deep_copy( d_kinetics_data.nArr, h_kinetics_data.nArr ); - Kokkos::deep_copy( d_kinetics_data.Ea, h_kinetics_data.Ea ); - Kokkos::deep_copy( d_kinetics_data.stoich, h_kinetics_data.stoich ); - Kokkos::deep_copy( d_kinetics_data.stoichReactants, h_kinetics_data.stoichReactants ); - Kokkos::deep_copy( d_kinetics_data.stoichProducts, h_kinetics_data.stoichProducts ); + //else + //{ + + // Dense option + 
memory->create_kokkos( d_kineticsData.stoich, h_kineticsData.stoich, nreactions, nspecies, "KineticsType::stoich"); + memory->create_kokkos( d_kineticsData.stoichReactants, h_kineticsData.stoichReactants, nreactions, nspecies, "KineticsType::stoichReactants"); + memory->create_kokkos( d_kineticsData.stoichProducts, h_kineticsData.stoichProducts, nreactions, nspecies, "KineticsType::stoichProducts"); + + for (int i = 0; i < nreactions; ++i) + for (int k = 0; k < nspecies; ++k) + { + h_kineticsData.stoich(i,k) = stoich[i][k]; + h_kineticsData.stoichReactants(i,k) = stoichReactants[i][k]; + h_kineticsData.stoichProducts(i,k) = stoichProducts[i][k]; + } + + Kokkos::deep_copy( d_kineticsData.stoich, h_kineticsData.stoich ); + Kokkos::deep_copy( d_kineticsData.stoichReactants, h_kineticsData.stoichReactants ); + Kokkos::deep_copy( d_kineticsData.stoichProducts, h_kineticsData.stoichProducts ); + + //} update_kinetics_data = false; } @@ -743,6 +785,9 @@ void FixRxKokkos::pre_force(int vflag) const double boltz = force->boltz; const double t_stop = update->dt; // DPD time-step and integration length. + // Average DPD volume. Used in the RHS function. 
+ this->VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms; + /*if (odeIntegrationFlag == ODE_LAMMPS_RKF45 && diagnosticFrequency == 1) { memory->create( diagnosticCounterPerODE[StepSum], nlocal, "FixRX::diagnosticCounterPerODE"); @@ -771,9 +816,9 @@ void FixRxKokkos::pre_force(int vflag) userData.kFor[irxn] = 0.0; else { - userData.kFor[irxn] = d_kinetics_data.Arr(irxn) * - pow(theta, d_kinetics_data.nArr(irxn)) * - exp(-d_kinetics_data.Ea(irxn) / boltz / theta); + userData.kFor[irxn] = d_kineticsData.Arr(irxn) * + pow(theta, d_kineticsData.nArr(irxn)) * + exp(-d_kineticsData.Ea(irxn) / boltz / theta); //userData.kFor[irxn] = Arr[irxn]*pow(theta,nArr[irxn])*exp(-Ea[irxn]/boltz/theta); } } diff --git a/src/KOKKOS/fix_rx_kokkos.h b/src/KOKKOS/fix_rx_kokkos.h index 4a41644257..95872c67e9 100644 --- a/src/KOKKOS/fix_rx_kokkos.h +++ b/src/KOKKOS/fix_rx_kokkos.h @@ -75,6 +75,7 @@ class FixRxKokkos : public FixRX { protected: PairDPDfdtEnergyKokkos* pairDPDEKK; + double VDPD; void solve_reactions(void); @@ -100,13 +101,21 @@ class FixRxKokkos : public FixRX { template struct KineticsType { + // Arrhenius rate coefficients. typename ArrayTypes::t_float_1d Arr, nArr, Ea; + + // Dense versions. typename ArrayTypes::t_float_2d stoich, stoichReactants, stoichProducts; + + // Sparse versions. + typename ArrayTypes::t_int_2d nuk, inu; + typename ArrayTypes::t_float_2d nu; + typename ArrayTypes::t_int_1d isIntegral; }; //!< Kokkos versions of the kinetics data. - KineticsType h_kinetics_data; - KineticsType d_kinetics_data; + KineticsType h_kineticsData; + KineticsType d_kineticsData; bool update_kinetics_data; From 70fa9189a8c8d74ac1dc09084c15260aaa204612 Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Tue, 24 Jan 2017 21:49:16 -0500 Subject: [PATCH 110/267] Updated KOKKOS installer and updated USER-DPD FixRx to match KOKKOS version. - Updated the KOKKOS installer to include the fix_rx_kokkos.[cpp,h]. 
- Updated the USER-DPD version of fix_rx.[cpp,h] to sync with the Kokkos version. Solves child->parent class dependencies. --- src/KOKKOS/Install.sh | 2 + src/USER-DPD/fix_rx.cpp | 244 +++++++++++++++++++++++++++++----------- src/USER-DPD/fix_rx.h | 23 ++-- 3 files changed, 193 insertions(+), 76 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index f53f8624c4..db4fcf8ddc 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -103,6 +103,8 @@ action fix_wall_reflect_kokkos.cpp action fix_wall_reflect_kokkos.h action fix_dpd_energy_kokkos.cpp fix_dpd_energy.cpp action fix_dpd_energy_kokkos.h fix_dpd_energy.h +action fix_rx_kokkos.cpp fix_rx.cpp +action fix_rx_kokkos.h fix_rx.h action gridcomm_kokkos.cpp gridcomm.cpp action gridcomm_kokkos.h gridcomm.h action improper_harmonic_kokkos.cpp improper_harmonic.cpp diff --git a/src/USER-DPD/fix_rx.cpp b/src/USER-DPD/fix_rx.cpp index a55ae78110..28321dbecf 100644 --- a/src/USER-DPD/fix_rx.cpp +++ b/src/USER-DPD/fix_rx.cpp @@ -673,7 +673,17 @@ void FixRX::setup_pre_force(int vflag) if(restartFlag){ restartFlag = 0; - } else { + } + else + { + int ode_counter[4] = {0}; + + UserRHSData userData; + userData.kFor = new double[nreactions]; + userData.rxnRateLaw = new double[nreactions]; + + double *rwork = new double[8*nspecies]; + if(localTempFlag){ int count = nlocal + (newton_pair ? 
nghost : 0); dpdThetaLocal = new double[count]; @@ -686,22 +696,27 @@ void FixRX::setup_pre_force(int vflag) tmp = atom->dvector[ispecies][id]; atom->dvector[ispecies+nspecies][id] = tmp; } + for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit){ // Set the reaction rate constants to zero: no reactions occur at step 0 for(int irxn=0;irxnforward_comm_fix(this); if(localTempFlag) delete [] dpdThetaLocal; + + delete [] userData.kFor; + delete [] userData.rxnRateLaw; + delete [] rwork; } } @@ -709,12 +724,13 @@ void FixRX::setup_pre_force(int vflag) void FixRX::pre_force(int vflag) { + TimerType timer_start = getTimeStamp(); + int nlocal = atom->nlocal; int nghost = atom->nghost; int *mask = atom->mask; double *dpdTheta = atom->dpdTheta; int newton_pair = force->newton_pair; - double theta; if(localTempFlag){ int count = nlocal + (newton_pair ? nghost : 0); @@ -726,7 +742,10 @@ void FixRX::pre_force(int vflag) TimerType timer_localTemperature = getTimeStamp(); // Zero the counters for the ODE solvers. 
- this->nSteps = this->nIters = this->nFuncs = this->nFails = 0; + int nSteps = 0; + int nIters = 0; + int nFuncs = 0; + int nFails = 0; if (odeIntegrationFlag == ODE_LAMMPS_RKF45 && diagnosticFrequency == 1) { @@ -734,35 +753,66 @@ void FixRX::pre_force(int vflag) memory->create( diagnosticCounterPerODE[FuncSum], nlocal, "FixRX::diagnosticCounterPerODE"); } - double *rwork = new double[8*nspecies + nreactions]; + #pragma omp parallel \ + reduction(+: nSteps, nIters, nFuncs, nFails ) + { + double *rwork = new double[8*nspecies]; - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit){ - if (localTempFlag) - theta = dpdThetaLocal[i]; - else - theta = dpdTheta[i]; + UserRHSData userData; + userData.kFor = new double[nreactions]; + userData.rxnRateLaw = new double[nreactions]; - //Compute the reaction rate constants - for (int irxn = 0; irxn < nreactions; irxn++) - kR[irxn] = Arr[irxn]*pow(theta,nArr[irxn])*exp(-Ea[irxn]/force->boltz/theta); + int ode_counter[4] = { 0 }; - if (odeIntegrationFlag == ODE_LAMMPS_RK4) - rk4(i,rwork); - else if (odeIntegrationFlag == ODE_LAMMPS_RKF45) - rkf45(i,rwork); + #pragma omp for schedule(runtime) + for (int i = 0; i < nlocal; i++) + { + if (mask[i] & groupbit) + { + double theta; + if (localTempFlag) + theta = dpdThetaLocal[i]; + else + theta = dpdTheta[i]; + + //Compute the reaction rate constants + for (int irxn = 0; irxn < nreactions; irxn++) + userData.kFor[irxn] = Arr[irxn]*pow(theta,nArr[irxn])*exp(-Ea[irxn]/force->boltz/theta); + + if (odeIntegrationFlag == ODE_LAMMPS_RK4) + rk4(i, rwork, &userData); + else if (odeIntegrationFlag == ODE_LAMMPS_RKF45) + rkf45(i, rwork, &userData, ode_counter); + } } - TimerType timer_ODE = getTimeStamp(); + nSteps += ode_counter[0]; + nIters += ode_counter[1]; + nFuncs += ode_counter[2]; + nFails += ode_counter[3]; - delete [] rwork; + delete [] rwork; + delete [] userData.kFor; + delete [] userData.rxnRateLaw; + + } // end parallel region + + TimerType timer_ODE = getTimeStamp(); // 
Communicate the updated momenta and velocities to all nodes comm->forward_comm_fix(this); if(localTempFlag) delete [] dpdThetaLocal; + TimerType timer_stop = getTimeStamp(); + double time_ODE = getElapsedTime(timer_localTemperature, timer_ODE); + printf("me= %d total= %g temp= %g ode= %g comm= %g nlocal= %d nfc= %d %d\n", comm->me, + getElapsedTime(timer_start, timer_stop), + getElapsedTime(timer_start, timer_localTemperature), + getElapsedTime(timer_localTemperature, timer_ODE), + getElapsedTime(timer_ODE, timer_stop), nlocal, nFuncs, nSteps); + // Warn the user if a failure was detected in the ODE solver. if (nFails > 0){ char sbuf[128]; @@ -958,21 +1008,15 @@ void FixRX::setupParams() /* ---------------------------------------------------------------------- */ -void FixRX::rk4(int id, double *rwork) +void FixRX::rk4(int id, double *rwork, void* v_params) { - double *k1 = NULL; - if (rwork == NULL) - k1 = new double[6*nspecies + nreactions]; - else - k1 = rwork; + double *k1 = rwork; double *k2 = k1 + nspecies; double *k3 = k2 + nspecies; double *k4 = k3 + nspecies; double *y = k4 + nspecies; double *yp = y + nspecies; - double *dummyArray = yp + nspecies; // Passed to the rhs function. 
- const int numSteps = minSteps; const double h = update->dt / double(numSteps); @@ -989,25 +1033,25 @@ void FixRX::rk4(int id, double *rwork) for (int step = 0; step < numSteps; step++) { // k1 - rhs(0.0,y,k1,dummyArray); + rhs(0.0,y,k1,v_params); // k2 for (int ispecies = 0; ispecies < nspecies; ispecies++) yp[ispecies] = y[ispecies] + 0.5*h*k1[ispecies]; - rhs(0.0,yp,k2,dummyArray); + rhs(0.0,yp,k2,v_params); // k3 for (int ispecies = 0; ispecies < nspecies; ispecies++) yp[ispecies] = y[ispecies] + 0.5*h*k2[ispecies]; - rhs(0.0,yp,k3,dummyArray); + rhs(0.0,yp,k3,v_params); // k4 for (int ispecies = 0; ispecies < nspecies; ispecies++) yp[ispecies] = y[ispecies] + h*k3[ispecies]; - rhs(0.0,yp,k4,dummyArray); + rhs(0.0,yp,k4,v_params); for (int ispecies = 0; ispecies < nspecies; ispecies++) y[ispecies] += h*(k1[ispecies]/6.0 + k2[ispecies]/3.0 + k3[ispecies]/3.0 + k4[ispecies]/6.0); @@ -1022,9 +1066,6 @@ void FixRX::rk4(int id, double *rwork) y[ispecies] = 0.0; atom->dvector[ispecies][id] = y[ispecies]; } - - if (rwork == NULL) - delete [] k1; } /* ---------------------------------------------------------------------- */ @@ -1274,6 +1315,78 @@ void FixRX::odeDiagnostics(void) double max_per_proc[numCounters]; double min_per_proc[numCounters]; + if(1) + { + static bool firstStep = true; + + static TimerType oldTimeStamp (-1); + + TimerType now = getTimeStamp(); + + // Query the fix database and look for rx_weight for the balance fix. + int type_flag = -1; + int rx_weight_index = atom->find_custom( "rx_weight", /*0:int, 1:float*/ type_flag ); + + // Compute the average # of neighbors. 
+ double averageNumNeighbors = 0; + { + const int inum = pairDPDE->list->inum; + const int* ilist = pairDPDE->list->ilist; + const int* numneigh = pairDPDE->list->numneigh; + + for (int ii = 0; ii < inum; ++ii) + { + const int i = ilist[ii]; + averageNumNeighbors += numneigh[i]; + } + + averageNumNeighbors /= inum; + } + + printf("me= %d nst= %g nfc= %g time= %g nlocal= %g lmpnst= %g weight_idx= %d 1st= %d aveNeigh= %g\n", comm->me, this->diagnosticCounter[0], this->diagnosticCounter[1], this->diagnosticCounter[2], this->diagnosticCounter[3], this->diagnosticCounter[4], rx_weight_index, firstStep, averageNumNeighbors); + + if (rx_weight_index != -1 && !firstStep && 0) + { + double *rx_weight = atom->dvector[rx_weight_index]; + + const int nlocal = atom->nlocal; + const int *mask = atom->mask; + + if (odeIntegrationFlag == ODE_LAMMPS_RKF45 && diagnosticFrequency == 1) + { + const double total_time = getElapsedTime( oldTimeStamp, now ); + const double fixrx_time = this->diagnosticCounter[TimeSum]; + const double time_ratio = fixrx_time / total_time; + + double tsum = 0.0; + double tmin = 100000, tmax = 0; + for (int i = 0; i < nlocal; ++i) + if (mask[i] & groupbit) + { + double nfunc_ratio = double( diagnosticCounterPerODE[FuncSum][i] ) / diagnosticCounter[FuncSum]; + rx_weight[i] = nfunc_ratio * fixrx_time + (total_time - fixrx_time) / nlocal; + tmin = fmin( tmin, rx_weight[i] ); + tmax = fmax( tmax, rx_weight[i] ); + tsum += rx_weight[i]; + //rx_weight[i] = (double) diagnosticCounterPerODE[FuncSum][i]; + } + + printf("me= %d total= %g fixrx= %g ratio= %g tsum= %g %g %g %g\n", comm->me, total_time, fixrx_time, time_ratio, tsum, (total_time - fixrx_time) / nlocal, tmin, tmax); + } + else + { + error->warning(FLERR, "Dynamic load balancing enabled but per-atom weights not available."); + + for (int i = 0; i < nlocal; ++i) + if (mask[i] & groupbit) + rx_weight[i] = 1.0; + } + } + + firstStep = false; + oldTimeStamp = now; + } + // Compute counters per dpd time-step. 
for (int i = 0; i < numCounters; ++i){ my_vals[i] = this->diagnosticCounter[i] / nTimes; @@ -1347,7 +1460,7 @@ void FixRX::odeDiagnostics(void) if (screen) fprintf(screen,"%s\n", smesg); \ if (logfile) fprintf(logfile,"%s\n", smesg); } - sprintf(smesg, "FixRX::ODE Diagnostics: # of steps |# of rhs evals| run-time (sec)"); + sprintf(smesg, "FixRX::ODE Diagnostics: # of iters |# of rhs evals| run-time (sec) | # atoms"); print_mesg(smesg); sprintf(smesg, " AVG per ODE : %-12.5g | %-12.5g | %-12.5g", avg_per_atom[0], avg_per_atom[1], avg_per_atom[2]); @@ -1369,7 +1482,7 @@ void FixRX::odeDiagnostics(void) print_mesg(smesg); } - sprintf(smesg, " AVG per Proc : %-12.5g | %-12.5g | %-12.5g", avg_per_proc[0], avg_per_proc[1], avg_per_proc[2]); + sprintf(smesg, " AVG per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", avg_per_proc[StepSum], avg_per_proc[FuncSum], avg_per_proc[TimeSum], avg_per_proc[AtomSum]); print_mesg(smesg); if (comm->nprocs > 1){ @@ -1377,13 +1490,13 @@ void FixRX::odeDiagnostics(void) for (int i = 0; i < numCounters; ++i) rms_per_proc[i] = sqrt( sum_sq[i] / comm->nprocs ); - sprintf(smesg, " RMS per Proc : %-12.5g | %-12.5g | %-12.5g", rms_per_proc[0], rms_per_proc[1], rms_per_proc[2]); + sprintf(smesg, " RMS per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", rms_per_proc[0], rms_per_proc[1], rms_per_proc[2], rms_per_proc[AtomSum]); print_mesg(smesg); - sprintf(smesg, " MAX per Proc : %-12.5g | %-12.5g | %-12.5g", max_per_proc[0], max_per_proc[1], max_per_proc[2]); + sprintf(smesg, " MAX per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", max_per_proc[0], max_per_proc[1], max_per_proc[2], max_per_proc[AtomSum]); print_mesg(smesg); - sprintf(smesg, " MIN per Proc : %-12.5g | %-12.5g | %-12.5g", min_per_proc[0], min_per_proc[1], min_per_proc[2]); + sprintf(smesg, " MIN per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", min_per_proc[0], min_per_proc[1], min_per_proc[2], min_per_proc[AtomSum]); print_mesg(smesg); } @@ -1403,7 +1516,7 @@ void 
FixRX::odeDiagnostics(void) return; } -void FixRX::rkf45(int id, double *rwork) +void FixRX::rkf45(int id, double *rwork, void *v_param, int ode_counter[]) { // Rounding coefficient. const double uround = DBL_EPSILON; @@ -1412,12 +1525,7 @@ void FixRX::rkf45(int id, double *rwork) const double adaption_limit = 4.0; //double *y = new double[8*nspecies + nreactions]; - double *y = NULL; - if (rwork == NULL) - y = new double[8*nspecies + nreactions]; - else - y = rwork; - double *rhstmp = y + 8*nspecies; + double *y = rwork; const int neq = nspecies; @@ -1454,7 +1562,7 @@ void FixRX::rkf45(int id, double *rwork) if (h < h_min){ //fprintf(stderr,"hin not implemented yet\n"); //exit(-1); - nfe = rkf45_h0 (neq, t, t_stop, h_min, h_max, h, y, y + neq, rhstmp); + nfe = rkf45_h0 (neq, t, t_stop, h_min, h_max, h, y, y + neq, v_param); } //printf("t= %e t_stop= %e h= %e\n", t, t_stop, h); @@ -1465,7 +1573,7 @@ void FixRX::rkf45(int id, double *rwork) double *eout = yout + neq; // Take a trial step. - rkf45_step (neq, h, y, yout, eout, rhstmp); + rkf45_step (neq, h, y, yout, eout, v_param); // Estimate the solution error. // ... weighted 2-norm of the error. @@ -1513,16 +1621,17 @@ void FixRX::rkf45(int id, double *rwork) if (maxIters && nit > maxIters){ //fprintf(stderr,"atom[%d] took too many iterations in rkf45 %d %e %e\n", id, nit, t, t_stop); - nFails ++; + //nFails ++; + ode_counter[3] ++; break; // We should set an error here so that the solution is not used! 
} } // end while - nSteps += nst; - nIters += nit; - nFuncs += nfe; + ode_counter[0] += nst; + ode_counter[1] += nit; + ode_counter[2] += nfe; //if (diagnosticFrequency == 1 && diagnosticCounterPerODE[StepSum] != NULL) if (diagnosticCounterPerODE[StepSum] != NULL){ @@ -1539,9 +1648,6 @@ void FixRX::rkf45(int id, double *rwork) y[ispecies] = 0.0; atom->dvector[ispecies][id] = y[ispecies]; } - - if (rwork == NULL) - delete [] y; } /* ---------------------------------------------------------------------- */ @@ -1559,21 +1665,23 @@ int FixRX::rhs(double t, const double *y, double *dydt, void *params) int FixRX::rhs_dense(double t, const double *y, double *dydt, void *params) { - double rxnRateLawForward; - double *rxnRateLaw = (double *) params; - double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms; - double concentration; - int nspecies = atom->nspecies_dpd; + UserRHSData *userData = (UserRHSData *) params; + + double *rxnRateLaw = userData->rxnRateLaw; + double *kFor = userData->kFor; + + const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms; + const int nspecies = atom->nspecies_dpd; for(int ispecies=0; ispeciesxprd * domain->yprd * domain->zprd / atom->natoms; - #define kFor (this->kR) + #define kFor (userData->kFor) #define kRev (NULL) - #define rxnRateLaw (_rxnRateLaw) + #define rxnRateLaw (userData->rxnRateLaw) #define conc (dydt) #define maxReactants (this->sparseKinetics_maxReactants) #define maxSpecies (this->sparseKinetics_maxSpecies) diff --git a/src/USER-DPD/fix_rx.h b/src/USER-DPD/fix_rx.h index c35c9afabf..5e226aec73 100644 --- a/src/USER-DPD/fix_rx.h +++ b/src/USER-DPD/fix_rx.h @@ -66,19 +66,19 @@ class FixRX : public Fix { double *kR; //!< Classic Runge-Kutta 4th-order stepper. - void rk4(int,double*); + void rk4(int, double*, void*); //!< Runge-Kutta-Fehlberg ODE Solver. - void rkf45(int,double*); + void rkf45(int, double*, void*, int ode_counter[]); //!< Runge-Kutta-Fehlberg ODE stepper function. 
void rkf45_step (const int neq, const double h, double y[], double y_out[], - double rwk[], void* v_param); + double rwk[], void *); //!< Initial step size estimation for the Runge-Kutta-Fehlberg ODE solver. int rkf45_h0 (const int neq, const double t, const double t_stop, const double hmin, const double hmax, - double& h0, double y[], double rwk[], void* v_params); + double& h0, double y[], double rwk[], void *v_params); class PairDPDfdtEnergy *pairDPDE; double *dpdThetaLocal; @@ -90,6 +90,13 @@ class FixRX : public Fix { int rhs(double, const double *, double *, void *); int rhs_dense (double, const double *, double *, void *); + // User-defined data container needed in rhs. + struct UserRHSData + { + double *kFor; + double *rxnRateLaw; + }; + // Sparse stoichiometric matrix storage format and methods. bool useSparseKinetics; //SparseKinetics sparseKinetics; @@ -116,10 +123,10 @@ class FixRX : public Fix { double relTol, absTol; //!< Relative and absolute tolerances for the ODE solver(s). // ODE Diagnostics - int nSteps; //!< # of accepted steps taken over all atoms. - int nIters; //!< # of attemped steps for all atoms. - int nFuncs; //!< # of RHS evaluations for all atoms. - int nFails; //!< # of ODE systems that failed (for some reason). + //int nSteps; //!< # of accepted steps taken over all atoms. + //int nIters; //!< # of attemped steps for all atoms. + //int nFuncs; //!< # of RHS evaluations for all atoms. + //int nFails; //!< # of ODE systems that failed (for some reason). int diagnosticFrequency; //!< Frequency (LMP steps) that run-time diagnostics will be printed to the log. enum { numDiagnosticCounters = 5 }; From 2ea900df007e59905720503adbc4955c5c45b574 Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Sat, 28 Jan 2017 10:41:16 -0500 Subject: [PATCH 111/267] Updated FixRxKokkos to use kokkos-managed data objects. - Switched to use kokkos dvector, mask, and dpdTheta views from atomKK. 
--- src/KOKKOS/fix_rx_kokkos.cpp | 54 ++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index b989d6b2d4..19da344db8 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -760,9 +760,9 @@ void FixRxKokkos::pre_force(int vflag) TimerType timer_start = getTimeStamp(); - int nlocal = atom->nlocal; - int nghost = atom->nghost; - int newton_pair = force->newton_pair; + const int nlocal = atom->nlocal; + const int nghost = atom->nghost; + const int newton_pair = force->newton_pair; const bool setToZero = false; // don't set the forward rates to zero. @@ -776,12 +776,23 @@ void FixRxKokkos::pre_force(int vflag) TimerType timer_localTemperature = getTimeStamp(); // Total counters from the ODE solvers. - CounterType Counters; + CounterType TotalCounters; // Set data needed in the operators. - int *mask = atom->mask; - double *dpdTheta = atom->dpdTheta; + // ... + //int *mask = atom->mask; + //double *dpdTheta = atom->dpdTheta; + + // Local references to the atomKK objects. + typename ArrayTypes::t_efloat_1d d_dpdTheta = atomKK->k_dpdTheta.view(); + typename ArrayTypes::t_float_2d d_dvector = atomKK->k_dvector.view(); + typename ArrayTypes::t_int_1d d_mask = atomKK->k_mask.view(); + + // Get up-to-date data. + atomKK->sync( execution_space, MASK_MASK | DVECTOR_MASK | DPDTHETA_MASK ); + + // Set some constants outside of the parallel_for const double boltz = force->boltz; const double t_stop = update->dt; // DPD time-step and integration length. @@ -796,7 +807,7 @@ void FixRxKokkos::pre_force(int vflag) Kokkos::parallel_reduce( nlocal, LAMMPS_LAMBDA(int i, CounterType &counter) { - if (mask[i] & groupbit) + if (d_mask(i) & groupbit) { double *y = new double[8*nspecies]; double *rwork = y + nspecies; @@ -807,7 +818,7 @@ void FixRxKokkos::pre_force(int vflag) CounterType counter_i; - const double theta = (localTempFlag) ? 
dpdThetaLocal[i] : dpdTheta[i]; + const double theta = (localTempFlag) ? dpdThetaLocal[i] : d_dpdTheta(i); //Compute the reaction rate constants for (int irxn = 0; irxn < nreactions; irxn++) @@ -819,14 +830,13 @@ void FixRxKokkos::pre_force(int vflag) userData.kFor[irxn] = d_kineticsData.Arr(irxn) * pow(theta, d_kineticsData.nArr(irxn)) * exp(-d_kineticsData.Ea(irxn) / boltz / theta); - //userData.kFor[irxn] = Arr[irxn]*pow(theta,nArr[irxn])*exp(-Ea[irxn]/boltz/theta); } } // Update ConcOld and initialize the ODE solution vector y[]. for (int ispecies = 0; ispecies < nspecies; ispecies++){ - const double tmp = atom->dvector[ispecies][i]; - atom->dvector[ispecies+nspecies][i] = tmp; + const double tmp = d_dvector(ispecies, i); + d_dvector(ispecies+nspecies, i) = tmp; y[ispecies] = tmp; } @@ -845,7 +855,7 @@ void FixRxKokkos::pre_force(int vflag) error->one(FLERR,"Computed concentration in RK4 solver is < -10*DBL_EPSILON"); else if(y[ispecies] < MY_EPSILON) y[ispecies] = 0.0; - atom->dvector[ispecies][i] = y[ispecies]; + d_dvector(ispecies,i) = y[ispecies]; } } else if (odeIntegrationFlag == ODE_LAMMPS_RKF45) @@ -858,7 +868,7 @@ void FixRxKokkos::pre_force(int vflag) error->one(FLERR,"Computed concentration in RKF45 solver is < -1.0e-10"); else if(y[ispecies] < MY_EPSILON) y[ispecies] = 0.0; - atom->dvector[ispecies][i] = y[ispecies]; + d_dvector(ispecies,i) = y[ispecies]; } //if (diagnosticFrequency == 1 && diagnosticCounterPerODE[StepSum] != NULL) @@ -877,13 +887,21 @@ void FixRxKokkos::pre_force(int vflag) } // if } // parallel_for lambda-body - , Counters // reduction value + , TotalCounters // reduction value ); TimerType timer_ODE = getTimeStamp(); - // Communicate the updated momenta and velocities to all nodes + // Signal that dvector has been modified on this execution space. 
+ atomKK->modified( execution_space, DVECTOR_MASK ); + + // Communicate the updated species data to all nodes + atomKK->sync ( Host, DVECTOR_MASK ); + comm->forward_comm_fix(this); + + atomKK->modified ( Host, DVECTOR_MASK ); + if(localTempFlag) delete [] dpdThetaLocal; TimerType timer_stop = getTimeStamp(); @@ -894,12 +912,12 @@ void FixRxKokkos::pre_force(int vflag) getElapsedTime(timer_start, timer_stop), getElapsedTime(timer_start, timer_localTemperature), getElapsedTime(timer_localTemperature, timer_ODE), - getElapsedTime(timer_ODE, timer_stop), nlocal, Counters.nFuncs, Counters.nSteps); + getElapsedTime(timer_ODE, timer_stop), nlocal, TotalCounters.nFuncs, TotalCounters.nSteps); // Warn the user if a failure was detected in the ODE solver. - if (Counters.nFails > 0){ + if (TotalCounters.nFails > 0){ char sbuf[128]; - sprintf(sbuf,"in FixRX::pre_force, ODE solver failed for %d atoms.", Counters.nFails); + sprintf(sbuf,"in FixRX::pre_force, ODE solver failed for %d atoms.", TotalCounters.nFails); error->warning(FLERR, sbuf); } From 843f3a9192564bc863d739636453598f046a8555 Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Sat, 28 Jan 2017 12:02:49 -0500 Subject: [PATCH 112/267] Updates to FixRxKokkos ... - Added templated computeLocalTemp<>() to FixRxKokkos but still using the original host data pointers. - Updated the copy-back to dvector operation to be the same with RK4 and RKF45 per discussion with J. Larentzos. TODO: - Add kokkos data for computeLocalTemp and parallel_for loop. 
--- src/KOKKOS/fix_rx_kokkos.cpp | 163 ++++++++++++++++++++++++++++------- src/KOKKOS/fix_rx_kokkos.h | 3 + 2 files changed, 136 insertions(+), 30 deletions(-) diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index 19da344db8..45af816810 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -39,6 +39,10 @@ using namespace MathSpecial; #define SparseKinetics_enableIntegralReactions (true) #define SparseKinetics_invalidIndex (-1) +// From fix_rx.cpp ... this should be lifted into fix_rx.h or fix_rx_kokkos.h? +enum{NONE,HARMONIC}; +enum{LUCY}; + namespace /* anonymous */ { @@ -770,7 +774,19 @@ void FixRxKokkos::pre_force(int vflag) int count = nlocal + (newton_pair ? nghost : 0); dpdThetaLocal = new double[count]; memset(dpdThetaLocal, 0, sizeof(double)*count); - computeLocalTemperature(); + //FixRx::computeLocalTemperature(); + + // Are there is no other options than wtFlag = (0)LUCY and localTempFlag = NONE : HARMONIC? + if (localTempFlag == HARMONIC) + if (newton_pair) + computeLocalTemperature (); + else + computeLocalTemperature (); + else + if (newton_pair) + computeLocalTemperature (); + else + computeLocalTemperature (); } TimerType timer_localTemperature = getTimeStamp(); @@ -834,7 +850,8 @@ void FixRxKokkos::pre_force(int vflag) } // Update ConcOld and initialize the ODE solution vector y[]. - for (int ispecies = 0; ispecies < nspecies; ispecies++){ + for (int ispecies = 0; ispecies < nspecies; ispecies++) + { const double tmp = d_dvector(ispecies, i); d_dvector(ispecies+nspecies, i) = tmp; y[ispecies] = tmp; @@ -844,50 +861,41 @@ void FixRxKokkos::pre_force(int vflag) if (odeIntegrationFlag == ODE_LAMMPS_RK4) { rk4(t_stop, y, rwork, &userData); - - /* This should be a duplicate of the copy-out in the - rkf45 block but for the MY_EPSILON v. -1e-10 (literal) - difference. Can these be merged? */ - - // Store the solution back in atom->dvector. 
- for (int ispecies = 0; ispecies < nspecies; ispecies++){ - if(y[ispecies] < -MY_EPSILON) - error->one(FLERR,"Computed concentration in RK4 solver is < -10*DBL_EPSILON"); - else if(y[ispecies] < MY_EPSILON) - y[ispecies] = 0.0; - d_dvector(ispecies,i) = y[ispecies]; - } } else if (odeIntegrationFlag == ODE_LAMMPS_RKF45) { rkf45(nspecies, t_stop, y, rwork, &userData, counter_i); - // Store the solution back in atom->dvector. - for (int ispecies = 0; ispecies < nspecies; ispecies++){ - if(y[ispecies] < -1.0e-10) - error->one(FLERR,"Computed concentration in RKF45 solver is < -1.0e-10"); - else if(y[ispecies] < MY_EPSILON) - y[ispecies] = 0.0; - d_dvector(ispecies,i) = y[ispecies]; - } - //if (diagnosticFrequency == 1 && diagnosticCounterPerODE[StepSum] != NULL) - if (diagnosticCounterPerODE[StepSum] != NULL) - { - diagnosticCounterPerODE[StepSum][i] = counter_i.nSteps; - diagnosticCounterPerODE[FuncSum][i] = counter_i.nFuncs; - } + //if (diagnosticCounterPerODE[StepSum] != NULL) + //{ + // diagnosticCounterPerODE[StepSum][i] = counter_i.nSteps; + // diagnosticCounterPerODE[FuncSum][i] = counter_i.nFuncs; + //} + } + + // Store the solution back in dvector. + for (int ispecies = 0; ispecies < nspecies; ispecies++) + { + if (y[ispecies] < -MY_EPSILON) + error->one(FLERR,"Computed concentration in RK solver is < -10*DBL_EPSILON"); + else if (y[ispecies] < MY_EPSILON) + y[ispecies] = 0.0; + + d_dvector(ispecies,i) = y[ispecies]; } delete [] y; delete [] userData.kFor; delete [] userData.rxnRateLaw; + // Update the iteration statistics counter. Is this unique for each iteration? counter += counter_i; + } // if } // parallel_for lambda-body - , TotalCounters // reduction value + , TotalCounters // reduction value for all iterations. 
); TimerType timer_ODE = getTimeStamp(); @@ -942,6 +950,101 @@ void FixRxKokkos::pre_force(int vflag) } */ } +/* ---------------------------------------------------------------------- */ + +template + template +void FixRxKokkos::computeLocalTemperature() +{ + int i,j,ii,jj,inum,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz; + double rsq; + int *ilist,*jlist,*numneigh,**firstneigh; + + double **x = atom->x; + int *type = atom->type; + int nlocal = atom->nlocal; + int nghost = atom->nghost; + //int newton_pair = force->newton_pair; + + // local temperature variables + double wij=0.0; + double *dpdTheta = atom->dpdTheta; + + // Initialize the local temperature weight array + int sumWeightsCt = nlocal + (IS_NEWTON_PAIR ? nghost : 0); + sumWeights = new double[sumWeightsCt]; + memset(sumWeights, 0, sizeof(double)*sumWeightsCt); + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = type[j]; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < pairDPDE->cutsq[itype][jtype]) { + double rcut = sqrt(pairDPDE->cutsq[itype][jtype]); + double rij = sqrt(rsq); + double ratio = rij/rcut; + + // Lucy's Weight Function + if (WT_FLAG == LUCY) + { + wij = (1.0+3.0*ratio) * (1.0-ratio)*(1.0-ratio)*(1.0-ratio); + dpdThetaLocal[i] += wij/dpdTheta[j]; + if (IS_NEWTON_PAIR || j < nlocal) + dpdThetaLocal[j] += wij/dpdTheta[i]; + } + + sumWeights[i] += wij; + if (IS_NEWTON_PAIR || j < nlocal) + sumWeights[j] += wij; + } + } + } + if (IS_NEWTON_PAIR) comm->reverse_comm_fix(this); + + // self-interaction for local temperature + for (i = 0; i < nlocal; i++){ + + // 
Lucy Weight Function + if (WT_FLAG == LUCY) + { + wij = 1.0; + dpdThetaLocal[i] += wij / dpdTheta[i]; + } + sumWeights[i] += wij; + + // Normalized local temperature + dpdThetaLocal[i] = dpdThetaLocal[i] / sumWeights[i]; + + if (LOCAL_TEMP_FLAG == HARMONIC) + dpdThetaLocal[i] = 1.0 / dpdThetaLocal[i]; + + } + + delete [] sumWeights; +} + namespace LAMMPS_NS { template class FixRxKokkos; #ifdef KOKKOS_HAVE_CUDA diff --git a/src/KOKKOS/fix_rx_kokkos.h b/src/KOKKOS/fix_rx_kokkos.h index 95872c67e9..ec9a8fa976 100644 --- a/src/KOKKOS/fix_rx_kokkos.h +++ b/src/KOKKOS/fix_rx_kokkos.h @@ -121,6 +121,9 @@ class FixRxKokkos : public FixRX { void create_kinetics_data(void); + template + void computeLocalTemperature(); + }; } From acba25c3831249f54353f1a0a76b63f42881da6f Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Sat, 28 Jan 2017 15:58:21 -0500 Subject: [PATCH 113/267] Added kokkos datatypes to FixRxKokkos::computeLocalTemperature(...) Added kokkos dual-view datatypes used in computeLocalTemperature and pre_force (e.g., dpdThetaLocal) but still using the original host pointers for the pack/unpack operations. TODO: - The Kokkos neighbor list is not working. Need to request a Kokkos neighbor list in ::init(). Then, replace objects like list->ilist[] with k_list->d_ilist(). - Add another template parameter for HALFTHREAD and create (automatic) atomic view of dpdThetaLocal and sumWeights. - Add modify/sync comments and replace the host-only pointers in the pack/unpack methods. 
--- src/KOKKOS/fix_rx_kokkos.cpp | 220 +++++++++++++++++++++++++---------- src/KOKKOS/fix_rx_kokkos.h | 6 +- 2 files changed, 166 insertions(+), 60 deletions(-) diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index 45af816810..491b32e01d 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -95,6 +95,15 @@ void FixRxKokkos::init() /* ---------------------------------------------------------------------- */ +//template +//void FixRXKokkos::init_list(int, class NeighList* ptr) +//{ +// printf("Inside FixRxKokkos::init_list\n"); +// this->list = ptr; +//} + +/* ---------------------------------------------------------------------- */ + template void FixRxKokkos::rk4(const double t_stop, double *y, double *rwork, void* v_params) const { @@ -770,12 +779,17 @@ void FixRxKokkos::pre_force(int vflag) const bool setToZero = false; // don't set the forward rates to zero. - if(localTempFlag){ - int count = nlocal + (newton_pair ? nghost : 0); - dpdThetaLocal = new double[count]; - memset(dpdThetaLocal, 0, sizeof(double)*count); + if (localTempFlag) + { + const int count = nlocal + (newton_pair ? nghost : 0); + + //dpdThetaLocal = new double[count]; + //memset(dpdThetaLocal, 0, sizeof(double)*count); //FixRx::computeLocalTemperature(); + memory->create_kokkos (k_dpdThetaLocal, dpdThetaLocal, count, "FixRxKokkos::dpdThetaLocal"); + d_dpdThetaLocal = k_dpdThetaLocal.d_view; + // Are there is no other options than wtFlag = (0)LUCY and localTempFlag = NONE : HARMONIC? if (localTempFlag == HARMONIC) if (newton_pair) @@ -802,8 +816,8 @@ void FixRxKokkos::pre_force(int vflag) // Local references to the atomKK objects. 
typename ArrayTypes::t_efloat_1d d_dpdTheta = atomKK->k_dpdTheta.view(); - typename ArrayTypes::t_float_2d d_dvector = atomKK->k_dvector.view(); - typename ArrayTypes::t_int_1d d_mask = atomKK->k_mask.view(); + typename ArrayTypes::t_float_2d d_dvector = atomKK->k_dvector.view(); + typename ArrayTypes::t_int_1d d_mask = atomKK->k_mask.view(); // Get up-to-date data. atomKK->sync( execution_space, MASK_MASK | DVECTOR_MASK | DPDTHETA_MASK ); @@ -834,7 +848,8 @@ void FixRxKokkos::pre_force(int vflag) CounterType counter_i; - const double theta = (localTempFlag) ? dpdThetaLocal[i] : d_dpdTheta(i); + //const double theta = (localTempFlag) ? dpdThetaLocal[i] : d_dpdTheta(i); + const double theta = (localTempFlag) ? d_dpdThetaLocal(i) : d_dpdTheta(i); //Compute the reaction rate constants for (int irxn = 0; irxn < nreactions; irxn++) @@ -910,7 +925,11 @@ void FixRxKokkos::pre_force(int vflag) atomKK->modified ( Host, DVECTOR_MASK ); - if(localTempFlag) delete [] dpdThetaLocal; + if (localTempFlag) + { + //delete [] dpdThetaLocal; + memory->destroy_kokkos(k_dpdThetaLocal, dpdThetaLocal); + } TimerType timer_stop = getTimeStamp(); @@ -953,96 +972,179 @@ void FixRxKokkos::pre_force(int vflag) /* ---------------------------------------------------------------------- */ template - template + template void FixRxKokkos::computeLocalTemperature() { - int i,j,ii,jj,inum,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz; - double rsq; - int *ilist,*jlist,*numneigh,**firstneigh; + printf("Inside FixRxKokkos::computeLocalTemperature: %d %d %d %d\n", WT_FLAG, LOCAL_TEMP_FLAG, NEWTON_PAIR, (int)lmp->kokkos->neighflag); - double **x = atom->x; - int *type = atom->type; - int nlocal = atom->nlocal; - int nghost = atom->nghost; - //int newton_pair = force->newton_pair; + //int inum,jnum,itype,jtype; + //double xtmp,ytmp,ztmp,delx,dely,delz; + //double rsq; + //int *ilist,*jlist,*numneigh,**firstneigh; + + //double **x = atom->x; + //int *type = atom->type; + //double *dpdTheta = 
atom->dpdTheta; + + typename ArrayTypes::t_x_array_randomread d_x = atomKK->k_x.view(); + typename ArrayTypes::t_int_1d_randomread d_type = atomKK->k_type.view(); + typename ArrayTypes::t_efloat_1d d_dpdTheta = atomKK->k_dpdTheta.view(); + + atomKK->sync(execution_space, X_MASK | TYPE_MASK | DPDTHETA_MASK ); + + const int nlocal = atom->nlocal; + const int nghost = atom->nghost; + //const int newton_pair = force->newton_pair; // local temperature variables - double wij=0.0; - double *dpdTheta = atom->dpdTheta; + //double wij=0.0; + + // Pull from pairDPDE. The pairDPDEKK objects are producted so recreate here for now. + //pairDPDEKK->k_cutsq.template sync(); + //typename ArrayTypes::t_ffloat_2d d_cutsq = pairDPDEKK->k_cutsq.template view::tdual_ffloat_2d k_cutsq; + typename ArrayTypes::t_ffloat_2d d_cutsq; + double **h_cutsq; + + { + const int ntypes = atom->ntypes; + + memory->create_kokkos (k_cutsq, h_cutsq, ntypes+1, ntypes+1, "pair:cutsq"); + d_cutsq = k_cutsq.template view(); + + for (int i = 1; i <= ntypes; ++i) + for (int j = i; j <= ntypes; ++j) + { + k_cutsq.h_view(i,j) = pairDPDE->cutsq[i][j]; + k_cutsq.h_view(j,i) = k_cutsq.h_view(i,j); + } + + k_cutsq.template modify(); + k_cutsq.template sync(); + } // Initialize the local temperature weight array - int sumWeightsCt = nlocal + (IS_NEWTON_PAIR ? nghost : 0); - sumWeights = new double[sumWeightsCt]; - memset(sumWeights, 0, sizeof(double)*sumWeightsCt); + int sumWeightsCt = nlocal + (NEWTON_PAIR ? nghost : 0); + //sumWeights = new double[sumWeightsCt]; + //memset(sumWeights, 0, sizeof(double)*sumWeightsCt); - inum = list->inum; - ilist = list->ilist; - numneigh = list->numneigh; - firstneigh = list->firstneigh; + memory->create_kokkos (k_sumWeights, sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights"); + d_sumWeights = k_sumWeights.d_view; + + // Initialize the accumulator to zero ... 
+ Kokkos::parallel_for(nlocal, + LAMMPS_LAMBDA(int i) + { + d_sumWeights(i) = 0.0; + } + ); + + const int inum = list->inum; + + // Local list views. (This isn't working!) + //NeighListKokkos* k_list = static_cast*>(list); + //if (not(list->kokkos)) + //{ + // error->one(FLERR,"list is not a Kokkos list\n"); + //} + + //typename ArrayTypes::t_neighbors_2d d_neighbors = k_list->d_neighbors; + //typename ArrayTypes::t_int_1d d_ilist = k_list->d_ilist; + //typename ArrayTypes::t_int_1d d_numneigh = k_list->d_numneigh; + + int* ilist = list->ilist; + int* numneigh = list->numneigh; + int** firstneigh = list->firstneigh; // loop over neighbors of my atoms - for (ii = 0; ii < inum; ii++) { - i = ilist[ii]; - xtmp = x[i][0]; - ytmp = x[i][1]; - ztmp = x[i][2]; - itype = type[i]; - jlist = firstneigh[i]; - jnum = numneigh[i]; + for (int ii = 0; ii < inum; ii++) + { + const int i = ilist[ii]; + //const int i = d_ilist(ii); + + //const double xtmp = x[i][0]; + //const double ytmp = x[i][1]; + //const double ztmp = x[i][2]; + //const int itype = type[i]; + const double xtmp = d_x(i,0); + const double ytmp = d_x(i,1); + const double ztmp = d_x(i,2); + const int itype = d_type(i); - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - j &= NEIGHMASK; - jtype = type[j]; + int *jlist = firstneigh[i]; + const int jnum = numneigh[i]; + //const int jnum = d_numneigh(i); - delx = xtmp - x[j][0]; - dely = ytmp - x[j][1]; - delz = ztmp - x[j][2]; - rsq = delx*delx + dely*dely + delz*delz; + for (int jj = 0; jj < jnum; jj++) + { + const int j = (jlist[jj] & NEIGHMASK); + //const int j = (d_neighbors(i,jj) & NEIGHMASK); + //const int jtype = type[j]; + const int jtype = d_type(j); - if (rsq < pairDPDE->cutsq[itype][jtype]) { - double rcut = sqrt(pairDPDE->cutsq[itype][jtype]); + //const double delx = xtmp - x[j][0]; + //const double dely = ytmp - x[j][1]; + //const double delz = ztmp - x[j][2]; + const double delx = xtmp - d_x(j,0); + const double dely = ytmp - d_x(j,1); + const double 
delz = ztmp - d_x(j,2); + const double rsq = delx*delx + dely*dely + delz*delz; + + const double cutsq_ij = d_cutsq(itype,jtype); + + if (rsq < cutsq_ij) + { + const double rcut = sqrt( cutsq_ij ); double rij = sqrt(rsq); double ratio = rij/rcut; + double wij = 0.0; + // Lucy's Weight Function if (WT_FLAG == LUCY) { wij = (1.0+3.0*ratio) * (1.0-ratio)*(1.0-ratio)*(1.0-ratio); - dpdThetaLocal[i] += wij/dpdTheta[j]; - if (IS_NEWTON_PAIR || j < nlocal) - dpdThetaLocal[j] += wij/dpdTheta[i]; + d_dpdThetaLocal(i) += wij / d_dpdTheta(j); + if (NEWTON_PAIR || j < nlocal) + d_dpdThetaLocal(j) += wij / d_dpdTheta(i); } - sumWeights[i] += wij; - if (IS_NEWTON_PAIR || j < nlocal) - sumWeights[j] += wij; + d_sumWeights(i) += wij; + if (NEWTON_PAIR || j < nlocal) + d_sumWeights(j) += wij; } } } - if (IS_NEWTON_PAIR) comm->reverse_comm_fix(this); + + if (NEWTON_PAIR) comm->reverse_comm_fix(this); // self-interaction for local temperature - for (i = 0; i < nlocal; i++){ + for (int i = 0; i < nlocal; i++) + { + double wij = 0.0; // Lucy Weight Function if (WT_FLAG == LUCY) { wij = 1.0; - dpdThetaLocal[i] += wij / dpdTheta[i]; + d_dpdThetaLocal(i) += wij / d_dpdTheta(i); } - sumWeights[i] += wij; + d_sumWeights(i) += wij; // Normalized local temperature - dpdThetaLocal[i] = dpdThetaLocal[i] / sumWeights[i]; + d_dpdThetaLocal(i) = d_dpdThetaLocal(i) / d_sumWeights(i); if (LOCAL_TEMP_FLAG == HARMONIC) - dpdThetaLocal[i] = 1.0 / dpdThetaLocal[i]; - + d_dpdThetaLocal(i) = 1.0 / d_dpdThetaLocal(i); } - delete [] sumWeights; + // Clean up the local kokkos data. 
+ memory->destroy_kokkos(k_cutsq, h_cutsq); + memory->destroy_kokkos(k_sumWeights, sumWeights); + + //delete [] sumWeights; } namespace LAMMPS_NS { diff --git a/src/KOKKOS/fix_rx_kokkos.h b/src/KOKKOS/fix_rx_kokkos.h index ec9a8fa976..9d60f2b99e 100644 --- a/src/KOKKOS/fix_rx_kokkos.h +++ b/src/KOKKOS/fix_rx_kokkos.h @@ -121,7 +121,11 @@ class FixRxKokkos : public FixRX { void create_kinetics_data(void); - template + // Need a dual-view and device-view for dpdThetaLocal and sumWeights since they're used in several callbacks. + DAT::tdual_efloat_1d k_dpdThetaLocal, k_sumWeights; + typename ArrayTypes::t_efloat_1d d_dpdThetaLocal, d_sumWeights; + + template void computeLocalTemperature(); }; From 0d57a1d831e6a73e55491adb982c44175be4c76f Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Fri, 3 Feb 2017 16:09:06 -0500 Subject: [PATCH 114/267] Added setup_pre_force, pack/unpack methods to FixRxKokkos. - Added a kokkos version of setup_pre_force that only sets dvector and then communicates that. - Converted all for loops to parallel_for's in computeLocalTemperature() and setup_pre_force. - Added pack/unpack forward/reverse methods with Kokkos host views. TODO: - The Kokkos neighbor list is not working. Need to request a Kokkos neighbor list in ::init(). Then, replace objects like list->ilist[] with k_list->d_ilist(). --- src/KOKKOS/fix_rx_kokkos.cpp | 343 ++++++++++++++++++++++++++--------- src/KOKKOS/fix_rx_kokkos.h | 12 +- 2 files changed, 272 insertions(+), 83 deletions(-) diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index 491b32e01d..167f2713ea 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -77,6 +77,18 @@ FixRxKokkos::~FixRxKokkos() /* ---------------------------------------------------------------------- */ +template +void FixRxKokkos::post_constructor() +{ + // Run the parents and then reset one value. 
+ FixRX::post_constructor(); + + // Need a copy of this + this->my_restartFlag = modify->fix[modify->nfix-1]->restart_reset; +} + +/* ---------------------------------------------------------------------- */ + template void FixRxKokkos::init() { @@ -763,6 +775,51 @@ void FixRxKokkos::create_kinetics_data(void) /* ---------------------------------------------------------------------- */ +template +void FixRxKokkos::setup_pre_force(int vflag) +{ + printf("Inside FixRxKokkos::setup_pre_force restartFlag= %d\n", my_restartFlag); + + if (my_restartFlag) + my_restartFlag = 0; + else + { + const int nlocal = atom->nlocal; + //const int nghost = atom->nghost; + //const int *mask = atom->mask; + //const int newton_pair = force->newton_pair; + + typename ArrayTypes::t_float_2d d_dvector = atomKK->k_dvector.view(); + + // Get up-to-date data. + atomKK->sync( execution_space, DVECTOR_MASK ); + + // The only net effect from fix_rx.cpp is to set dvector[nspecies:2*nspecies] + // since the reactions are set to zero for step 0. + Kokkos::parallel_for ( nlocal, + LAMMPS_LAMBDA(const int i) + { + for (int ispecies = 0; ispecies < nspecies; ispecies++) + d_dvector(ispecies+nspecies,i) = d_dvector(ispecies,i); + } + ); + + // Signal that dvector has been modified on this execution space. + atomKK->modified( execution_space, DVECTOR_MASK ); + + // Communicate the updated species data to all nodes + atomKK->sync ( Host, DVECTOR_MASK ); + + // Communicate the dvector to all nodes + comm->forward_comm_fix(this); + + // Flag that dvector was updated on the host in the comm. 
+ atomKK->modified ( Host, DVECTOR_MASK ); + } +} + +/* ---------------------------------------------------------------------- */ + template void FixRxKokkos::pre_force(int vflag) { @@ -789,18 +846,31 @@ void FixRxKokkos::pre_force(int vflag) memory->create_kokkos (k_dpdThetaLocal, dpdThetaLocal, count, "FixRxKokkos::dpdThetaLocal"); d_dpdThetaLocal = k_dpdThetaLocal.d_view; + h_dpdThetaLocal = k_dpdThetaLocal.h_view; + + const int neighflag = lmp->kokkos->neighflag; + +#define _template_switch(_wtflag, _localTempFlag) { \ + if (neighflag == HALF) \ + if (newton_pair) \ + computeLocalTemperature<_wtflag, _localTempFlag, true , HALF> (); \ + else \ + computeLocalTemperature<_wtflag, _localTempFlag, false, HALF> (); \ + else if (neighflag == HALFTHREAD) \ + if (newton_pair) \ + computeLocalTemperature<_wtflag, _localTempFlag, true , HALFTHREAD> (); \ + else \ + computeLocalTemperature<_wtflag, _localTempFlag, false, HALFTHREAD> (); \ + } // Are there is no other options than wtFlag = (0)LUCY and localTempFlag = NONE : HARMONIC? 
- if (localTempFlag == HARMONIC) - if (newton_pair) - computeLocalTemperature (); - else - computeLocalTemperature (); - else - if (newton_pair) - computeLocalTemperature (); - else - computeLocalTemperature (); + if (localTempFlag == HARMONIC) { + _template_switch(LUCY, HARMONIC) + } + else { + _template_switch(LUCY, NONE) + } +#undef _template_switch } TimerType timer_localTemperature = getTimeStamp(); @@ -972,10 +1042,9 @@ void FixRxKokkos::pre_force(int vflag) /* ---------------------------------------------------------------------- */ template - template + template void FixRxKokkos::computeLocalTemperature() { - printf("Inside FixRxKokkos::computeLocalTemperature: %d %d %d %d\n", WT_FLAG, LOCAL_TEMP_FLAG, NEWTON_PAIR, (int)lmp->kokkos->neighflag); //int inum,jnum,itype,jtype; //double xtmp,ytmp,ztmp,delx,dely,delz; @@ -996,10 +1065,12 @@ void FixRxKokkos::computeLocalTemperature() const int nghost = atom->nghost; //const int newton_pair = force->newton_pair; + printf("Inside FixRxKokkos::computeLocalTemperature: %d %d %d %d %d\n", WT_FLAG, LOCAL_TEMP_FLAG, NEWTON_PAIR, (int)lmp->kokkos->neighflag, NEIGHFLAG, nlocal, nghost); + // local temperature variables //double wij=0.0; - // Pull from pairDPDE. The pairDPDEKK objects are producted so recreate here for now. + // Pull from pairDPDE. The pairDPDEKK objects are protected so recreate here for now. //pairDPDEKK->k_cutsq.template sync(); //typename ArrayTypes::t_ffloat_2d d_cutsq = pairDPDEKK->k_cutsq.template view::computeLocalTemperature() memory->create_kokkos (k_sumWeights, sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights"); d_sumWeights = k_sumWeights.d_view; + h_sumWeights = k_sumWeights.h_view; // Initialize the accumulator to zero ... 
- Kokkos::parallel_for(nlocal, - LAMMPS_LAMBDA(int i) - { - d_sumWeights(i) = 0.0; - } - ); + Kokkos::parallel_for (sumWeightsCt, + LAMMPS_LAMBDA(const int i) + { + d_sumWeights(i) = 0.0; + } + ); const int inum = list->inum; @@ -1059,86 +1131,106 @@ void FixRxKokkos::computeLocalTemperature() int** firstneigh = list->firstneigh; // loop over neighbors of my atoms - for (int ii = 0; ii < inum; ii++) - { - const int i = ilist[ii]; - //const int i = d_ilist(ii); + Kokkos::parallel_for ( inum, + LAMMPS_LAMBDA(const int ii) + { + // Create an atomic view of sumWeights and dpdThetaLocal. Only needed + // for Half/thread scenarios. + typedef Kokkos::View< E_FLOAT*, typename DAT::t_efloat_1d::array_layout, DeviceType, Kokkos::MemoryTraits< AtomicF< NEIGHFLAG >::value> > AtomicViewType; + + AtomicViewType a_dpdThetaLocal = d_dpdThetaLocal; + AtomicViewType a_sumWeights = d_sumWeights; + + // Local scalar accumulators. + double i_dpdThetaLocal = 0.0; + double i_sumWeights = 0.0; + + const int i = ilist[ii]; + //const int i = d_ilist(ii); - //const double xtmp = x[i][0]; - //const double ytmp = x[i][1]; - //const double ztmp = x[i][2]; - //const int itype = type[i]; - const double xtmp = d_x(i,0); - const double ytmp = d_x(i,1); - const double ztmp = d_x(i,2); - const int itype = d_type(i); + const double xtmp = d_x(i,0); + const double ytmp = d_x(i,1); + const double ztmp = d_x(i,2); + const int itype = d_type(i); - int *jlist = firstneigh[i]; - const int jnum = numneigh[i]; - //const int jnum = d_numneigh(i); + int *jlist = firstneigh[i]; + const int jnum = numneigh[i]; + //const int jnum = d_numneigh(i); - for (int jj = 0; jj < jnum; jj++) - { - const int j = (jlist[jj] & NEIGHMASK); - //const int j = (d_neighbors(i,jj) & NEIGHMASK); - //const int jtype = type[j]; - const int jtype = d_type(j); + for (int jj = 0; jj < jnum; jj++) + { + const int j = (jlist[jj] & NEIGHMASK); + //const int j = (d_neighbors(i,jj) & NEIGHMASK); + const int jtype = d_type(j); - //const double 
delx = xtmp - x[j][0]; - //const double dely = ytmp - x[j][1]; - //const double delz = ztmp - x[j][2]; - const double delx = xtmp - d_x(j,0); - const double dely = ytmp - d_x(j,1); - const double delz = ztmp - d_x(j,2); - const double rsq = delx*delx + dely*dely + delz*delz; + const double delx = xtmp - d_x(j,0); + const double dely = ytmp - d_x(j,1); + const double delz = ztmp - d_x(j,2); + const double rsq = delx*delx + dely*dely + delz*delz; - const double cutsq_ij = d_cutsq(itype,jtype); + const double cutsq_ij = d_cutsq(itype,jtype); - if (rsq < cutsq_ij) - { - const double rcut = sqrt( cutsq_ij ); - double rij = sqrt(rsq); - double ratio = rij/rcut; + if (rsq < cutsq_ij) + { + const double rcut = sqrt( cutsq_ij ); + double rij = sqrt(rsq); + double ratio = rij/rcut; - double wij = 0.0; + double wij = 0.0; - // Lucy's Weight Function - if (WT_FLAG == LUCY) - { - wij = (1.0+3.0*ratio) * (1.0-ratio)*(1.0-ratio)*(1.0-ratio); - d_dpdThetaLocal(i) += wij / d_dpdTheta(j); - if (NEWTON_PAIR || j < nlocal) - d_dpdThetaLocal(j) += wij / d_dpdTheta(i); + // Lucy's Weight Function + if (WT_FLAG == LUCY) + { + wij = (1.0+3.0*ratio) * (1.0-ratio)*(1.0-ratio)*(1.0-ratio); + i_dpdThetaLocal += wij / d_dpdTheta(j); + if (NEWTON_PAIR || j < nlocal) + a_dpdThetaLocal(j) += wij / d_dpdTheta(i); + } + + i_sumWeights += wij; + if (NEWTON_PAIR || j < nlocal) + a_sumWeights(j) += wij; + } + } + + // Update, don't assign, the array value (because another iteration may have hit it). + a_dpdThetaLocal(i) += i_dpdThetaLocal; + a_sumWeights(i) += i_sumWeights; } + ); - d_sumWeights(i) += wij; - if (NEWTON_PAIR || j < nlocal) - d_sumWeights(j) += wij; - } - } - } + // Signal that dpdThetaLocal and sumWeights have been modified. + k_dpdThetaLocal.template modify(); + k_sumWeights. template modify(); + // Communicate the sum dpdTheta and the weights on the host. if (NEWTON_PAIR) comm->reverse_comm_fix(this); + // Update the device view in case they got changed. 
+ k_dpdThetaLocal.template sync(); + k_sumWeights. template sync(); + // self-interaction for local temperature - for (int i = 0; i < nlocal; i++) - { - double wij = 0.0; + Kokkos::parallel_for ( nlocal, + LAMMPS_LAMBDA(const int i) + { + double wij = 0.0; - // Lucy Weight Function - if (WT_FLAG == LUCY) - { - wij = 1.0; - d_dpdThetaLocal(i) += wij / d_dpdTheta(i); - } - d_sumWeights(i) += wij; + // Lucy Weight Function + if (WT_FLAG == LUCY) + { + wij = 1.0; + d_dpdThetaLocal(i) += wij / d_dpdTheta(i); + } + d_sumWeights(i) += wij; - // Normalized local temperature - d_dpdThetaLocal(i) = d_dpdThetaLocal(i) / d_sumWeights(i); + // Normalized local temperature + d_dpdThetaLocal(i) = d_dpdThetaLocal(i) / d_sumWeights(i); - if (LOCAL_TEMP_FLAG == HARMONIC) - d_dpdThetaLocal(i) = 1.0 / d_dpdThetaLocal(i); - } + if (LOCAL_TEMP_FLAG == HARMONIC) + d_dpdThetaLocal(i) = 1.0 / d_dpdThetaLocal(i); + } + ); // Clean up the local kokkos data. memory->destroy_kokkos(k_cutsq, h_cutsq); @@ -1147,6 +1239,93 @@ void FixRxKokkos::computeLocalTemperature() //delete [] sumWeights; } +/* ---------------------------------------------------------------------- */ + +template +int FixRxKokkos::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) +{ + //printf("inside FixRxKokkos::pack_forward_comm %d\n", comm->me); + + HAT::t_float_2d h_dvector = atomKK->k_dvector.h_view; + + int m = 0; + for (int ii = 0; ii < n; ii++) { + const int jj = list[ii]; + for(int ispecies = 0; ispecies < nspecies; ispecies++){ + buf[m++] = h_dvector(ispecies,jj); + buf[m++] = h_dvector(ispecies+nspecies,jj); + } + } + + //printf("done with FixRxKokkos::pack_forward_comm %d\n", comm->me); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixRxKokkos::unpack_forward_comm(int n, int first, double *buf) +{ + //printf("inside FixRxKokkos::unpack_forward_comm %d\n", comm->me); + + HAT::t_float_2d h_dvector = atomKK->k_dvector.h_view; 
+ + const int last = first + n ; + int m = 0; + for (int ii = first; ii < last; ii++){ + for (int ispecies = 0; ispecies < nspecies; ispecies++){ + h_dvector(ispecies,ii) = buf[m++]; + h_dvector(ispecies+nspecies,ii) = buf[m++]; + } + } + + //printf("done with FixRxKokkos::unpack_forward_comm %d\n", comm->me); +} + +/* ---------------------------------------------------------------------- */ + +template +int FixRxKokkos::pack_reverse_comm(int n, int first, double *buf) +{ + //printf("inside FixRxKokkos::pack_reverse_comm %d %d %d\n", comm->me, first, n); + // Sync the host view. + k_dpdThetaLocal.template sync(); + k_sumWeights. template sync(); + + const int last = first + n; + int m = 0; + for (int i = first; i < last; ++i) + { + buf[m++] = h_dpdThetaLocal(i); + buf[m++] = h_sumWeights(i); + } + //printf("done with FixRxKokkos::pack_reverse_comm %d\n", comm->me); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixRxKokkos::unpack_reverse_comm(int n, int *list, double *buf) +{ + // printf("inside FixRxKokkos::unpack_reverse_comm %d\n", comm->me); + int m = 0; + for (int i = 0; i < n; i++) { + const int j = list[i]; + + h_dpdThetaLocal(j) += buf[m++]; + h_sumWeights(j) += buf[m++]; + } + + // Signal that the host view has been modified. + k_dpdThetaLocal.template modify(); + k_sumWeights. 
template modify(); + + // printf("done with FixRxKokkos::unpack_reverse_comm %d\n", comm->me); +} + namespace LAMMPS_NS { template class FixRxKokkos; #ifdef KOKKOS_HAVE_CUDA diff --git a/src/KOKKOS/fix_rx_kokkos.h b/src/KOKKOS/fix_rx_kokkos.h index 9d60f2b99e..d397d91499 100644 --- a/src/KOKKOS/fix_rx_kokkos.h +++ b/src/KOKKOS/fix_rx_kokkos.h @@ -40,6 +40,8 @@ class FixRxKokkos : public FixRX { FixRxKokkos(class LAMMPS *, int, char **); virtual ~FixRxKokkos(); virtual void init(); + void post_constructor(); + virtual void setup_pre_force(int); virtual void pre_force(int); //template @@ -124,10 +126,18 @@ class FixRxKokkos : public FixRX { // Need a dual-view and device-view for dpdThetaLocal and sumWeights since they're used in several callbacks. DAT::tdual_efloat_1d k_dpdThetaLocal, k_sumWeights; typename ArrayTypes::t_efloat_1d d_dpdThetaLocal, d_sumWeights; + typename HAT::t_efloat_1d h_dpdThetaLocal, h_sumWeights; - template + template void computeLocalTemperature(); + int pack_reverse_comm(int, int, double *); + void unpack_reverse_comm(int, int *, double *); + int pack_forward_comm(int , int *, double *, int, int *); + void unpack_forward_comm(int , int , double *); + + private: // replicate a few from FixRX + int my_restartFlag; }; } From f2d005fb8db00fa90a151c2237985a77c7473b2a Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Tue, 7 Feb 2017 16:24:59 -0500 Subject: [PATCH 115/267] Fixed errors in FixRxKokkos kokkos neighbor lists initialization and usage and calls to computeLocalTemperature. - Created request for kokkos neighbor list for fix and switched to that neighbor list datatype in computeLocalTemperature. - Reconfigured pre_force and setup_pre_force to call a common solve_reactions() method to avoid duplicate code. TODO: - Clean-up - Provide per-problem scratch data within kokkos framework (instead of C++ new/delete data). 
--- src/KOKKOS/fix_rx_kokkos.cpp | 195 ++++++++++++++++++++++++++--------- src/KOKKOS/fix_rx_kokkos.h | 11 +- 2 files changed, 148 insertions(+), 58 deletions(-) diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index 167f2713ea..1497fea6c1 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -21,6 +21,9 @@ #include "update.h" #include "respa.h" #include "modify.h" +#include "neighbor.h" +#include "neigh_list_kokkos.h" +#include "neigh_request.h" #include "error.h" #include "math_special.h" @@ -95,24 +98,61 @@ void FixRxKokkos::init() printf("Inside FixRxKokkos::init\n"); // Call the parent's version. - FixRX::init(); + //FixRX::init(); + + pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy",1); + if (pairDPDE == NULL) + pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy/kk",1); + + if (pairDPDE == NULL) + error->all(FLERR,"Must use pair_style dpd/fdt/energy with fix rx"); pairDPDEKK = dynamic_cast(pairDPDE); if (pairDPDEKK == NULL) error->all(FLERR,"Must use pair_style dpd/fdt/energy/kk with fix rx/kk"); + bool eos_flag = false; + for (int i = 0; i < modify->nfix; i++) + if (strcmp(modify->fix[i]->style,"eos/table/rx") == 0) eos_flag = true; + if(!eos_flag) error->all(FLERR,"fix rx requires fix eos/table/rx to be specified"); + if (update_kinetics_data) create_kinetics_data(); + + // From FixRX::init() + // need a half neighbor list + // built whenever re-neighboring occurs + + int irequest = neighbor->request(this,instance_me); + neighbor->requests[irequest]->pair = 0; + neighbor->requests[irequest]->fix = 1; + + // Update the neighbor data for Kokkos. 
+ int neighflag = lmp->kokkos->neighflag; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same::value && + !Kokkos::Impl::is_same::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same::value; + + if (neighflag == FULL) { + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + } else { //if (neighflag == HALF || neighflag == HALFTHREAD) + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 1; + } } /* ---------------------------------------------------------------------- */ -//template -//void FixRXKokkos::init_list(int, class NeighList* ptr) -//{ -// printf("Inside FixRxKokkos::init_list\n"); -// this->list = ptr; -//} +template +void FixRxKokkos::init_list(int, class NeighList* ptr) +{ + printf("Inside FixRxKokkos::init_list\n"); + this->list = ptr; +} /* ---------------------------------------------------------------------- */ @@ -663,37 +703,6 @@ void FixRxKokkos::operator()(SolverType, const int &i) const /* ---------------------------------------------------------------------- */ -template -void FixRxKokkos::solve_reactions(void) -{ -/* int nlocal = atom->nlocal; - if (igroup == atom->firstgroup) nlocal = atom->nfirst; - - using AT = ArrayTypes; - - atomKK->sync(execution_space, UCOND_MASK); - typename AT::t_efloat_1d uCond = atomKK->k_uCond.view(); - atomKK->sync(execution_space, UMECH_MASK); - typename AT::t_efloat_1d uMech = atomKK->k_uMech.view(); - - pairDPDEKK->k_duCond.template sync(); - typename AT::t_efloat_1d_const duCond = pairDPDEKK->k_duCond.template view(); - pairDPDEKK->k_duMech.template sync(); - typename AT::t_efloat_1d_const duMech = pairDPDEKK->k_duMech.template view(); - - auto dt = update->dt; - - Kokkos::parallel_for(nlocal, LAMMPS_LAMBDA(int i) { - uCond(i) += 0.5*dt*duCond(i); - uMech(i) += 0.5*dt*duMech(i); - }); - - atomKK->modified(execution_space, UCOND_MASK); - atomKK->modified(execution_space, UMECH_MASK); */ -} - -/* 
---------------------------------------------------------------------- */ - template void FixRxKokkos::create_kinetics_data(void) { @@ -784,6 +793,9 @@ void FixRxKokkos::setup_pre_force(int vflag) my_restartFlag = 0; else { +#if 1 + this->solve_reactions( vflag, false ); +#else const int nlocal = atom->nlocal; //const int nghost = atom->nghost; //const int *mask = atom->mask; @@ -815,6 +827,7 @@ void FixRxKokkos::setup_pre_force(int vflag) // Flag that dvector was updated on the host in the comm. atomKK->modified ( Host, DVECTOR_MASK ); +#endif } } @@ -825,6 +838,15 @@ void FixRxKokkos::pre_force(int vflag) { printf("Inside FixRxKokkos::pre_force localTempFlag= %d\n", localTempFlag); + this->solve_reactions( vflag, true ); +} +/* ---------------------------------------------------------------------- */ + +template +void FixRxKokkos::solve_reactions(const int vflag, const bool isPreForce) +{ + printf("Inside FixRxKokkos::solve_reactions localTempFlag= %d isPreForce= %s\n", localTempFlag, isPreForce ? "True" : "false"); + if (update_kinetics_data) create_kinetics_data(); @@ -834,7 +856,8 @@ void FixRxKokkos::pre_force(int vflag) const int nghost = atom->nghost; const int newton_pair = force->newton_pair; - const bool setToZero = false; // don't set the forward rates to zero. + //const bool setToZero = false; // don't set the forward rates to zero. + const bool setToZero = isPreForce == false; // Set the forward rates to zero if acting as setup_pre_force. if (localTempFlag) { @@ -1115,16 +1138,71 @@ void FixRxKokkos::computeLocalTemperature() const int inum = list->inum; - // Local list views. (This isn't working!) 
- //NeighListKokkos* k_list = static_cast*>(list); - //if (not(list->kokkos)) - //{ - // error->one(FLERR,"list is not a Kokkos list\n"); - //} + bool useKokkosLists = false; - //typename ArrayTypes::t_neighbors_2d d_neighbors = k_list->d_neighbors; - //typename ArrayTypes::t_int_1d d_ilist = k_list->d_ilist; - //typename ArrayTypes::t_int_1d d_numneigh = k_list->d_numneigh; + // Local list views. (This isn't working!) + NeighListKokkos* k_list = static_cast*>(list); + if (not(list->kokkos)) + { + //error->one(FLERR,"list is not a Kokkos list\n"); + printf("list is NOT a Kokkos list\n"); + + int* ilist = list->ilist; + int* numneigh = list->numneigh; + int** firstneigh = list->firstneigh; + printf("inum= %d ilist= %x\n", inum, ilist); + for (int ii = 0; ii < std::min(inum,10); ++ii) + { + const int i = ilist[ii]; + int *jlist = firstneigh[i]; + const int jnum = numneigh[i]; + const int j = (jlist[0] & NEIGHMASK); + printf(" ilist[%d]= %d j= %d jnum= %d\n", ii, i, j, jnum); + } + } + else + { + printf("It's a kokkos list\n"); + + useKokkosLists = true; + + typename ArrayTypes::t_neighbors_2d d_neighbors = k_list->d_neighbors; + typename ArrayTypes::t_int_1d d_ilist = k_list->d_ilist; + typename ArrayTypes::t_int_1d d_numneigh = k_list->d_numneigh; + + static FILE *fp1 = NULL; + + //if (fp1 == NULL) + // fp1 = fopen("kokkos_list.txt","w"); + + if (fp1 != NULL) + { + const int inum = list->inum; + fprintf(fp1, "inum= %d\n", inum); + for (int ii = 0; ii < inum; ++ii) + { + const int i = d_ilist[ii]; + const int jnum = d_numneigh[i]; + fprintf(fp1, " %d %d %d\n", ii, i, jnum); + for (int jj = 0; jj < jnum; ++jj) + { + const int j = (d_neighbors(i,jj) & NEIGHMASK); + fprintf(fp1, " %d %d\n", jj, j); + } + } + } + } + + typename ArrayTypes::t_neighbors_2d d_neighbors; + typename ArrayTypes::t_int_1d d_ilist; + typename ArrayTypes::t_int_1d d_numneigh; + + if (useKokkosLists) + { + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + d_numneigh = 
k_list->d_numneigh; + } int* ilist = list->ilist; int* numneigh = list->numneigh; @@ -1145,8 +1223,9 @@ void FixRxKokkos::computeLocalTemperature() double i_dpdThetaLocal = 0.0; double i_sumWeights = 0.0; - const int i = ilist[ii]; + //const int i = ilist[ii]; //const int i = d_ilist(ii); + const int i = (useKokkosLists) ? d_ilist(ii) : ilist[ii]; const double xtmp = d_x(i,0); const double ytmp = d_x(i,1); @@ -1154,13 +1233,15 @@ void FixRxKokkos::computeLocalTemperature() const int itype = d_type(i); int *jlist = firstneigh[i]; - const int jnum = numneigh[i]; + //const int jnum = numneigh[i]; //const int jnum = d_numneigh(i); + const int jnum = (useKokkosLists) ? d_numneigh(i) : numneigh[i]; for (int jj = 0; jj < jnum; jj++) { - const int j = (jlist[jj] & NEIGHMASK); + //const int j = (jlist[jj] & NEIGHMASK); //const int j = (d_neighbors(i,jj) & NEIGHMASK); + const int j = (useKokkosLists) ? (d_neighbors(i,jj) & NEIGHMASK) : (jlist[jj] & NEIGHMASK); const int jtype = d_type(j); const double delx = xtmp - d_x(j,0); @@ -1232,6 +1313,18 @@ void FixRxKokkos::computeLocalTemperature() } ); + if (false) + { + static FILE *fp = NULL; + + if (fp == NULL) + fp = fopen("kokkos_temp.txt","w"); + + fprintf(fp, "nlocal= %d %d\n", nlocal, nghost); + for (int i = 0; i < nlocal; ++i) + fprintf(fp, "%d %15.9e %15.9e\n", i, d_dpdThetaLocal[i], d_sumWeights[i]); + } + // Clean up the local kokkos data. memory->destroy_kokkos(k_cutsq, h_cutsq); memory->destroy_kokkos(k_sumWeights, sumWeights); diff --git a/src/KOKKOS/fix_rx_kokkos.h b/src/KOKKOS/fix_rx_kokkos.h index d397d91499..36b05cb210 100644 --- a/src/KOKKOS/fix_rx_kokkos.h +++ b/src/KOKKOS/fix_rx_kokkos.h @@ -25,21 +25,18 @@ FixStyle(rx/kk/host,FixRxKokkos) #include "fix_rx.h" #include "pair_dpd_fdt_energy_kokkos.h" #include "kokkos_type.h" +#include "neigh_list.h" +#include "neigh_list_kokkos.h" namespace LAMMPS_NS { -template -struct TagFixRxKokkosSolver -{ - enum { setToZero = (_setToZero == true) ? 
1 : 0 }; -}; - template class FixRxKokkos : public FixRX { public: FixRxKokkos(class LAMMPS *, int, char **); virtual ~FixRxKokkos(); virtual void init(); + void init_list(int, class NeighList *); void post_constructor(); virtual void setup_pre_force(int); virtual void pre_force(int); @@ -79,7 +76,7 @@ class FixRxKokkos : public FixRX { PairDPDfdtEnergyKokkos* pairDPDEKK; double VDPD; - void solve_reactions(void); + void solve_reactions(const int vflag, const bool isPreForce = true); int rhs(double, const double *, double *, void *) const; int rhs_dense (double, const double *, double *, void *) const; From 4e8351d9c8cb26328b667882290742029d5bbdfe Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Tue, 7 Feb 2017 17:53:36 -0500 Subject: [PATCH 116/267] Code clean-up for FixRxKokkos. - Removed dead code and old errors. TODO: - Per-thread scratch data in kokkos. - ODE Diagnostics in kokkos. --- src/KOKKOS/fix_rx_kokkos.cpp | 166 +++-------------------------------- 1 file changed, 12 insertions(+), 154 deletions(-) diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index 1497fea6c1..b5055191c4 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -792,43 +792,7 @@ void FixRxKokkos::setup_pre_force(int vflag) if (my_restartFlag) my_restartFlag = 0; else - { -#if 1 this->solve_reactions( vflag, false ); -#else - const int nlocal = atom->nlocal; - //const int nghost = atom->nghost; - //const int *mask = atom->mask; - //const int newton_pair = force->newton_pair; - - typename ArrayTypes::t_float_2d d_dvector = atomKK->k_dvector.view(); - - // Get up-to-date data. - atomKK->sync( execution_space, DVECTOR_MASK ); - - // The only net effect from fix_rx.cpp is to set dvector[nspecies:2*nspecies] - // since the reactions are set to zero for step 0. 
- Kokkos::parallel_for ( nlocal, - LAMMPS_LAMBDA(const int i) - { - for (int ispecies = 0; ispecies < nspecies; ispecies++) - d_dvector(ispecies+nspecies,i) = d_dvector(ispecies,i); - } - ); - - // Signal that dvector has been modified on this execution space. - atomKK->modified( execution_space, DVECTOR_MASK ); - - // Communicate the updated species data to all nodes - atomKK->sync ( Host, DVECTOR_MASK ); - - // Communicate the dvector to all nodes - comm->forward_comm_fix(this); - - // Flag that dvector was updated on the host in the comm. - atomKK->modified ( Host, DVECTOR_MASK ); -#endif - } } /* ---------------------------------------------------------------------- */ @@ -856,17 +820,13 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF const int nghost = atom->nghost; const int newton_pair = force->newton_pair; - //const bool setToZero = false; // don't set the forward rates to zero. - const bool setToZero = isPreForce == false; // Set the forward rates to zero if acting as setup_pre_force. + // Set the forward rates to zero if acting as setup_pre_force. + const bool setRatesToZero = (isPreForce == false); if (localTempFlag) { const int count = nlocal + (newton_pair ? nghost : 0); - //dpdThetaLocal = new double[count]; - //memset(dpdThetaLocal, 0, sizeof(double)*count); - //FixRx::computeLocalTemperature(); - memory->create_kokkos (k_dpdThetaLocal, dpdThetaLocal, count, "FixRxKokkos::dpdThetaLocal"); d_dpdThetaLocal = k_dpdThetaLocal.d_view; h_dpdThetaLocal = k_dpdThetaLocal.h_view; @@ -904,9 +864,6 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF // Set data needed in the operators. // ... - //int *mask = atom->mask; - //double *dpdTheta = atom->dpdTheta; - // Local references to the atomKK objects. 
typename ArrayTypes::t_efloat_1d d_dpdTheta = atomKK->k_dpdTheta.view(); typename ArrayTypes::t_float_2d d_dvector = atomKK->k_dvector.view(); @@ -941,13 +898,12 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF CounterType counter_i; - //const double theta = (localTempFlag) ? dpdThetaLocal[i] : d_dpdTheta(i); const double theta = (localTempFlag) ? d_dpdThetaLocal(i) : d_dpdTheta(i); //Compute the reaction rate constants for (int irxn = 0; irxn < nreactions; irxn++) { - if (setToZero) + if (setRatesToZero) userData.kFor[irxn] = 0.0; else { @@ -1068,16 +1024,6 @@ template template void FixRxKokkos::computeLocalTemperature() { - - //int inum,jnum,itype,jtype; - //double xtmp,ytmp,ztmp,delx,dely,delz; - //double rsq; - //int *ilist,*jlist,*numneigh,**firstneigh; - - //double **x = atom->x; - //int *type = atom->type; - //double *dpdTheta = atom->dpdTheta; - typename ArrayTypes::t_x_array_randomread d_x = atomKK->k_x.view(); typename ArrayTypes::t_int_1d_randomread d_type = atomKK->k_type.view(); typename ArrayTypes::t_efloat_1d d_dpdTheta = atomKK->k_dpdTheta.view(); @@ -1086,12 +1032,8 @@ void FixRxKokkos::computeLocalTemperature() const int nlocal = atom->nlocal; const int nghost = atom->nghost; - //const int newton_pair = force->newton_pair; - printf("Inside FixRxKokkos::computeLocalTemperature: %d %d %d %d %d\n", WT_FLAG, LOCAL_TEMP_FLAG, NEWTON_PAIR, (int)lmp->kokkos->neighflag, NEIGHFLAG, nlocal, nghost); - - // local temperature variables - //double wij=0.0; + printf("Inside FixRxKokkos::computeLocalTemperature: %d %d %d %d %d %d %d\n", WT_FLAG, LOCAL_TEMP_FLAG, NEWTON_PAIR, (int)lmp->kokkos->neighflag, NEIGHFLAG, nlocal, nghost); // Pull from pairDPDE. The pairDPDEKK objects are protected so recreate here for now. //pairDPDEKK->k_cutsq.template sync(); @@ -1121,8 +1063,6 @@ void FixRxKokkos::computeLocalTemperature() // Initialize the local temperature weight array int sumWeightsCt = nlocal + (NEWTON_PAIR ? 
nghost : 0); - //sumWeights = new double[sumWeightsCt]; - //memset(sumWeights, 0, sizeof(double)*sumWeightsCt); memory->create_kokkos (k_sumWeights, sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights"); d_sumWeights = k_sumWeights.d_view; @@ -1136,77 +1076,16 @@ void FixRxKokkos::computeLocalTemperature() } ); - const int inum = list->inum; - - bool useKokkosLists = false; - // Local list views. (This isn't working!) NeighListKokkos* k_list = static_cast*>(list); if (not(list->kokkos)) - { - //error->one(FLERR,"list is not a Kokkos list\n"); - printf("list is NOT a Kokkos list\n"); + error->one(FLERR,"list is not a Kokkos list\n"); - int* ilist = list->ilist; - int* numneigh = list->numneigh; - int** firstneigh = list->firstneigh; - printf("inum= %d ilist= %x\n", inum, ilist); - for (int ii = 0; ii < std::min(inum,10); ++ii) - { - const int i = ilist[ii]; - int *jlist = firstneigh[i]; - const int jnum = numneigh[i]; - const int j = (jlist[0] & NEIGHMASK); - printf(" ilist[%d]= %d j= %d jnum= %d\n", ii, i, j, jnum); - } - } - else - { - printf("It's a kokkos list\n"); + typename ArrayTypes::t_neighbors_2d d_neighbors = k_list->d_neighbors; + typename ArrayTypes::t_int_1d d_ilist = k_list->d_ilist; + typename ArrayTypes::t_int_1d d_numneigh = k_list->d_numneigh; - useKokkosLists = true; - - typename ArrayTypes::t_neighbors_2d d_neighbors = k_list->d_neighbors; - typename ArrayTypes::t_int_1d d_ilist = k_list->d_ilist; - typename ArrayTypes::t_int_1d d_numneigh = k_list->d_numneigh; - - static FILE *fp1 = NULL; - - //if (fp1 == NULL) - // fp1 = fopen("kokkos_list.txt","w"); - - if (fp1 != NULL) - { - const int inum = list->inum; - fprintf(fp1, "inum= %d\n", inum); - for (int ii = 0; ii < inum; ++ii) - { - const int i = d_ilist[ii]; - const int jnum = d_numneigh[i]; - fprintf(fp1, " %d %d %d\n", ii, i, jnum); - for (int jj = 0; jj < jnum; ++jj) - { - const int j = (d_neighbors(i,jj) & NEIGHMASK); - fprintf(fp1, " %d %d\n", jj, j); - } - } - } - } - - typename 
ArrayTypes::t_neighbors_2d d_neighbors; - typename ArrayTypes::t_int_1d d_ilist; - typename ArrayTypes::t_int_1d d_numneigh; - - if (useKokkosLists) - { - d_neighbors = k_list->d_neighbors; - d_ilist = k_list->d_ilist; - d_numneigh = k_list->d_numneigh; - } - - int* ilist = list->ilist; - int* numneigh = list->numneigh; - int** firstneigh = list->firstneigh; + const int inum = list->inum; // loop over neighbors of my atoms Kokkos::parallel_for ( inum, @@ -1223,25 +1102,18 @@ void FixRxKokkos::computeLocalTemperature() double i_dpdThetaLocal = 0.0; double i_sumWeights = 0.0; - //const int i = ilist[ii]; - //const int i = d_ilist(ii); - const int i = (useKokkosLists) ? d_ilist(ii) : ilist[ii]; + const int i = d_ilist(ii); const double xtmp = d_x(i,0); const double ytmp = d_x(i,1); const double ztmp = d_x(i,2); const int itype = d_type(i); - int *jlist = firstneigh[i]; - //const int jnum = numneigh[i]; - //const int jnum = d_numneigh(i); - const int jnum = (useKokkosLists) ? d_numneigh(i) : numneigh[i]; + const int jnum = d_numneigh(i); for (int jj = 0; jj < jnum; jj++) { - //const int j = (jlist[jj] & NEIGHMASK); - //const int j = (d_neighbors(i,jj) & NEIGHMASK); - const int j = (useKokkosLists) ? (d_neighbors(i,jj) & NEIGHMASK) : (jlist[jj] & NEIGHMASK); + const int j = (d_neighbors(i,jj) & NEIGHMASK); const int jtype = d_type(j); const double delx = xtmp - d_x(j,0); @@ -1313,23 +1185,9 @@ void FixRxKokkos::computeLocalTemperature() } ); - if (false) - { - static FILE *fp = NULL; - - if (fp == NULL) - fp = fopen("kokkos_temp.txt","w"); - - fprintf(fp, "nlocal= %d %d\n", nlocal, nghost); - for (int i = 0; i < nlocal; ++i) - fprintf(fp, "%d %15.9e %15.9e\n", i, d_dpdThetaLocal[i], d_sumWeights[i]); - } - // Clean up the local kokkos data. 
memory->destroy_kokkos(k_cutsq, h_cutsq); memory->destroy_kokkos(k_sumWeights, sumWeights); - - //delete [] sumWeights; } /* ---------------------------------------------------------------------- */ From 93d99ec8d0576aebebbc7658a891013326de6f6c Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Thu, 9 Feb 2017 22:38:58 -0500 Subject: [PATCH 117/267] Added ODE diagnostics to FixRxKokkos using Kokkos managed data. - Added the diagnostics performance analysis routine to FixRxKokkos using Kokkos views. TODO: - Switch to using Kokkos data for the per-iteration scratch data. How to allocate only enough for each work-unit and not all iterations? Can the shared-memory scratch memory work for this, even for large sizes? --- src/KOKKOS/fix_rx_kokkos.cpp | 231 +++++++++++++++++++++++++++++++---- src/KOKKOS/fix_rx_kokkos.h | 13 ++ 2 files changed, 223 insertions(+), 21 deletions(-) diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index b5055191c4..2a3fc7547a 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -879,11 +879,22 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF // Average DPD volume. Used in the RHS function.
this->VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms; - /*if (odeIntegrationFlag == ODE_LAMMPS_RKF45 && diagnosticFrequency == 1) + if (odeIntegrationFlag == ODE_LAMMPS_RKF45 && diagnosticFrequency == 1) { - memory->create( diagnosticCounterPerODE[StepSum], nlocal, "FixRX::diagnosticCounterPerODE"); - memory->create( diagnosticCounterPerODE[FuncSum], nlocal, "FixRX::diagnosticCounterPerODE"); - }*/ + memory->create_kokkos (k_diagnosticCounterPerODEnSteps, diagnosticCounterPerODEnSteps, nlocal, "FixRxKokkos::diagnosticCounterPerODEnSteps"); + memory->create_kokkos (k_diagnosticCounterPerODEnFuncs, diagnosticCounterPerODEnFuncs, nlocal, "FixRxKokkos::diagnosticCounterPerODEnFuncs"); + + d_diagnosticCounterPerODEnSteps = k_diagnosticCounterPerODEnSteps.d_view; + d_diagnosticCounterPerODEnFuncs = k_diagnosticCounterPerODEnFuncs.d_view; + + Kokkos::parallel_for ( nlocal, + LAMMPS_LAMBDA(const int i) + { + d_diagnosticCounterPerODEnSteps(i) = 0; + d_diagnosticCounterPerODEnFuncs(i) = 0; + } + ); + } Kokkos::parallel_reduce( nlocal, LAMMPS_LAMBDA(int i, CounterType &counter) { @@ -930,12 +941,11 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF { rkf45(nspecies, t_stop, y, rwork, &userData, counter_i); - //if (diagnosticFrequency == 1 && diagnosticCounterPerODE[StepSum] != NULL) - //if (diagnosticCounterPerODE[StepSum] != NULL) - //{ - // diagnosticCounterPerODE[StepSum][i] = counter_i.nSteps; - // diagnosticCounterPerODE[FuncSum][i] = counter_i.nFuncs; - //} + if (diagnosticFrequency == 1) + { + d_diagnosticCounterPerODEnSteps(i) = counter_i.nSteps; + d_diagnosticCounterPerODEnFuncs(i) = counter_i.nFuncs; + } } // Store the solution back in dvector. 
@@ -975,10 +985,7 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF atomKK->modified ( Host, DVECTOR_MASK ); if (localTempFlag) - { - //delete [] dpdThetaLocal; memory->destroy_kokkos(k_dpdThetaLocal, dpdThetaLocal); - } TimerType timer_stop = getTimeStamp(); @@ -997,12 +1004,12 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF error->warning(FLERR, sbuf); } -/* // Compute and report ODE diagnostics, if requested. - if (odeIntegrationFlag == ODE_LAMMPS_RKF45 && diagnosticFrequency != 0){ + if (odeIntegrationFlag == ODE_LAMMPS_RKF45 && diagnosticFrequency != 0) + { // Update the counters. - diagnosticCounter[StepSum] += nSteps; - diagnosticCounter[FuncSum] += nFuncs; + diagnosticCounter[StepSum] += TotalCounters.nSteps; + diagnosticCounter[FuncSum] += TotalCounters.nFuncs; diagnosticCounter[TimeSum] += time_ODE; diagnosticCounter[AtomSum] += nlocal; diagnosticCounter[numDiagnosticCounters-1] ++; @@ -1011,11 +1018,193 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF ((update->ntimestep - update->firststep) % diagnosticFrequency) == 0) || (diagnosticFrequency < 0 && update->ntimestep == update->laststep) ) this->odeDiagnostics(); + } +} - for (int i = 0; i < numDiagnosticCounters; ++i) - if (diagnosticCounterPerODE[i]) - memory->destroy( diagnosticCounterPerODE[i] ); - } */ +/* ---------------------------------------------------------------------- */ + +template +void FixRxKokkos::odeDiagnostics(void) +{ + TimerType timer_start = getTimeStamp(); + + // Compute: + // 1) Average # of ODE integrator steps and RHS evaluations per atom globally. + // 2) RMS # of ... + // 3) Average # of ODE steps and RHS evaluations per MPI task. + // 4) RMS # of ODE steps and RHS evaluations per MPI task. + // 5) MAX # of ODE steps and RHS evaluations per MPI task. + // + // ... 1,2 are for ODE control diagnostics. + // ... 3-5 are for load balancing diagnostics. 
+ // + // To do this, we'll need to + // a) Allreduce (sum) the sum of nSteps / nFuncs. Dividing by atom->natoms + // gives the avg # of steps/funcs per atom globally. + // b) Reduce (sum) to root the sum of squares of the differences. + // i) Sum_i (steps_i - avg_steps_global)^2 + // ii) Sum_i (funcs_i - avg_funcs_global)^2 + // iii) (avg_steps_local - avg_steps_global)^2 + // iv) (avg_funcs_local - avg_funcs_global)^2 + + const int numCounters = numDiagnosticCounters-1; + + // # of time-steps for averaging. + const int nTimes = this->diagnosticCounter[numDiagnosticCounters-1]; + + // # of ODE's per time-step (on average). + //const int nODEs = this->diagnosticCounter[AtomSum] / nTimes; + + // Sum up the sums from each task. + double sums[numCounters]; + double my_vals[numCounters]; + double max_per_proc[numCounters]; + double min_per_proc[numCounters]; + + // Compute counters per dpd time-step. + for (int i = 0; i < numCounters; ++i){ + my_vals[i] = this->diagnosticCounter[i] / nTimes; + //printf("my sum[%d] = %f %d\n", i, my_vals[i], comm->me); + } + + MPI_Allreduce (my_vals, sums, numCounters, MPI_DOUBLE, MPI_SUM, world); + + MPI_Reduce (my_vals, max_per_proc, numCounters, MPI_DOUBLE, MPI_MAX, 0, world); + MPI_Reduce (my_vals, min_per_proc, numCounters, MPI_DOUBLE, MPI_MIN, 0, world); + + const double nODEs = sums[numCounters-1]; + + double avg_per_atom[numCounters], avg_per_proc[numCounters]; + + // Averages per-ODE and per-proc per time-step. + for (int i = 0; i < numCounters; ++i){ + avg_per_atom[i] = sums[i] / nODEs; + avg_per_proc[i] = sums[i] / comm->nprocs; + } + + // Sum up the differences from each task. 
+ double sum_sq[2*numCounters]; + double my_sum_sq[2*numCounters]; + for (int i = 0; i < numCounters; ++i){ + double diff_i = my_vals[i] - avg_per_proc[i]; + my_sum_sq[i] = diff_i * diff_i; + } + + double max_per_ODE[numCounters], min_per_ODE[numCounters]; + + // Process the per-ODE RMS of the # of steps/funcs + if (diagnosticFrequency == 1) + { + h_diagnosticCounterPerODEnSteps = k_diagnosticCounterPerODEnSteps.h_view; + h_diagnosticCounterPerODEnFuncs = k_diagnosticCounterPerODEnFuncs.h_view; + + Kokkos::deep_copy( h_diagnosticCounterPerODEnSteps, d_diagnosticCounterPerODEnSteps ); + Kokkos::deep_copy( h_diagnosticCounterPerODEnFuncs, d_diagnosticCounterPerODEnFuncs ); + + double my_max[numCounters], my_min[numCounters]; + + const int nlocal = atom->nlocal; + HAT::t_int_1d h_mask = atomKK->k_mask.h_view; + + for (int i = 0; i < numCounters; ++i) + { + my_sum_sq[i+numCounters] = 0; + my_max[i] = 0; + my_min[i] = DBL_MAX; + } + + for (int j = 0; j < nlocal; ++j) + if (h_mask(j) & groupbit) + { + int nSteps = h_diagnosticCounterPerODEnSteps(j); + double diff_nSteps = double( nSteps ) - avg_per_atom[StepSum]; + my_sum_sq[StepSum+numCounters] += diff_nSteps*diff_nSteps; + my_max[StepSum] = std::max( my_max[StepSum], (double)nSteps ); + my_min[StepSum] = std::min( my_min[StepSum], (double)nSteps ); + + int nFuncs = h_diagnosticCounterPerODEnFuncs(j); + double diff_nFuncs = double( nFuncs ) - avg_per_atom[FuncSum]; + my_sum_sq[FuncSum+numCounters] += diff_nFuncs*diff_nFuncs; + + my_max[FuncSum] = std::max( my_max[FuncSum], (double)nFuncs ); + my_min[FuncSum] = std::min( my_min[FuncSum], (double)nFuncs ); + } + + memory->destroy_kokkos( k_diagnosticCounterPerODEnSteps, diagnosticCounterPerODEnSteps ); + memory->destroy_kokkos( k_diagnosticCounterPerODEnFuncs, diagnosticCounterPerODEnFuncs ); + + MPI_Reduce (my_sum_sq, sum_sq, 2*numCounters, MPI_DOUBLE, MPI_SUM, 0, world); + + MPI_Reduce (my_max, max_per_ODE, numCounters, MPI_DOUBLE, MPI_MAX, 0, world); + MPI_Reduce 
(my_min, min_per_ODE, numCounters, MPI_DOUBLE, MPI_MIN, 0, world); + } + else + MPI_Reduce (my_sum_sq, sum_sq, numCounters, MPI_DOUBLE, MPI_SUM, 0, world); + + TimerType timer_stop = getTimeStamp(); + double time_local = getElapsedTime( timer_start, timer_stop ); + + if (comm->me == 0){ + char smesg[128]; + +#define print_mesg(smesg) {\ + if (screen) fprintf(screen,"%s\n", smesg); \ + if (logfile) fprintf(logfile,"%s\n", smesg); } + + sprintf(smesg, "FixRX::ODE Diagnostics: # of iters |# of rhs evals| run-time (sec) | # atoms"); + print_mesg(smesg); + + sprintf(smesg, " AVG per ODE : %-12.5g | %-12.5g | %-12.5g", avg_per_atom[0], avg_per_atom[1], avg_per_atom[2]); + print_mesg(smesg); + + // only valid for single time-step! + if (diagnosticFrequency == 1){ + double rms_per_ODE[numCounters]; + for (int i = 0; i < numCounters; ++i) + rms_per_ODE[i] = sqrt( sum_sq[i+numCounters] / nODEs ); + + sprintf(smesg, " RMS per ODE : %-12.5g | %-12.5g ", rms_per_ODE[0], rms_per_ODE[1]); + print_mesg(smesg); + + sprintf(smesg, " MAX per ODE : %-12.5g | %-12.5g ", max_per_ODE[0], max_per_ODE[1]); + print_mesg(smesg); + + sprintf(smesg, " MIN per ODE : %-12.5g | %-12.5g ", min_per_ODE[0], min_per_ODE[1]); + print_mesg(smesg); + } + + sprintf(smesg, " AVG per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", avg_per_proc[StepSum], avg_per_proc[FuncSum], avg_per_proc[TimeSum], avg_per_proc[AtomSum]); + print_mesg(smesg); + + if (comm->nprocs > 1){ + double rms_per_proc[numCounters]; + for (int i = 0; i < numCounters; ++i) + rms_per_proc[i] = sqrt( sum_sq[i] / comm->nprocs ); + + sprintf(smesg, " RMS per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", rms_per_proc[0], rms_per_proc[1], rms_per_proc[2], rms_per_proc[AtomSum]); + print_mesg(smesg); + + sprintf(smesg, " MAX per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", max_per_proc[0], max_per_proc[1], max_per_proc[2], max_per_proc[AtomSum]); + print_mesg(smesg); + + sprintf(smesg, " MIN per Proc : %-12.5g | %-12.5g | %-12.5g | 
%-12.5g", min_per_proc[0], min_per_proc[1], min_per_proc[2], min_per_proc[AtomSum]); + print_mesg(smesg); + } + + sprintf(smesg, " AVG'd over %d time-steps", nTimes); + print_mesg(smesg); + sprintf(smesg, " AVG'ing took %g sec", time_local); + print_mesg(smesg); + +#undef print_mesg + + } + + // Reset the counters. + for (int i = 0; i < numDiagnosticCounters; ++i) + diagnosticCounter[i] = 0; + + return; } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_rx_kokkos.h b/src/KOKKOS/fix_rx_kokkos.h index 36b05cb210..4a11ac9fb9 100644 --- a/src/KOKKOS/fix_rx_kokkos.h +++ b/src/KOKKOS/fix_rx_kokkos.h @@ -97,6 +97,19 @@ class FixRxKokkos : public FixRX { const double hmin, const double hmax, double& h0, double y[], double rwk[], void *v_params) const; + //!< ODE Solver diagnostics. + void odeDiagnostics(void); + + //!< Special counters per-ode. + int *diagnosticCounterPerODEnSteps; + int *diagnosticCounterPerODEnFuncs; + DAT::tdual_int_1d k_diagnosticCounterPerODEnSteps; + DAT::tdual_int_1d k_diagnosticCounterPerODEnFuncs; + typename ArrayTypes::t_int_1d d_diagnosticCounterPerODEnSteps; + typename ArrayTypes::t_int_1d d_diagnosticCounterPerODEnFuncs; + typename HAT::t_int_1d h_diagnosticCounterPerODEnSteps; + typename HAT::t_int_1d h_diagnosticCounterPerODEnFuncs; + template struct KineticsType { From 4ac7a5d1f2e6132595c8999090e7b4159aa6971a Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Sun, 12 Feb 2017 21:21:11 -0500 Subject: [PATCH 118/267] Added Kokkos-like array datatype into RK4 and RHS in FixRXKokkos. - Created an Array class that provides stride access for operator[] w/o needing Kokkos views. This was designed to avoid the performance issues encountered with Views and sub-views throughout the RHS and ODE solver functions. 
--- src/KOKKOS/fix_rx_kokkos.cpp | 520 ++++++++++++++++++++++++++++++++++- src/KOKKOS/fix_rx_kokkos.h | 52 +++- 2 files changed, 570 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index 2a3fc7547a..a6da0306bb 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -202,6 +202,373 @@ void FixRxKokkos::rk4(const double t_stop, double *y, double *rwork, /* ---------------------------------------------------------------------- */ +template + template +void FixRxKokkos::k_rk4(const double t_stop, double *y, double *rwork, UserDataType& userData) const +{ + double *k1 = rwork; + double *k2 = k1 + nspecies; + double *k3 = k2 + nspecies; + double *k4 = k3 + nspecies; + double *yp = k4 + nspecies; + + const int numSteps = minSteps; + + const double h = t_stop / double(numSteps); + + // Run the requested steps with h. + for (int step = 0; step < numSteps; step++) + { + // k1 + k_rhs(0.0,y,k1, userData); + + // k2 + for (int ispecies = 0; ispecies < nspecies; ispecies++) + yp[ispecies] = y[ispecies] + 0.5*h*k1[ispecies]; + + k_rhs(0.0,yp,k2, userData); + + // k3 + for (int ispecies = 0; ispecies < nspecies; ispecies++) + yp[ispecies] = y[ispecies] + 0.5*h*k2[ispecies]; + + k_rhs(0.0,yp,k3, userData); + + // k4 + for (int ispecies = 0; ispecies < nspecies; ispecies++) + yp[ispecies] = y[ispecies] + h*k3[ispecies]; + + k_rhs(0.0,yp,k4, userData); + + for (int ispecies = 0; ispecies < nspecies; ispecies++) + y[ispecies] += h*(k1[ispecies]/6.0 + k2[ispecies]/3.0 + k3[ispecies]/3.0 + k4[ispecies]/6.0); + + } // end for (int step... 
+ +} + +/* ---------------------------------------------------------------------- */ + +// f1 = dt*f(t,x) +// f2 = dt*f(t+ c20*dt,x + c21*f1) +// f3 = dt*f(t+ c30*dt,x + c31*f1 + c32*f2) +// f4 = dt*f(t+ c40*dt,x + c41*f1 + c42*f2 + c43*f3) +// f5 = dt*f(t+dt,x + c51*f1 + c52*f2 + c53*f3 + c54*f4) +// f6 = dt*f(t+ c60*dt,x + c61*f1 + c62*f2 + c63*f3 + c64*f4 + c65*f5) +// +// fifth-order runge-kutta integration +// x5 = x + b1*f1 + b3*f3 + b4*f4 + b5*f5 + b6*f6 +// fourth-order runge-kutta integration +// x = x + a1*f1 + a3*f3 + a4*f4 + a5*f5 + +template + template +void FixRxKokkos::k_rkf45_step (const int neq, const double h, double y[], double y_out[], double rwk[], UserDataType& userData) const +{ + const double c21=0.25; + const double c31=0.09375; + const double c32=0.28125; + const double c41=0.87938097405553; + const double c42=-3.2771961766045; + const double c43=3.3208921256258; + const double c51=2.0324074074074; + const double c52=-8.0; + const double c53=7.1734892787524; + const double c54=-0.20589668615984; + const double c61=-0.2962962962963; + const double c62=2.0; + const double c63=-1.3816764132554; + const double c64=0.45297270955166; + const double c65=-0.275; + const double a1=0.11574074074074; + const double a3=0.54892787524366; + const double a4=0.5353313840156; + const double a5=-0.2; + const double b1=0.11851851851852; + const double b3=0.51898635477583; + const double b4=0.50613149034201; + const double b5=-0.18; + const double b6=0.036363636363636; + + // local dependent variables (5 total) + double* f1 = &rwk[ 0]; + double* f2 = &rwk[ neq]; + double* f3 = &rwk[2*neq]; + double* f4 = &rwk[3*neq]; + double* f5 = &rwk[4*neq]; + double* f6 = &rwk[5*neq]; + + // scratch for the intermediate solution. 
+ //double* ytmp = &rwk[6*neq]; + double* ytmp = y_out; + + // 1) + k_rhs (0.0, y, f1, userData); + + for (int k = 0; k < neq; k++){ + f1[k] *= h; + ytmp[k] = y[k] + c21 * f1[k]; + } + + // 2) + k_rhs(0.0, ytmp, f2, userData); + + for (int k = 0; k < neq; k++){ + f2[k] *= h; + ytmp[k] = y[k] + c31 * f1[k] + c32 * f2[k]; + } + + // 3) + k_rhs(0.0, ytmp, f3, userData); + + for (int k = 0; k < neq; k++) { + f3[k] *= h; + ytmp[k] = y[k] + c41 * f1[k] + c42 * f2[k] + c43 * f3[k]; + } + + // 4) + k_rhs(0.0, ytmp, f4, userData); + + for (int k = 0; k < neq; k++) { + f4[k] *= h; + ytmp[k] = y[k] + c51 * f1[k] + c52 * f2[k] + c53 * f3[k] + c54 * f4[k]; + } + + // 5) + k_rhs(0.0, ytmp, f5, userData); + + for (int k = 0; k < neq; k++) { + f5[k] *= h; + ytmp[k] = y[k] + c61*f1[k] + c62*f2[k] + c63*f3[k] + c64*f4[k] + c65*f5[k]; + } + + // 6) + k_rhs(0.0, ytmp, f6, userData); + + for (int k = 0; k < neq; k++) + { + //const double f6 = h * ydot[k]; + f6[k] *= h; + + // 5th-order solution. + const double r5 = b1*f1[k] + b3*f3[k] + b4*f4[k] + b5*f5[k] + b6*f6[k]; + + // 4th-order solution. + const double r4 = a1*f1[k] + a3*f3[k] + a4*f4[k] + a5*f5[k]; + + // Truncation error: difference between 4th and 5th-order solutions. + rwk[k] = fabs(r5 - r4); + + // Update solution. + //y_out[k] = y[k] + r5; // Local extrapolation + y_out[k] = y[k] + r4; + } + + return; +} + +template + template +int FixRxKokkos::k_rkf45_h0 + (const int neq, const double t, const double t_stop, + const double hmin, const double hmax, + double& h0, double y[], double rwk[], UserDataType& userData) const +{ + // Set lower and upper bounds on h0, and take geometric mean as first trial value. + // Exit with this value if the bounds cross each other. + + // Adjust upper bound based on ydot ... + double hg = sqrt(hmin*hmax); + + //if (hmax < hmin) + //{ + // h0 = hg; + // return; + //} + + // Start iteration to find solution to ... 
{WRMS norm of (h0^2 y'' / 2)} = 1 + + double *ydot = rwk; + double *y1 = ydot + neq; + double *ydot1 = y1 + neq; + + const int max_iters = 10; + bool hnew_is_ok = false; + double hnew = hg; + int iter = 0; + + // compute ydot at t=t0 + k_rhs (t, y, ydot, userData); + + while(1) + { + // Estimate y'' with finite-difference ... + + for (int k = 0; k < neq; k++) + y1[k] = y[k] + hg * ydot[k]; + + // compute y' at t1 + k_rhs (t + hg, y1, ydot1, userData); + + // Compute WRMS norm of y'' + double yddnrm = 0.0; + for (int k = 0; k < neq; k++){ + double ydd = (ydot1[k] - ydot[k]) / hg; + double wterr = ydd / (relTol * fabs( y[k] ) + absTol); + yddnrm += wterr * wterr; + } + + yddnrm = sqrt( yddnrm / double(neq) ); + + //std::cout << "iter " << _iter << " hg " << hg << " y'' " << yddnrm << std::endl; + //std::cout << "ydot " << ydot[neq-1] << std::endl; + + // should we accept this? + if (hnew_is_ok || iter == max_iters){ + hnew = hg; + if (iter == max_iters) + fprintf(stderr, "ERROR_HIN_MAX_ITERS\n"); + break; + } + + // Get the new value of h ... + hnew = (yddnrm*hmax*hmax > 2.0) ? sqrt(2.0 / yddnrm) : sqrt(hg * hmax); + + // test the stopping conditions. + double hrat = hnew / hg; + + // Accept this value ... the bias factor should bring it within range. + if ( (hrat > 0.5) && (hrat < 2.0) ) + hnew_is_ok = true; + + // If y'' is still bad after a few iterations, just accept h and give up. + if ( (iter > 1) && hrat > 2.0 ) { + hnew = hg; + hnew_is_ok = true; + } + + //printf("iter=%d, yddnrw=%e, hnew=%e, hmin=%e, hmax=%e\n", iter, yddnrm, hnew, hmin, hmax); + + hg = hnew; + iter ++; + } + + // bound and bias estimate + h0 = hnew * 0.5; + h0 = fmax(h0, hmin); + h0 = fmin(h0, hmax); + //printf("h0=%e, hmin=%e, hmax=%e\n", h0, hmin, hmax); + + return (iter + 1); +} + +template + template +void FixRxKokkos::k_rkf45(const int neq, const double t_stop, double *y, double *rwork, UserDataType& userData, CounterType& counter) const +{ + // Rounding coefficient. 
+ const double uround = DBL_EPSILON; + + // Adaption limit (shrink or grow) + const double adaption_limit = 4.0; + + // Safety factor on the adaption. very specific but not necessary .. 0.9 is common. + const double hsafe = 0.840896415; + + // Time rounding factor. + const double tround = t_stop * uround; + + // Counters for diagnostics. + int nst = 0; // # of steps (accepted) + int nit = 0; // # of iterations total + int nfe = 0; // # of RHS evaluations + + // Min/Max step-size limits. + const double h_min = 100.0 * tround; + const double h_max = (minSteps > 0) ? t_stop / double(minSteps) : t_stop; + + // Set the initial step-size. 0 forces an internal estimate ... stable Euler step size. + double h = (minSteps > 0) ? t_stop / double(minSteps) : 0.0; + + double t = 0.0; + + if (h < h_min){ + //fprintf(stderr,"hin not implemented yet\n"); + //exit(-1); + nfe = k_rkf45_h0 (neq, t, t_stop, h_min, h_max, h, y, rwork, userData); + } + + //printf("t= %e t_stop= %e h= %e\n", t, t_stop, h); + + // Integrate until we reach the end time. + while (fabs(t - t_stop) > tround){ + double *yout = rwork; + double *eout = yout + neq; + + // Take a trial step. + k_rkf45_step (neq, h, y, yout, eout, userData); + + // Estimate the solution error. + // ... weighted 2-norm of the error. + double err2 = 0.0; + for (int k = 0; k < neq; k++){ + const double wterr = eout[k] / (relTol * fabs( y[k] ) + absTol); + err2 += wterr * wterr; + } + + double err = fmax( uround, sqrt( err2 / double(nspecies) )); + + // Accept the solution? + if (err <= 1.0 || h <= h_min){ + t += h; + nst++; + + for (int k = 0; k < neq; k++) + y[k] = yout[k]; + } + + // Adjust h for the next step. + double hfac = hsafe * sqrt( sqrt( 1.0 / err ) ); + + // Limit the adaption. + hfac = fmax( hfac, 1.0 / adaption_limit ); + hfac = fmin( hfac, adaption_limit ); + + // Apply the adaption factor... + h *= hfac; + + // Limit h. + h = fmin( h, h_max ); + h = fmax( h, h_min ); + + // Stretch h if we're within 5% ... 
and we didn't just fail. + if (err <= 1.0 && (t + 1.05*h) > t_stop) + h = t_stop - t; + + // And don't overshoot the end. + if (t + h > t_stop) + h = t_stop - t; + + nit++; + nfe += 6; + + if (maxIters && nit > maxIters){ + //fprintf(stderr,"atom[%d] took too many iterations in rkf45 %d %e %e\n", id, nit, t, t_stop); + counter.nFails ++; + break; + // We should set an error here so that the solution is not used! + } + + } // end while + + counter.nSteps += nst; + counter.nIters += nit; + counter.nFuncs += nfe; + + //printf("id= %d nst= %d nit= %d\n", id, nst, nit); +} +/* ---------------------------------------------------------------------- */ + // f1 = dt*f(t,x) // f2 = dt*f(t+ c20*dt,x + c21*f1) // f3 = dt*f(t+ c30*dt,x + c31*f1 + c32*f2) @@ -664,6 +1031,152 @@ int FixRxKokkos::rhs_sparse(double t, const double *y, double *dydt, /* ---------------------------------------------------------------------- */ +template + template +int FixRxKokkos::k_rhs(double t, const VectorType& y, VectorType& dydt, UserDataType& userData) const +{ + //StridedArrayType _y( const_cast( y ) ), _dydt( dydt ); + + // Use the sparse format instead. 
+ if (useSparseKinetics) + return this->k_rhs_sparse( t, y, dydt, userData); + else + return this->k_rhs_dense ( t, y, dydt, userData); +} + +/* ---------------------------------------------------------------------- */ + +template + template +int FixRxKokkos::k_rhs_dense(double t, const VectorType& y, VectorType& dydt, UserDataType& userData) const +{ + #define rxnRateLaw (userData.rxnRateLaw) + #define kFor (userData.kFor ) + + //const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms; + //const int nspecies = atom->nspecies_dpd; + + for(int ispecies=0; ispecies + template +int FixRxKokkos::k_rhs_sparse(double t, const VectorType& y, VectorType& dydt, UserDataType& userData) const +{ + #define kFor (userData.kFor) + #define kRev (NULL) + #define rxnRateLaw (userData.rxnRateLaw) + #define conc (dydt) + #define maxReactants (this->sparseKinetics_maxReactants) + #define maxSpecies (this->sparseKinetics_maxSpecies) + #define nuk (this->d_kineticsData.nuk) + #define nu (this->d_kineticsData.nu) + #define inu (this->d_kineticsData.inu) + #define isIntegral(idx) ( SparseKinetics_enableIntegralReactions \ + && this->d_kineticsData.isIntegral(idx) ) + + for (int k = 0; k < nspecies; ++k) + conc[k] = y[k] / VDPD; + + // Construct the reaction rate laws + for (int i = 0; i < nreactions; ++i) + { + double rxnRateLawForward; + if (isIntegral(i)){ + rxnRateLawForward = kFor[i] * powint( conc[ nuk(i,0) ], inu(i,0) ); + for (int kk = 1; kk < maxReactants; ++kk){ + const int k = nuk(i,kk); + if (k == SparseKinetics_invalidIndex) break; + //if (k != SparseKinetics_invalidIndex) + rxnRateLawForward *= powint( conc[k], inu(i,kk) ); + } + } else { + rxnRateLawForward = kFor[i] * pow( conc[ nuk(i,0) ], nu(i,0) ); + for (int kk = 1; kk < maxReactants; ++kk){ + const int k = nuk(i,kk); + if (k == SparseKinetics_invalidIndex) break; + //if (k != SparseKinetics_invalidIndex) + rxnRateLawForward *= pow( conc[k], nu(i,kk) ); + } + } + + rxnRateLaw[i] = 
rxnRateLawForward; + } + + // Construct the reaction rates for each species from the + // Stoichiometric matrix and ROP vector. + for (int k = 0; k < nspecies; ++k) + dydt[k] = 0.0; + + for (int i = 0; i < nreactions; ++i){ + // Reactants ... + dydt[ nuk(i,0) ] -= nu(i,0) * rxnRateLaw[i]; + for (int kk = 1; kk < maxReactants; ++kk){ + const int k = nuk(i,kk); + if (k == SparseKinetics_invalidIndex) break; + //if (k != SparseKinetics_invalidIndex) + dydt[k] -= nu(i,kk) * rxnRateLaw[i]; + } + + // Products ... + dydt[ nuk(i,maxReactants) ] += nu(i,maxReactants) * rxnRateLaw[i]; + for (int kk = maxReactants+1; kk < maxSpecies; ++kk){ + const int k = nuk(i,kk); + if (k == SparseKinetics_invalidIndex) break; + //if (k != SparseKinetics_invalidIndex) + dydt[k] += nu(i,kk) * rxnRateLaw[i]; + } + } + + // Add in the volume factor to convert to the proper units. + for (int k = 0; k < nspecies; ++k) + dydt[k] *= VDPD; + + #undef kFor + #undef kRev + #undef rxnRateLaw + #undef conc + #undef maxReactants + #undef maxSpecies + #undef nuk + #undef nu + #undef inu + #undef isIntegral + //#undef invalidIndex + + return 0; +} + +/* ---------------------------------------------------------------------- */ + /*template template KOKKOS_INLINE_FUNCTION @@ -907,6 +1420,10 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF userData.kFor = new double[nreactions]; userData.rxnRateLaw = new double[nreactions]; + UserRHSDataKokkos<1> userDataKokkos; + userDataKokkos.kFor.m_data = userData.kFor; + userDataKokkos.rxnRateLaw.m_data = userData.rxnRateLaw; + CounterType counter_i; const double theta = (localTempFlag) ? d_dpdThetaLocal(i) : d_dpdTheta(i); @@ -935,7 +1452,8 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF // Solver the ODE system. 
if (odeIntegrationFlag == ODE_LAMMPS_RK4) { - rk4(t_stop, y, rwork, &userData); + //rk4(t_stop, y, rwork, &userData); + k_rk4(t_stop, y, rwork, userDataKokkos); } else if (odeIntegrationFlag == ODE_LAMMPS_RKF45) { diff --git a/src/KOKKOS/fix_rx_kokkos.h b/src/KOKKOS/fix_rx_kokkos.h index 4a11ac9fb9..e36d606525 100644 --- a/src/KOKKOS/fix_rx_kokkos.h +++ b/src/KOKKOS/fix_rx_kokkos.h @@ -76,12 +76,43 @@ class FixRxKokkos : public FixRX { PairDPDfdtEnergyKokkos* pairDPDEKK; double VDPD; + template + struct StridedArrayType + { + typedef T value_type; + enum { Stride = stride }; + + value_type *m_data; + + StridedArrayType() : m_data(NULL) {} + StridedArrayType(value_type *ptr) : m_data(ptr) {} + + inline value_type& operator()(const int idx) { return m_data[Stride*idx]; } + inline const value_type& operator()(const int idx) const { return m_data[Stride*idx]; } + inline value_type& operator[](const int idx) { return m_data[Stride*idx]; } + inline const value_type& operator[](const int idx) const { return m_data[Stride*idx]; } + }; + + template + struct UserRHSDataKokkos + { + StridedArrayType kFor; + StridedArrayType rxnRateLaw; + }; + void solve_reactions(const int vflag, const bool isPreForce = true); - int rhs(double, const double *, double *, void *) const; + int rhs (double, const double *, double *, void *) const; int rhs_dense (double, const double *, double *, void *) const; int rhs_sparse(double, const double *, double *, void *) const; + template + int k_rhs (double, const VectorType&, VectorType&, UserDataType& ) const; + template + int k_rhs_dense (double, const VectorType&, VectorType&, UserDataType& ) const; + template + int k_rhs_sparse(double, const VectorType&, VectorType&, UserDataType& ) const; + //!< Classic Runge-Kutta 4th-order stepper. 
void rk4(const double t_stop, double *y, double *rwork, void *v_params) const; @@ -97,6 +128,25 @@ class FixRxKokkos : public FixRX { const double hmin, const double hmax, double& h0, double y[], double rwk[], void *v_params) const; + //!< Classic Runge-Kutta 4th-order stepper. + template + void k_rk4(const double t_stop, double *y, double *rwork, UserDataType& userData) const; + + //!< Runge-Kutta-Fehlberg ODE Solver. + template + void k_rkf45(const int neq, const double t_stop, double *y, double *rwork, UserDataType& userData, CounterType& counter) const; + + //!< Runge-Kutta-Fehlberg ODE stepper function. + template + void k_rkf45_step (const int neq, const double h, double y[], double y_out[], + double rwk[], UserDataType& userData) const; + + //!< Initial step size estimation for the Runge-Kutta-Fehlberg ODE solver. + template + int k_rkf45_h0 (const int neq, const double t, const double t_stop, + const double hmin, const double hmax, + double& h0, double y[], double rwk[], UserDataType& userData) const; + //!< ODE Solver diagnostics. void odeDiagnostics(void); From 2f32c1a9af6f8a8bb39c69f051552263dc313572 Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Sun, 12 Feb 2017 22:48:02 -0500 Subject: [PATCH 119/267] Switched to using Kokkos device data for ODE scratch data. - Finished porting all scratch arrays to use the StridedArrayType template. - Created a single, large Kokkos device array and now use that for all scratch data passed into the StridedArrayType objects. 
--- src/KOKKOS/fix_rx_kokkos.cpp | 101 ++++++++++++++++++++--------------- src/KOKKOS/fix_rx_kokkos.h | 18 +++---- 2 files changed, 67 insertions(+), 52 deletions(-) diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index a6da0306bb..09a122a108 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -203,14 +203,14 @@ void FixRxKokkos::rk4(const double t_stop, double *y, double *rwork, /* ---------------------------------------------------------------------- */ template - template -void FixRxKokkos::k_rk4(const double t_stop, double *y, double *rwork, UserDataType& userData) const + template +void FixRxKokkos::k_rk4(const double t_stop, VectorType& y, VectorType& rwork, UserDataType& userData) const { - double *k1 = rwork; - double *k2 = k1 + nspecies; - double *k3 = k2 + nspecies; - double *k4 = k3 + nspecies; - double *yp = k4 + nspecies; + VectorType k1( rwork ); + VectorType k2( &k1[nspecies] ); + VectorType k3( &k2[nspecies] ); + VectorType k4( &k3[nspecies] ); + VectorType yp( &k4[nspecies] ); const int numSteps = minSteps; @@ -262,8 +262,8 @@ void FixRxKokkos::k_rk4(const double t_stop, double *y, double *rwor // x = x + a1*f1 + a3*f3 + a4*f4 + a5*f5 template - template -void FixRxKokkos::k_rkf45_step (const int neq, const double h, double y[], double y_out[], double rwk[], UserDataType& userData) const + template +void FixRxKokkos::k_rkf45_step (const int neq, const double h, VectorType& y, VectorType& y_out, VectorType& rwk, UserDataType& userData) const { const double c21=0.25; const double c31=0.09375; @@ -291,16 +291,15 @@ void FixRxKokkos::k_rkf45_step (const int neq, const double h, doubl const double b6=0.036363636363636; // local dependent variables (5 total) - double* f1 = &rwk[ 0]; - double* f2 = &rwk[ neq]; - double* f3 = &rwk[2*neq]; - double* f4 = &rwk[3*neq]; - double* f5 = &rwk[4*neq]; - double* f6 = &rwk[5*neq]; + VectorType& f1 = rwk; + VectorType f2( &rwk[ neq] ); + VectorType f3( &rwk[2*neq] 
); + VectorType f4( &rwk[3*neq] ); + VectorType f5( &rwk[4*neq] ); + VectorType f6( &rwk[5*neq] ); // scratch for the intermediate solution. - //double* ytmp = &rwk[6*neq]; - double* ytmp = y_out; + VectorType& ytmp = y_out; // 1) k_rhs (0.0, y, f1, userData); @@ -368,11 +367,11 @@ void FixRxKokkos::k_rkf45_step (const int neq, const double h, doubl } template - template + template int FixRxKokkos::k_rkf45_h0 (const int neq, const double t, const double t_stop, const double hmin, const double hmax, - double& h0, double y[], double rwk[], UserDataType& userData) const + double& h0, VectorType& y, VectorType& rwk, UserDataType& userData) const { // Set lower and upper bounds on h0, and take geometric mean as first trial value. // Exit with this value if the bounds cross each other. @@ -388,9 +387,9 @@ int FixRxKokkos::k_rkf45_h0 // Start iteration to find solution to ... {WRMS norm of (h0^2 y'' / 2)} = 1 - double *ydot = rwk; - double *y1 = ydot + neq; - double *ydot1 = y1 + neq; + VectorType& ydot = rwk; + VectorType y1 ( &ydot[ neq] ); + VectorType ydot1 ( &ydot[2*neq] ); const int max_iters = 10; bool hnew_is_ok = false; @@ -463,8 +462,8 @@ int FixRxKokkos::k_rkf45_h0 } template - template -void FixRxKokkos::k_rkf45(const int neq, const double t_stop, double *y, double *rwork, UserDataType& userData, CounterType& counter) const + template +void FixRxKokkos::k_rkf45(const int neq, const double t_stop, VectorType& y, VectorType& rwork, UserDataType& userData, CounterType& counter) const { // Rounding coefficient. const double uround = DBL_EPSILON; @@ -501,9 +500,10 @@ void FixRxKokkos::k_rkf45(const int neq, const double t_stop, double //printf("t= %e t_stop= %e h= %e\n", t, t_stop, h); // Integrate until we reach the end time. - while (fabs(t - t_stop) > tround){ - double *yout = rwork; - double *eout = yout + neq; + while (fabs(t - t_stop) > tround) + { + VectorType& yout = rwork; + VectorType eout ( &yout[neq] ); // Take a trial step. 
k_rkf45_step (neq, h, y, yout, eout, userData); @@ -1035,8 +1035,6 @@ template template int FixRxKokkos::k_rhs(double t, const VectorType& y, VectorType& dydt, UserDataType& userData) const { - //StridedArrayType _y( const_cast( y ) ), _dydt( dydt ); - // Use the sparse format instead. if (useSparseKinetics) return this->k_rhs_sparse( t, y, dydt, userData); @@ -1409,20 +1407,36 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF ); } + // Create scratch array space. + const size_t scratchSpaceSize = (8*nspecies + 2*nreactions); + //double *scratchSpace = new double[ scratchSpaceSize * nlocal ]; + + typename ArrayTypes::t_double_1d d_scratchSpace("d_scratchSpace", scratchSpaceSize * nlocal); + Kokkos::parallel_reduce( nlocal, LAMMPS_LAMBDA(int i, CounterType &counter) { if (d_mask(i) & groupbit) { - double *y = new double[8*nspecies]; - double *rwork = y + nspecies; + //double *y = new double[8*nspecies]; + //double *rwork = y + nspecies; - UserRHSData userData; - userData.kFor = new double[nreactions]; - userData.rxnRateLaw = new double[nreactions]; + //StridedArrayType _y( y ); + //StridedArrayType _rwork( rwork ); - UserRHSDataKokkos<1> userDataKokkos; - userDataKokkos.kFor.m_data = userData.kFor; - userDataKokkos.rxnRateLaw.m_data = userData.rxnRateLaw; + StridedArrayType y( d_scratchSpace.ptr_on_device() + scratchSpaceSize * i ); + StridedArrayType rwork( &y[nspecies] ); + + //UserRHSData userData; + //userData.kFor = new double[nreactions]; + //userData.rxnRateLaw = new double[nreactions]; + + //UserRHSDataKokkos<1> userDataKokkos; + //userDataKokkos.kFor.m_data = userData.kFor; + //userDataKokkos.rxnRateLaw.m_data = userData.rxnRateLaw; + + UserRHSDataKokkos<1> userData; + userData.kFor.m_data = &( rwork[7*nspecies] ); + userData.rxnRateLaw.m_data = &( userData.kFor[ nreactions ] ); CounterType counter_i; @@ -1452,12 +1466,11 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF // Solver the ODE system. 
if (odeIntegrationFlag == ODE_LAMMPS_RK4) { - //rk4(t_stop, y, rwork, &userData); - k_rk4(t_stop, y, rwork, userDataKokkos); + k_rk4(t_stop, y, rwork, userData); } else if (odeIntegrationFlag == ODE_LAMMPS_RKF45) { - rkf45(nspecies, t_stop, y, rwork, &userData, counter_i); + k_rkf45(nspecies, t_stop, y, rwork, userData, counter_i); if (diagnosticFrequency == 1) { @@ -1477,9 +1490,9 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF d_dvector(ispecies,i) = y[ispecies]; } - delete [] y; - delete [] userData.kFor; - delete [] userData.rxnRateLaw; + //delete [] y; + //delete [] userData.kFor; + //delete [] userData.rxnRateLaw; // Update the iteration statistics counter. Is this unique for each iteration? counter += counter_i; @@ -1490,6 +1503,8 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF , TotalCounters // reduction value for all iterations. ); + //delete [] scratchSpace; + TimerType timer_ODE = getTimeStamp(); // Signal that dvector has been modified on this execution space. diff --git a/src/KOKKOS/fix_rx_kokkos.h b/src/KOKKOS/fix_rx_kokkos.h index e36d606525..9ac944c6a5 100644 --- a/src/KOKKOS/fix_rx_kokkos.h +++ b/src/KOKKOS/fix_rx_kokkos.h @@ -129,23 +129,23 @@ class FixRxKokkos : public FixRX { double& h0, double y[], double rwk[], void *v_params) const; //!< Classic Runge-Kutta 4th-order stepper. - template - void k_rk4(const double t_stop, double *y, double *rwork, UserDataType& userData) const; + template + void k_rk4(const double t_stop, VectorType& y, VectorType& rwork, UserDataType& userData) const; //!< Runge-Kutta-Fehlberg ODE Solver. - template - void k_rkf45(const int neq, const double t_stop, double *y, double *rwork, UserDataType& userData, CounterType& counter) const; + template + void k_rkf45(const int neq, const double t_stop, VectorType& y, VectorType& rwork, UserDataType& userData, CounterType& counter) const; //!< Runge-Kutta-Fehlberg ODE stepper function. 
- template - void k_rkf45_step (const int neq, const double h, double y[], double y_out[], - double rwk[], UserDataType& userData) const; + template + void k_rkf45_step (const int neq, const double h, VectorType& y, VectorType& y_out, + VectorType& rwk, UserDataType& userData) const; //!< Initial step size estimation for the Runge-Kutta-Fehlberg ODE solver. - template + template int k_rkf45_h0 (const int neq, const double t, const double t_stop, const double hmin, const double hmax, - double& h0, double y[], double rwk[], UserDataType& userData) const; + double& h0, VectorType& y, VectorType& rwk, UserDataType& userData) const; //!< ODE Solver diagnostics. void odeDiagnostics(void); From 4e9c8f496235016a5277a43e22f7bca5b85b4f10 Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Mon, 13 Feb 2017 10:48:30 -0500 Subject: [PATCH 120/267] Update FixRXKokkos for Cuda build. Added inline and other KOKKOS macros. - Updated the function prototypes to include the necessary KOKKOS macros for __host__ and __device__ functions and inlined functions. - Changed several View definitions to match the disjoint memory spaces that only come up with Cuda builds. --- src/KOKKOS/fix_rx_kokkos.cpp | 31 +++++++++++++++++++++++++------ src/KOKKOS/fix_rx_kokkos.h | 35 +++++++++++++++++++++++++---------- 2 files changed, 50 insertions(+), 16 deletions(-) diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index 09a122a108..71897157f3 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -25,13 +25,13 @@ #include "neigh_list_kokkos.h" #include "neigh_request.h" #include "error.h" -#include "math_special.h" +#include "math_special_kokkos.h" #include // DBL_EPSILON using namespace LAMMPS_NS; using namespace FixConst; -using namespace MathSpecial; +using namespace MathSpecialKokkos; #ifdef DBL_EPSILON #define MY_EPSILON (10.0*DBL_EPSILON) @@ -425,8 +425,8 @@ int FixRxKokkos::k_rkf45_h0 // should we accept this? 
if (hnew_is_ok || iter == max_iters){ hnew = hg; - if (iter == max_iters) - fprintf(stderr, "ERROR_HIN_MAX_ITERS\n"); + //if (iter == max_iters) + // fprintf(stderr, "ERROR_HIN_MAX_ITERS\n"); break; } @@ -1407,6 +1407,14 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF ); } + // Error flag for any failures. + DAT::tdual_int_scalar k_error_flag("pair:error_flag"); + + // Initialize and sync the device flag. + k_error_flag.h_view() = 0; + k_error_flag.template modify(); + k_error_flag.template sync(); + // Create scratch array space. const size_t scratchSpaceSize = (8*nspecies + 2*nreactions); //double *scratchSpace = new double[ scratchSpaceSize * nlocal ]; @@ -1483,7 +1491,11 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF for (int ispecies = 0; ispecies < nspecies; ispecies++) { if (y[ispecies] < -MY_EPSILON) - error->one(FLERR,"Computed concentration in RK solver is < -10*DBL_EPSILON"); + { + //error->one(FLERR,"Computed concentration in RK solver is < -10*DBL_EPSILON"); + k_error_flag.d_view() = 2; + // This should be an atomic update. + } else if (y[ispecies] < MY_EPSILON) y[ispecies] = 0.0; @@ -1507,6 +1519,12 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF TimerType timer_ODE = getTimeStamp(); + // Check the error flag for any failures. + k_error_flag.template modify(); + k_error_flag.template sync(); + if (k_error_flag.h_view() == 2) + error->one(FLERR,"Computed concentration in RK solver is < -10*DBL_EPSILON"); + // Signal that dvector has been modified on this execution space. atomKK->modified( execution_space, DVECTOR_MASK ); @@ -1815,7 +1833,8 @@ void FixRxKokkos::computeLocalTemperature() { // Create an atomic view of sumWeights and dpdThetaLocal. Only needed // for Half/thread scenarios. 
- typedef Kokkos::View< E_FLOAT*, typename DAT::t_efloat_1d::array_layout, DeviceType, Kokkos::MemoryTraits< AtomicF< NEIGHFLAG >::value> > AtomicViewType; + //typedef Kokkos::View< E_FLOAT*, typename DAT::t_efloat_1d::array_layout, DeviceType, Kokkos::MemoryTraits< AtomicF< NEIGHFLAG >::value> > AtomicViewType; + typedef Kokkos::View< E_FLOAT*, typename DAT::t_efloat_1d::array_layout, typename DAT::t_efloat_1d::device_type, Kokkos::MemoryTraits< AtomicF< NEIGHFLAG >::value> > AtomicViewType; AtomicViewType a_dpdThetaLocal = d_dpdThetaLocal; AtomicViewType a_sumWeights = d_sumWeights; diff --git a/src/KOKKOS/fix_rx_kokkos.h b/src/KOKKOS/fix_rx_kokkos.h index 9ac944c6a5..c18ce6f151 100644 --- a/src/KOKKOS/fix_rx_kokkos.h +++ b/src/KOKKOS/fix_rx_kokkos.h @@ -49,6 +49,7 @@ class FixRxKokkos : public FixRX { { int nSteps, nIters, nFuncs, nFails; + KOKKOS_INLINE_FUNCTION CounterType() : nSteps(0), nIters(0), nFuncs(0), nFails(0) {}; KOKKOS_INLINE_FUNCTION @@ -72,7 +73,7 @@ class FixRxKokkos : public FixRX { } }; - protected: + //protected: PairDPDfdtEnergyKokkos* pairDPDEKK; double VDPD; @@ -84,13 +85,15 @@ class FixRxKokkos : public FixRX { value_type *m_data; + KOKKOS_INLINE_FUNCTION StridedArrayType() : m_data(NULL) {} + KOKKOS_INLINE_FUNCTION StridedArrayType(value_type *ptr) : m_data(ptr) {} - inline value_type& operator()(const int idx) { return m_data[Stride*idx]; } - inline const value_type& operator()(const int idx) const { return m_data[Stride*idx]; } - inline value_type& operator[](const int idx) { return m_data[Stride*idx]; } - inline const value_type& operator[](const int idx) const { return m_data[Stride*idx]; } + KOKKOS_INLINE_FUNCTION value_type& operator()(const int idx) { return m_data[Stride*idx]; } + KOKKOS_INLINE_FUNCTION const value_type& operator()(const int idx) const { return m_data[Stride*idx]; } + KOKKOS_INLINE_FUNCTION value_type& operator[](const int idx) { return m_data[Stride*idx]; } + KOKKOS_INLINE_FUNCTION const value_type& 
operator[](const int idx) const { return m_data[Stride*idx]; } }; template @@ -100,17 +103,22 @@ class FixRxKokkos : public FixRX { StridedArrayType rxnRateLaw; }; - void solve_reactions(const int vflag, const bool isPreForce = true); + void solve_reactions(const int vflag, const bool isPreForce); int rhs (double, const double *, double *, void *) const; int rhs_dense (double, const double *, double *, void *) const; int rhs_sparse(double, const double *, double *, void *) const; template + KOKKOS_INLINE_FUNCTION int k_rhs (double, const VectorType&, VectorType&, UserDataType& ) const; + template + KOKKOS_INLINE_FUNCTION int k_rhs_dense (double, const VectorType&, VectorType&, UserDataType& ) const; + template + KOKKOS_INLINE_FUNCTION int k_rhs_sparse(double, const VectorType&, VectorType&, UserDataType& ) const; //!< Classic Runge-Kutta 4th-order stepper. @@ -130,19 +138,23 @@ class FixRxKokkos : public FixRX { //!< Classic Runge-Kutta 4th-order stepper. template + KOKKOS_INLINE_FUNCTION void k_rk4(const double t_stop, VectorType& y, VectorType& rwork, UserDataType& userData) const; //!< Runge-Kutta-Fehlberg ODE Solver. template + KOKKOS_INLINE_FUNCTION void k_rkf45(const int neq, const double t_stop, VectorType& y, VectorType& rwork, UserDataType& userData, CounterType& counter) const; //!< Runge-Kutta-Fehlberg ODE stepper function. template + KOKKOS_INLINE_FUNCTION void k_rkf45_step (const int neq, const double h, VectorType& y, VectorType& y_out, VectorType& rwk, UserDataType& userData) const; //!< Initial step size estimation for the Runge-Kutta-Fehlberg ODE solver. 
template + KOKKOS_INLINE_FUNCTION int k_rkf45_h0 (const int neq, const double t, const double t_stop, const double hmin, const double hmax, double& h0, VectorType& y, VectorType& rwk, UserDataType& userData) const; @@ -155,8 +167,10 @@ class FixRxKokkos : public FixRX { int *diagnosticCounterPerODEnFuncs; DAT::tdual_int_1d k_diagnosticCounterPerODEnSteps; DAT::tdual_int_1d k_diagnosticCounterPerODEnFuncs; - typename ArrayTypes::t_int_1d d_diagnosticCounterPerODEnSteps; - typename ArrayTypes::t_int_1d d_diagnosticCounterPerODEnFuncs; + //typename ArrayTypes::t_int_1d d_diagnosticCounterPerODEnSteps; + //typename ArrayTypes::t_int_1d d_diagnosticCounterPerODEnFuncs; + typename DAT::t_int_1d d_diagnosticCounterPerODEnSteps; + typename DAT::t_int_1d d_diagnosticCounterPerODEnFuncs; typename HAT::t_int_1d h_diagnosticCounterPerODEnSteps; typename HAT::t_int_1d h_diagnosticCounterPerODEnFuncs; @@ -185,7 +199,8 @@ class FixRxKokkos : public FixRX { // Need a dual-view and device-view for dpdThetaLocal and sumWeights since they're used in several callbacks. DAT::tdual_efloat_1d k_dpdThetaLocal, k_sumWeights; - typename ArrayTypes::t_efloat_1d d_dpdThetaLocal, d_sumWeights; + //typename ArrayTypes::t_efloat_1d d_dpdThetaLocal, d_sumWeights; + typename DAT::t_efloat_1d d_dpdThetaLocal, d_sumWeights; typename HAT::t_efloat_1d h_dpdThetaLocal, h_sumWeights; template @@ -196,7 +211,7 @@ class FixRxKokkos : public FixRX { int pack_forward_comm(int , int *, double *, int, int *); void unpack_forward_comm(int , int , double *); - private: // replicate a few from FixRX + //private: // replicate a few from FixRX int my_restartFlag; }; From 799d55e0971331c6b54527b38ec991b2f1a08212 Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Mon, 13 Feb 2017 14:24:51 -0500 Subject: [PATCH 121/267] Switched to operator()'s and Tag's for the Kokkos launch objects. - Switched from using lambda functions to operator()'s with type tags in FixRxKokkos. 
The lambdas were giving big problems in Cuda with the memory objects. This required that all referenced views be members of the FixRXKokkos class. - Add copymode controls to solve_reactions() to avoid the destructor freeing pointers carried forward from the copy constructor. Added the same to FixRX since it's called, too. --- src/KOKKOS/fix_rx_kokkos.cpp | 316 ++++++++++++++++++++++++++++++----- src/KOKKOS/fix_rx_kokkos.h | 109 +++++++++--- src/USER-DPD/fix_rx.cpp | 3 + 3 files changed, 361 insertions(+), 67 deletions(-) diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index 71897157f3..77e948be35 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -69,13 +69,16 @@ FixRxKokkos::FixRxKokkos(LAMMPS *lmp, int narg, char **arg) : datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; + k_error_flag = DAT::tdual_int_scalar("FixRxKokkos::k_error_flag"); + printf("Inside FixRxKokkos::FixRxKokkos\n"); } template FixRxKokkos::~FixRxKokkos() { - printf("Inside FixRxKokkos::~FixRxKokkos\n"); + printf("Inside FixRxKokkos::~FixRxKokkos copymode= %d\n", copymode); + if (copymode) return; } /* ---------------------------------------------------------------------- */ @@ -1315,6 +1318,95 @@ void FixRxKokkos::pre_force(int vflag) this->solve_reactions( vflag, true ); } + +/* ---------------------------------------------------------------------- */ + +template + KOKKOS_INLINE_FUNCTION +void FixRxKokkos::operator()(Tag_FixRxKokkos_zeroCounterViews, const int& i) const +{ + d_diagnosticCounterPerODEnSteps(i) = 0; + d_diagnosticCounterPerODEnFuncs(i) = 0; +} + +/* ---------------------------------------------------------------------- */ + +template + template + KOKKOS_INLINE_FUNCTION +void FixRxKokkos::operator()(Tag_FixRxKokkos_solveSystems, const int& i, CounterType& counter) const +{ + if (d_mask(i) & groupbit) + { + StridedArrayType y( d_scratchSpace.ptr_on_device() + scratchSpaceSize * i ); + StridedArrayType rwork( 
&y[nspecies] ); + + UserRHSDataKokkos<1> userData; + userData.kFor.m_data = &( rwork[7*nspecies] ); + userData.rxnRateLaw.m_data = &( userData.kFor[ nreactions ] ); + + CounterType counter_i; + + const double theta = (localTempFlag) ? d_dpdThetaLocal(i) : d_dpdTheta(i); + + //Compute the reaction rate constants + for (int irxn = 0; irxn < nreactions; irxn++) + { + if (ZERO_RATES) + userData.kFor[irxn] = 0.0; + else + { + userData.kFor[irxn] = d_kineticsData.Arr(irxn) * + pow(theta, d_kineticsData.nArr(irxn)) * + exp(-d_kineticsData.Ea(irxn) / boltz / theta); + } + } + + // Update ConcOld and initialize the ODE solution vector y[]. + for (int ispecies = 0; ispecies < nspecies; ispecies++) + { + const double tmp = d_dvector(ispecies, i); + d_dvector(ispecies+nspecies, i) = tmp; + y[ispecies] = tmp; + } + + // Solver the ODE system. + if (odeIntegrationFlag == ODE_LAMMPS_RK4) + { + k_rk4(t_stop, y, rwork, userData); + } + else if (odeIntegrationFlag == ODE_LAMMPS_RKF45) + { + k_rkf45(nspecies, t_stop, y, rwork, userData, counter_i); + + if (diagnosticFrequency == 1) + { + d_diagnosticCounterPerODEnSteps(i) = counter_i.nSteps; + d_diagnosticCounterPerODEnFuncs(i) = counter_i.nFuncs; + } + } + + // Store the solution back in dvector. + for (int ispecies = 0; ispecies < nspecies; ispecies++) + { + if (y[ispecies] < -MY_EPSILON) + { + //error->one(FLERR,"Computed concentration in RK solver is < -10*DBL_EPSILON"); + k_error_flag.d_view() = 2; + // This should be an atomic update. + } + else if (y[ispecies] < MY_EPSILON) + y[ispecies] = 0.0; + + d_dvector(ispecies,i) = y[ispecies]; + } + + // Update the iteration statistics counter. Is this unique for each iteration? 
+ counter += counter_i; + + } // if +} + /* ---------------------------------------------------------------------- */ template @@ -1322,12 +1414,15 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF { printf("Inside FixRxKokkos::solve_reactions localTempFlag= %d isPreForce= %s\n", localTempFlag, isPreForce ? "True" : "false"); + copymode = 1; + if (update_kinetics_data) create_kinetics_data(); TimerType timer_start = getTimeStamp(); - const int nlocal = atom->nlocal; + //const int nlocal = atom->nlocal; + this->nlocal = atom->nlocal; const int nghost = atom->nghost; const int newton_pair = force->newton_pair; @@ -1339,8 +1434,8 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF const int count = nlocal + (newton_pair ? nghost : 0); memory->create_kokkos (k_dpdThetaLocal, dpdThetaLocal, count, "FixRxKokkos::dpdThetaLocal"); - d_dpdThetaLocal = k_dpdThetaLocal.d_view; - h_dpdThetaLocal = k_dpdThetaLocal.h_view; + this->d_dpdThetaLocal = k_dpdThetaLocal.d_view; + this->h_dpdThetaLocal = k_dpdThetaLocal.h_view; const int neighflag = lmp->kokkos->neighflag; @@ -1376,16 +1471,21 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF // ... // Local references to the atomKK objects. - typename ArrayTypes::t_efloat_1d d_dpdTheta = atomKK->k_dpdTheta.view(); - typename ArrayTypes::t_float_2d d_dvector = atomKK->k_dvector.view(); - typename ArrayTypes::t_int_1d d_mask = atomKK->k_mask.view(); + //typename ArrayTypes::t_efloat_1d d_dpdTheta = atomKK->k_dpdTheta.view(); + //typename ArrayTypes::t_float_2d d_dvector = atomKK->k_dvector.view(); + //typename ArrayTypes::t_int_1d d_mask = atomKK->k_mask.view(); + this->d_dpdTheta = atomKK->k_dpdTheta.view(); + this->d_dvector = atomKK->k_dvector.view(); + this->d_mask = atomKK->k_mask.view(); // Get up-to-date data. 
atomKK->sync( execution_space, MASK_MASK | DVECTOR_MASK | DPDTHETA_MASK ); // Set some constants outside of the parallel_for - const double boltz = force->boltz; - const double t_stop = update->dt; // DPD time-step and integration length. + //const double boltz = force->boltz; + //const double t_stop = update->dt; // DPD time-step and integration length. + this->boltz = force->boltz; + this->t_stop = update->dt; // DPD time-step and integration length. // Average DPD volume. Used in the RHS function. this->VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms; @@ -1398,17 +1498,18 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF d_diagnosticCounterPerODEnSteps = k_diagnosticCounterPerODEnSteps.d_view; d_diagnosticCounterPerODEnFuncs = k_diagnosticCounterPerODEnFuncs.d_view; - Kokkos::parallel_for ( nlocal, - LAMMPS_LAMBDA(const int i) - { - d_diagnosticCounterPerODEnSteps(i) = 0; - d_diagnosticCounterPerODEnFuncs(i) = 0; - } - ); + Kokkos::parallel_for ( Kokkos::RangePolicy(0,nlocal), *this); + //Kokkos::parallel_for ( nlocal, + // LAMMPS_LAMBDA(const int i) + // { + // d_diagnosticCounterPerODEnSteps(i) = 0; + // d_diagnosticCounterPerODEnFuncs(i) = 0; + // } + // ); } // Error flag for any failures. - DAT::tdual_int_scalar k_error_flag("pair:error_flag"); + //DAT::tdual_int_scalar k_error_flag("pair:error_flag"); // Initialize and sync the device flag. k_error_flag.h_view() = 0; @@ -1416,11 +1517,14 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF k_error_flag.template sync(); // Create scratch array space. 
- const size_t scratchSpaceSize = (8*nspecies + 2*nreactions); + //const size_t scratchSpaceSize = (8*nspecies + 2*nreactions); + this->scratchSpaceSize = (8*nspecies + 2*nreactions); //double *scratchSpace = new double[ scratchSpaceSize * nlocal ]; - typename ArrayTypes::t_double_1d d_scratchSpace("d_scratchSpace", scratchSpaceSize * nlocal); + //typename ArrayTypes::t_double_1d d_scratchSpace("d_scratchSpace", scratchSpaceSize * nlocal); + memory->create_kokkos (d_scratchSpace, nlocal*scratchSpaceSize, "FixRxKokkos::d_scratchSpace"); +#if 0 Kokkos::parallel_reduce( nlocal, LAMMPS_LAMBDA(int i, CounterType &counter) { if (d_mask(i) & groupbit) @@ -1514,8 +1618,15 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF , TotalCounters // reduction value for all iterations. ); +#else + if (setRatesToZero) + Kokkos::parallel_reduce( Kokkos::RangePolicy >(0,nlocal), *this, TotalCounters); + else + Kokkos::parallel_reduce( Kokkos::RangePolicy >(0,nlocal), *this, TotalCounters); +#endif //delete [] scratchSpace; + memory->destroy_kokkos (d_scratchSpace); TimerType timer_ODE = getTimeStamp(); @@ -1570,6 +1681,8 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF (diagnosticFrequency < 0 && update->ntimestep == update->laststep) ) this->odeDiagnostics(); } + + copymode = 0; } /* ---------------------------------------------------------------------- */ @@ -1654,7 +1767,8 @@ void FixRxKokkos::odeDiagnostics(void) double my_max[numCounters], my_min[numCounters]; - const int nlocal = atom->nlocal; + //const int nlocal = atom->nlocal; + nlocal = atom->nlocal; HAT::t_int_1d h_mask = atomKK->k_mask.h_view; for (int i = 0; i < numCounters; ++i) @@ -1760,17 +1874,122 @@ void FixRxKokkos::odeDiagnostics(void) /* ---------------------------------------------------------------------- */ +template + KOKKOS_INLINE_FUNCTION +void FixRxKokkos::operator()(Tag_FixRxKokkos_zeroTemperatureViews, const int& i) const +{ + d_sumWeights(i) = 0.0; + 
d_dpdThetaLocal(i) = 0.0; +} + +/* ---------------------------------------------------------------------- */ + +template + template + KOKKOS_INLINE_FUNCTION +void FixRxKokkos::operator()(Tag_FixRxKokkos_firstPairOperator, const int& ii) const +{ + // Create an atomic view of sumWeights and dpdThetaLocal. Only needed + // for Half/thread scenarios. + typedef Kokkos::View< E_FLOAT*, typename DAT::t_efloat_1d::array_layout, typename DAT::t_efloat_1d::device_type, Kokkos::MemoryTraits< AtomicF< NEIGHFLAG >::value> > AtomicViewType; + + AtomicViewType a_dpdThetaLocal = d_dpdThetaLocal; + AtomicViewType a_sumWeights = d_sumWeights; + + // Local scalar accumulators. + double i_dpdThetaLocal = 0.0; + double i_sumWeights = 0.0; + + const int i = d_ilist(ii); + + const double xtmp = d_x(i,0); + const double ytmp = d_x(i,1); + const double ztmp = d_x(i,2); + const int itype = d_type(i); + + const int jnum = d_numneigh(i); + + for (int jj = 0; jj < jnum; jj++) + { + const int j = (d_neighbors(i,jj) & NEIGHMASK); + const int jtype = d_type(j); + + const double delx = xtmp - d_x(j,0); + const double dely = ytmp - d_x(j,1); + const double delz = ztmp - d_x(j,2); + const double rsq = delx*delx + dely*dely + delz*delz; + + const double cutsq_ij = d_cutsq(itype,jtype); + + if (rsq < cutsq_ij) + { + const double rcut = sqrt( cutsq_ij ); + double rij = sqrt(rsq); + double ratio = rij/rcut; + + double wij = 0.0; + + // Lucy's Weight Function + if (WT_FLAG == LUCY) + { + wij = (1.0+3.0*ratio) * (1.0-ratio)*(1.0-ratio)*(1.0-ratio); + i_dpdThetaLocal += wij / d_dpdTheta(j); + if (NEWTON_PAIR || j < nlocal) + a_dpdThetaLocal(j) += wij / d_dpdTheta(i); + } + + i_sumWeights += wij; + if (NEWTON_PAIR || j < nlocal) + a_sumWeights(j) += wij; + } + } + + // Update, don't assign, the array value (because another iteration may have hit it). 
+ a_dpdThetaLocal(i) += i_dpdThetaLocal; + a_sumWeights(i) += i_sumWeights; +} + +/* ---------------------------------------------------------------------- */ + +template + template + KOKKOS_INLINE_FUNCTION +void FixRxKokkos::operator()(Tag_FixRxKokkos_2ndPairOperator, const int& i) const +{ + double wij = 0.0; + + // Lucy Weight Function + if (WT_FLAG == LUCY) + { + wij = 1.0; + d_dpdThetaLocal(i) += wij / d_dpdTheta(i); + } + d_sumWeights(i) += wij; + + // Normalized local temperature + d_dpdThetaLocal(i) = d_dpdThetaLocal(i) / d_sumWeights(i); + + if (LOCAL_TEMP_FLAG == HARMONIC) + d_dpdThetaLocal(i) = 1.0 / d_dpdThetaLocal(i); +} + +/* ---------------------------------------------------------------------- */ + template template void FixRxKokkos::computeLocalTemperature() { - typename ArrayTypes::t_x_array_randomread d_x = atomKK->k_x.view(); - typename ArrayTypes::t_int_1d_randomread d_type = atomKK->k_type.view(); - typename ArrayTypes::t_efloat_1d d_dpdTheta = atomKK->k_dpdTheta.view(); + //typename ArrayTypes::t_x_array_randomread d_x = atomKK->k_x.view(); + //typename ArrayTypes::t_int_1d_randomread d_type = atomKK->k_type.view(); + //typename ArrayTypes::t_efloat_1d d_dpdTheta = atomKK->k_dpdTheta.view(); + d_x = atomKK->k_x.view(); + d_type = atomKK->k_type.view(); + d_dpdTheta = atomKK->k_dpdTheta.view(); atomKK->sync(execution_space, X_MASK | TYPE_MASK | DPDTHETA_MASK ); - const int nlocal = atom->nlocal; + //const int nlocal = atom->nlocal; + nlocal = atom->nlocal; const int nghost = atom->nghost; printf("Inside FixRxKokkos::computeLocalTemperature: %d %d %d %d %d %d %d\n", WT_FLAG, LOCAL_TEMP_FLAG, NEWTON_PAIR, (int)lmp->kokkos->neighflag, NEIGHFLAG, nlocal, nghost); @@ -1780,14 +1999,15 @@ void FixRxKokkos::computeLocalTemperature() //typename ArrayTypes::t_ffloat_2d d_cutsq = pairDPDEKK->k_cutsq.template view::tdual_ffloat_2d k_cutsq; - typename ArrayTypes::t_ffloat_2d d_cutsq; - double **h_cutsq; + //typename ArrayTypes::tdual_ffloat_2d k_cutsq; + 
//typename ArrayTypes::t_ffloat_2d d_cutsq; + //double **h_cutsq; { const int ntypes = atom->ntypes; - memory->create_kokkos (k_cutsq, h_cutsq, ntypes+1, ntypes+1, "pair:cutsq"); + //memory->create_kokkos (k_cutsq, h_cutsq, ntypes+1, ntypes+1, "pair:cutsq"); + memory->create_kokkos (k_cutsq, ntypes+1, ntypes+1, "FixRxKokkos::k_cutsq"); d_cutsq = k_cutsq.template view(); for (int i = 1; i <= ntypes; ++i) @@ -1804,30 +2024,37 @@ void FixRxKokkos::computeLocalTemperature() // Initialize the local temperature weight array int sumWeightsCt = nlocal + (NEWTON_PAIR ? nghost : 0); - memory->create_kokkos (k_sumWeights, sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights"); + //memory->create_kokkos (k_sumWeights, sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights"); + memory->create_kokkos (k_sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights"); d_sumWeights = k_sumWeights.d_view; h_sumWeights = k_sumWeights.h_view; // Initialize the accumulator to zero ... - Kokkos::parallel_for (sumWeightsCt, - LAMMPS_LAMBDA(const int i) - { - d_sumWeights(i) = 0.0; - } - ); + //Kokkos::parallel_for (sumWeightsCt, + // LAMMPS_LAMBDA(const int i) + // { + // d_sumWeights(i) = 0.0; + // } + // ); + + Kokkos::parallel_for (Kokkos::RangePolicy(0, sumWeightsCt), *this); // Local list views. (This isn't working!) 
NeighListKokkos* k_list = static_cast*>(list); if (not(list->kokkos)) error->one(FLERR,"list is not a Kokkos list\n"); - typename ArrayTypes::t_neighbors_2d d_neighbors = k_list->d_neighbors; - typename ArrayTypes::t_int_1d d_ilist = k_list->d_ilist; - typename ArrayTypes::t_int_1d d_numneigh = k_list->d_numneigh; + //typename ArrayTypes::t_neighbors_2d d_neighbors = k_list->d_neighbors; + //typename ArrayTypes::t_int_1d d_ilist = k_list->d_ilist; + //typename ArrayTypes::t_int_1d d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + d_numneigh = k_list->d_numneigh; const int inum = list->inum; // loop over neighbors of my atoms +#if 0 Kokkos::parallel_for ( inum, LAMMPS_LAMBDA(const int ii) { @@ -1892,6 +2119,9 @@ void FixRxKokkos::computeLocalTemperature() a_sumWeights(i) += i_sumWeights; } ); +#else + Kokkos::parallel_for (Kokkos::RangePolicy >(0, inum), *this); +#endif // Signal that dpdThetaLocal and sumWeights have been modified. k_dpdThetaLocal.template modify(); @@ -1905,6 +2135,7 @@ void FixRxKokkos::computeLocalTemperature() k_sumWeights. template sync(); // self-interaction for local temperature +#if 0 Kokkos::parallel_for ( nlocal, LAMMPS_LAMBDA(const int i) { @@ -1925,10 +2156,15 @@ void FixRxKokkos::computeLocalTemperature() d_dpdThetaLocal(i) = 1.0 / d_dpdThetaLocal(i); } ); +#else + Kokkos::parallel_for (Kokkos::RangePolicy >(0, nlocal), *this); +#endif // Clean up the local kokkos data. 
- memory->destroy_kokkos(k_cutsq, h_cutsq); - memory->destroy_kokkos(k_sumWeights, sumWeights); + //memory->destroy_kokkos(k_cutsq, h_cutsq); + memory->destroy_kokkos(k_cutsq); + //memory->destroy_kokkos(k_sumWeights, sumWeights); + memory->destroy_kokkos(k_sumWeights); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_rx_kokkos.h b/src/KOKKOS/fix_rx_kokkos.h index c18ce6f151..169a87a2f9 100644 --- a/src/KOKKOS/fix_rx_kokkos.h +++ b/src/KOKKOS/fix_rx_kokkos.h @@ -30,6 +30,47 @@ FixStyle(rx/kk/host,FixRxKokkos) namespace LAMMPS_NS { +struct Tag_FixRxKokkos_zeroTemperatureViews {}; +struct Tag_FixRxKokkos_zeroCounterViews {}; + +template +struct Tag_FixRxKokkos_firstPairOperator {}; + +template +struct Tag_FixRxKokkos_2ndPairOperator {}; + +template +struct Tag_FixRxKokkos_solveSystems {}; + +struct s_CounterType +{ + int nSteps, nIters, nFuncs, nFails; + + KOKKOS_INLINE_FUNCTION + s_CounterType() : nSteps(0), nIters(0), nFuncs(0), nFails(0) {}; + + KOKKOS_INLINE_FUNCTION + s_CounterType& operator+=(const s_CounterType &rhs) + { + nSteps += rhs.nSteps; + nIters += rhs.nIters; + nFuncs += rhs.nFuncs; + nFails += rhs.nFails; + return *this; + } + + KOKKOS_INLINE_FUNCTION + volatile s_CounterType& operator+=(const volatile s_CounterType &rhs) volatile + { + nSteps += rhs.nSteps; + nIters += rhs.nIters; + nFuncs += rhs.nFuncs; + nFails += rhs.nFails; + return *this; + } +}; +typedef struct s_CounterType CounterType; + template class FixRxKokkos : public FixRX { public: @@ -41,42 +82,34 @@ class FixRxKokkos : public FixRX { virtual void setup_pre_force(int); virtual void pre_force(int); - //template - // KOKKOS_INLINE_FUNCTION - //void operator()(SolverTag, const int&) const; + // Define a value_type here for the reduction operator on CounterType. 
+ typedef CounterType value_type; - struct CounterType - { - int nSteps, nIters, nFuncs, nFails; + KOKKOS_INLINE_FUNCTION + void operator()(Tag_FixRxKokkos_zeroCounterViews, const int&) const; - KOKKOS_INLINE_FUNCTION - CounterType() : nSteps(0), nIters(0), nFuncs(0), nFails(0) {}; + KOKKOS_INLINE_FUNCTION + void operator()(Tag_FixRxKokkos_zeroTemperatureViews, const int&) const; - KOKKOS_INLINE_FUNCTION - CounterType& operator+=(const CounterType &rhs) - { - nSteps += rhs.nSteps; - nIters += rhs.nIters; - nFuncs += rhs.nFuncs; - nFails += rhs.nFails; - return *this; - } + template + KOKKOS_INLINE_FUNCTION + void operator()(Tag_FixRxKokkos_firstPairOperator, const int&) const; - KOKKOS_INLINE_FUNCTION - volatile CounterType& operator+=(const volatile CounterType &rhs) volatile - { - nSteps += rhs.nSteps; - nIters += rhs.nIters; - nFuncs += rhs.nFuncs; - nFails += rhs.nFails; - return *this; - } - }; + template + KOKKOS_INLINE_FUNCTION + void operator()(Tag_FixRxKokkos_2ndPairOperator, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(Tag_FixRxKokkos_solveSystems, const int&, CounterType&) const; //protected: PairDPDfdtEnergyKokkos* pairDPDEKK; double VDPD; + double boltz; + double t_stop; + template struct StridedArrayType { @@ -203,6 +236,27 @@ class FixRxKokkos : public FixRX { typename DAT::t_efloat_1d d_dpdThetaLocal, d_sumWeights; typename HAT::t_efloat_1d h_dpdThetaLocal, h_sumWeights; + typename ArrayTypes::t_x_array_randomread d_x ; + typename ArrayTypes::t_int_1d_randomread d_type ; + typename ArrayTypes::t_efloat_1d d_dpdTheta; + + typename ArrayTypes::tdual_ffloat_2d k_cutsq; + typename ArrayTypes::t_ffloat_2d d_cutsq; + //double **h_cutsq; + + typename ArrayTypes::t_neighbors_2d d_neighbors; + typename ArrayTypes::t_int_1d d_ilist ; + typename ArrayTypes::t_int_1d d_numneigh ; + + typename ArrayTypes::t_float_2d d_dvector; + typename ArrayTypes::t_int_1d d_mask ; + + typename ArrayTypes::t_double_1d d_scratchSpace; + size_t 
scratchSpaceSize; + + // Error flag for any failures. + DAT::tdual_int_scalar k_error_flag; + template void computeLocalTemperature(); @@ -213,6 +267,7 @@ class FixRxKokkos : public FixRX { //private: // replicate a few from FixRX int my_restartFlag; + int nlocal; }; } diff --git a/src/USER-DPD/fix_rx.cpp b/src/USER-DPD/fix_rx.cpp index 28321dbecf..8a8195da19 100644 --- a/src/USER-DPD/fix_rx.cpp +++ b/src/USER-DPD/fix_rx.cpp @@ -220,6 +220,9 @@ FixRX::FixRX(LAMMPS *lmp, int narg, char **arg) : FixRX::~FixRX() { + printf("Inside FixRX::~FixRX copymode= %d\n", copymode); + if (copymode) return; + // De-Allocate memory to prevent memory leak for (int ii = 0; ii < nreactions; ii++){ delete [] stoich[ii]; From acc5bde0fe53a2e9052ee7a27ceafb42acbea114 Mon Sep 17 00:00:00 2001 From: Christopher Stone Date: Mon, 13 Feb 2017 16:36:30 -0500 Subject: [PATCH 122/267] Removed printf's from FixRXKokkos and FixRX. - Commented out the printf's in FixRXKokkos and FixRX used for active debugging. --- src/KOKKOS/fix_rx_kokkos.cpp | 28 ++++++++++++++-------------- src/USER-DPD/fix_rx.cpp | 18 +++++++++--------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index 77e948be35..08a20ac9a7 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -71,13 +71,13 @@ FixRxKokkos::FixRxKokkos(LAMMPS *lmp, int narg, char **arg) : k_error_flag = DAT::tdual_int_scalar("FixRxKokkos::k_error_flag"); - printf("Inside FixRxKokkos::FixRxKokkos\n"); + //printf("Inside FixRxKokkos::FixRxKokkos\n"); } template FixRxKokkos::~FixRxKokkos() { - printf("Inside FixRxKokkos::~FixRxKokkos copymode= %d\n", copymode); + //printf("Inside FixRxKokkos::~FixRxKokkos copymode= %d\n", copymode); if (copymode) return; } @@ -98,7 +98,7 @@ void FixRxKokkos::post_constructor() template void FixRxKokkos::init() { - printf("Inside FixRxKokkos::init\n"); + //printf("Inside FixRxKokkos::init\n"); // Call the parent's version. 
//FixRX::init(); @@ -153,7 +153,7 @@ void FixRxKokkos::init() template void FixRxKokkos::init_list(int, class NeighList* ptr) { - printf("Inside FixRxKokkos::init_list\n"); + //printf("Inside FixRxKokkos::init_list\n"); this->list = ptr; } @@ -1220,7 +1220,7 @@ void FixRxKokkos::operator()(SolverType, const int &i) const template void FixRxKokkos::create_kinetics_data(void) { - printf("Inside FixRxKokkos::create_kinetics_data\n"); + //printf("Inside FixRxKokkos::create_kinetics_data\n"); memory->create_kokkos( d_kineticsData.Arr, h_kineticsData.Arr, nreactions, "KineticsType::Arr"); memory->create_kokkos( d_kineticsData.nArr, h_kineticsData.nArr, nreactions, "KineticsType::nArr"); @@ -1301,7 +1301,7 @@ void FixRxKokkos::create_kinetics_data(void) template void FixRxKokkos::setup_pre_force(int vflag) { - printf("Inside FixRxKokkos::setup_pre_force restartFlag= %d\n", my_restartFlag); + //printf("Inside FixRxKokkos::setup_pre_force restartFlag= %d\n", my_restartFlag); if (my_restartFlag) my_restartFlag = 0; @@ -1314,7 +1314,7 @@ void FixRxKokkos::setup_pre_force(int vflag) template void FixRxKokkos::pre_force(int vflag) { - printf("Inside FixRxKokkos::pre_force localTempFlag= %d\n", localTempFlag); + //printf("Inside FixRxKokkos::pre_force localTempFlag= %d\n", localTempFlag); this->solve_reactions( vflag, true ); } @@ -1412,7 +1412,7 @@ void FixRxKokkos::operator()(Tag_FixRxKokkos_solveSystems void FixRxKokkos::solve_reactions(const int vflag, const bool isPreForce) { - printf("Inside FixRxKokkos::solve_reactions localTempFlag= %d isPreForce= %s\n", localTempFlag, isPreForce ? "True" : "false"); + //printf("Inside FixRxKokkos::solve_reactions localTempFlag= %d isPreForce= %s\n", localTempFlag, isPreForce ? 
"True" : "false"); copymode = 1; @@ -1653,11 +1653,11 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF double time_ODE = getElapsedTime(timer_localTemperature, timer_ODE); - printf("me= %d kokkos total= %g temp= %g ode= %g comm= %g nlocal= %d nfc= %d %d\n", comm->me, - getElapsedTime(timer_start, timer_stop), - getElapsedTime(timer_start, timer_localTemperature), - getElapsedTime(timer_localTemperature, timer_ODE), - getElapsedTime(timer_ODE, timer_stop), nlocal, TotalCounters.nFuncs, TotalCounters.nSteps); + //printf("me= %d kokkos total= %g temp= %g ode= %g comm= %g nlocal= %d nfc= %d %d\n", comm->me, + // getElapsedTime(timer_start, timer_stop), + // getElapsedTime(timer_start, timer_localTemperature), + // getElapsedTime(timer_localTemperature, timer_ODE), + // getElapsedTime(timer_ODE, timer_stop), nlocal, TotalCounters.nFuncs, TotalCounters.nSteps); // Warn the user if a failure was detected in the ODE solver. if (TotalCounters.nFails > 0){ @@ -1992,7 +1992,7 @@ void FixRxKokkos::computeLocalTemperature() nlocal = atom->nlocal; const int nghost = atom->nghost; - printf("Inside FixRxKokkos::computeLocalTemperature: %d %d %d %d %d %d %d\n", WT_FLAG, LOCAL_TEMP_FLAG, NEWTON_PAIR, (int)lmp->kokkos->neighflag, NEIGHFLAG, nlocal, nghost); + //printf("Inside FixRxKokkos::computeLocalTemperature: %d %d %d %d %d %d %d\n", WT_FLAG, LOCAL_TEMP_FLAG, NEWTON_PAIR, (int)lmp->kokkos->neighflag, NEIGHFLAG, nlocal, nghost); // Pull from pairDPDE. The pairDPDEKK objects are protected so recreate here for now. 
//pairDPDEKK->k_cutsq.template sync(); diff --git a/src/USER-DPD/fix_rx.cpp b/src/USER-DPD/fix_rx.cpp index 8a8195da19..a8939e27f2 100644 --- a/src/USER-DPD/fix_rx.cpp +++ b/src/USER-DPD/fix_rx.cpp @@ -220,7 +220,7 @@ FixRX::FixRX(LAMMPS *lmp, int narg, char **arg) : FixRX::~FixRX() { - printf("Inside FixRX::~FixRX copymode= %d\n", copymode); + //printf("Inside FixRX::~FixRX copymode= %d\n", copymode); if (copymode) return; // De-Allocate memory to prevent memory leak @@ -756,8 +756,8 @@ void FixRX::pre_force(int vflag) memory->create( diagnosticCounterPerODE[FuncSum], nlocal, "FixRX::diagnosticCounterPerODE"); } - #pragma omp parallel \ - reduction(+: nSteps, nIters, nFuncs, nFails ) + //#pragma omp parallel \ + // reduction(+: nSteps, nIters, nFuncs, nFails ) { double *rwork = new double[8*nspecies]; @@ -767,7 +767,7 @@ void FixRX::pre_force(int vflag) int ode_counter[4] = { 0 }; - #pragma omp for schedule(runtime) + //#pragma omp for schedule(runtime) for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) @@ -810,11 +810,11 @@ void FixRX::pre_force(int vflag) double time_ODE = getElapsedTime(timer_localTemperature, timer_ODE); - printf("me= %d total= %g temp= %g ode= %g comm= %g nlocal= %d nfc= %d %d\n", comm->me, - getElapsedTime(timer_start, timer_stop), - getElapsedTime(timer_start, timer_localTemperature), - getElapsedTime(timer_localTemperature, timer_ODE), - getElapsedTime(timer_ODE, timer_stop), nlocal, nFuncs, nSteps); + //printf("me= %d total= %g temp= %g ode= %g comm= %g nlocal= %d nfc= %d %d\n", comm->me, + // getElapsedTime(timer_start, timer_stop), + // getElapsedTime(timer_start, timer_localTemperature), + // getElapsedTime(timer_localTemperature, timer_ODE), + // getElapsedTime(timer_ODE, timer_stop), nlocal, nFuncs, nSteps); // Warn the user if a failure was detected in the ODE solver. 
if (nFails > 0){ From 0a751c59012ec2ef97d5d5313512983ef77f2c0f Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Wed, 22 Feb 2017 11:52:20 -0500 Subject: [PATCH 123/267] KOKKOS: fix a compile-time error caused by merge of patch 21Feb17 Remove the unused PairHybridOverlayKokkos::modify_requests() method The patch removed the parent PairHybridOverlay::modify_requests() --- src/KOKKOS/pair_hybrid_overlay_kokkos.cpp | 35 ----------------------- src/KOKKOS/pair_hybrid_overlay_kokkos.h | 3 -- 2 files changed, 38 deletions(-) diff --git a/src/KOKKOS/pair_hybrid_overlay_kokkos.cpp b/src/KOKKOS/pair_hybrid_overlay_kokkos.cpp index 79d9c63221..aa5d895155 100644 --- a/src/KOKKOS/pair_hybrid_overlay_kokkos.cpp +++ b/src/KOKKOS/pair_hybrid_overlay_kokkos.cpp @@ -105,38 +105,3 @@ void PairHybridOverlayKokkos::coeff(int narg, char **arg) if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); } - -/* ---------------------------------------------------------------------- - combine sub-style neigh list requests and create new ones if needed -------------------------------------------------------------------------- */ - -void PairHybridOverlayKokkos::modify_requests() -{ - int i,j; - NeighRequest *irq,*jrq; - - // loop over pair requests only - // if a previous list is same kind with same skip attributes - // then make this one a copy list of that one - // works whether both lists are no-skip or yes-skip - // will not point a list at a copy list, but at copy list's parent - - for (i = 0; i < neighbor->nrequest; i++) { - if (!neighbor->requests[i]->pair) continue; - - irq = neighbor->requests[i]; - for (j = 0; j < i; j++) { - if (!neighbor->requests[j]->pair) continue; - jrq = neighbor->requests[j]; - if (irq->same_kind(jrq) && irq->same_skip(jrq)) { - irq->copy = 1; - irq->otherlist = j; - break; - } - } - } - - // perform same operations on skip lists as pair style = hybrid - - PairHybrid::modify_requests(); -} diff --git a/src/KOKKOS/pair_hybrid_overlay_kokkos.h 
b/src/KOKKOS/pair_hybrid_overlay_kokkos.h index 2e4899a1f3..6bec57c453 100644 --- a/src/KOKKOS/pair_hybrid_overlay_kokkos.h +++ b/src/KOKKOS/pair_hybrid_overlay_kokkos.h @@ -29,9 +29,6 @@ class PairHybridOverlayKokkos : public PairHybridKokkos { PairHybridOverlayKokkos(class LAMMPS *); virtual ~PairHybridOverlayKokkos() {} void coeff(int, char **); - - private: - void modify_requests(); }; } From 2db66e49b444c829a27e7a874d0fba49faf0387b Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 30 Dec 2016 12:16:54 -0500 Subject: [PATCH 124/267] USER-DPD: make pair_dpd_fdt* check more generically for use of fix_shardlow Allows easier experimentation of alternative shardlow implementations. --- src/USER-DPD/pair_dpd_fdt.cpp | 2 +- src/USER-DPD/pair_dpd_fdt_energy.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/USER-DPD/pair_dpd_fdt.cpp b/src/USER-DPD/pair_dpd_fdt.cpp index e7e9febd82..90aa4f1eaf 100644 --- a/src/USER-DPD/pair_dpd_fdt.cpp +++ b/src/USER-DPD/pair_dpd_fdt.cpp @@ -325,7 +325,7 @@ void PairDPDfdt::init_style() splitFDT_flag = false; int irequest = neighbor->request(this,instance_me); for (int i = 0; i < modify->nfix; i++) - if (strcmp(modify->fix[i]->style,"shardlow") == 0){ + if (strncmp(modify->fix[i]->style,"shardlow", 8) == 0){ splitFDT_flag = true; } } diff --git a/src/USER-DPD/pair_dpd_fdt_energy.cpp b/src/USER-DPD/pair_dpd_fdt_energy.cpp index 9d08393b9d..ad6310a283 100644 --- a/src/USER-DPD/pair_dpd_fdt_energy.cpp +++ b/src/USER-DPD/pair_dpd_fdt_energy.cpp @@ -414,7 +414,7 @@ void PairDPDfdtEnergy::init_style() splitFDT_flag = false; int irequest = neighbor->request(this,instance_me); for (int i = 0; i < modify->nfix; i++) - if (strcmp(modify->fix[i]->style,"shardlow") == 0){ + if (strncmp(modify->fix[i]->style,"shardlow", 8) == 0){ splitFDT_flag = true; } From 0512e7886067fbed5d9178654ffe74b16020e258 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 30 Dec 2016 14:42:21 -0500 Subject: [PATCH 125/267] USER-DPD: new 
neighbor list code for SSA that gives neighbors to ghosts. This simplifies the processing of the neighbor list in fix_shardlow. NOTE: pair evaluation order changes, causing numerical differences! --- src/USER-DPD/fix_shardlow.cpp | 18 ++- src/USER-DPD/nbin_ssa.cpp | 27 ++-- src/USER-DPD/nbin_ssa.h | 2 +- src/USER-DPD/npair_half_bin_newton_ssa.cpp | 153 ++++++++---------- src/USER-DPD/npair_half_bin_newton_ssa.h | 2 +- src/USER-DPD/npair_halffull_newton_ssa.cpp | 4 + .../nstencil_half_bin_2d_newton_ssa.cpp | 12 +- .../nstencil_half_bin_2d_newton_ssa.h | 2 +- .../nstencil_half_bin_3d_newton_ssa.cpp | 18 ++- .../nstencil_half_bin_3d_newton_ssa.h | 2 +- src/USER-DPD/nstencil_ssa.h | 2 +- 11 files changed, 129 insertions(+), 113 deletions(-) diff --git a/src/USER-DPD/fix_shardlow.cpp b/src/USER-DPD/fix_shardlow.cpp index bf8959fa9f..56597697f7 100644 --- a/src/USER-DPD/fix_shardlow.cpp +++ b/src/USER-DPD/fix_shardlow.cpp @@ -148,6 +148,7 @@ void FixShardlow::init() int irequest = neighbor->request(this,instance_me); neighbor->requests[irequest]->pair = 0; neighbor->requests[irequest]->fix = 1; + neighbor->requests[irequest]->ghost= 1; neighbor->requests[irequest]->ssa = 1; } @@ -498,7 +499,7 @@ void FixShardlow::ssa_update_dpde( void FixShardlow::initial_integrate(int vflag) { - int i,ii,inum; + int i,ii,inum,anum; int *ilist; int nlocal = atom->nlocal; @@ -531,10 +532,12 @@ void FixShardlow::initial_integrate(int vflag) v_t0 = (double (*)[3]) memory->smalloc(sizeof(double)*3*nghost, "FixShardlow:v_t0"); inum = list->inum; + anum = inum + list->gnum; ilist = list->ilist; dtsqrt = sqrt(update->dt); + ii = 0; //Loop over all 14 directions (8 stages) for (airnum = 1; airnum <=8; airnum++){ @@ -549,15 +552,16 @@ void FixShardlow::initial_integrate(int vflag) } } - // Loop over neighbors of my atoms - for (ii = 0; ii < inum; ii++) { + // process neighbors in this AIR + while (ii < anum) { i = ilist[ii]; - int start = (airnum < 2) ? 
0 : list->ndxAIR_ssa[i][airnum - 2]; - int len = list->ndxAIR_ssa[i][airnum - 1] - start; + if (atom->ssaAIR[i] > airnum) break; /* done with curent AIR */ + int len = list->numneigh[i]; if (len > 0) { - if (useDPDE) ssa_update_dpde(i, &(list->firstneigh[i][start]), len); - else ssa_update_dpd(i, &(list->firstneigh[i][start]), len); + if (useDPDE) ssa_update_dpde(i, &(list->firstneigh[i][0]), len); + else ssa_update_dpd(i, &(list->firstneigh[i][0]), len); } + ii++; } // Communicate the ghost deltas to the atom owners diff --git a/src/USER-DPD/nbin_ssa.cpp b/src/USER-DPD/nbin_ssa.cpp index 73da5e0df3..c2d780bac6 100644 --- a/src/USER-DPD/nbin_ssa.cpp +++ b/src/USER-DPD/nbin_ssa.cpp @@ -33,14 +33,13 @@ NBinSSA::NBinSSA(LAMMPS *lmp) : NBinStandard(lmp) bins_ssa = NULL; maxhead_ssa = 0; binhead_ssa = NULL; - gbinhead_ssa = NULL; + for (int i = 0; i < 9; i++) gairhead_ssa[i] = -1; } NBinSSA::~NBinSSA() { memory->destroy(bins_ssa); memory->destroy(binhead_ssa); - memory->destroy(gbinhead_ssa); } /* ---------------------------------------------------------------------- @@ -62,8 +61,11 @@ void NBinSSA::bin_atoms() last_bin = update->ntimestep; + for (i = 0; i < 9; i++) { + gairhead_ssa[i] = -1; + } + for (i = 0; i < mbins; i++) { - gbinhead_ssa[i] = -1; binhead_ssa[i] = -1; } @@ -73,19 +75,19 @@ void NBinSSA::bin_atoms() int bitmask = group->bitmask[includegroup]; int nowned = atom->nlocal; // NOTE: nlocal was set to atom->nfirst above for (i = nall-1; i >= nowned; i--) { - if (ssaAIR[i] < 2) continue; // skip ghost atoms not in AIR + ibin = ssaAIR[i]; + if (ibin < 2) continue; // skip ghost atoms not in AIR if (mask[i] & bitmask) { - ibin = coord2bin(x[i]); - bins_ssa[i] = gbinhead_ssa[ibin]; - gbinhead_ssa[ibin] = i; + bins_ssa[i] = gairhead_ssa[ibin]; + gairhead_ssa[ibin] = i; } } } else { for (i = nall-1; i >= nlocal; i--) { - if (ssaAIR[i] < 2) continue; // skip ghost atoms not in AIR - ibin = coord2bin(x[i]); - bins_ssa[i] = gbinhead_ssa[ibin]; - gbinhead_ssa[ibin] = 
i; + ibin = ssaAIR[i]; + if (ibin < 2) continue; // skip ghost atoms not in AIR + bins_ssa[i] = gairhead_ssa[ibin]; + gairhead_ssa[ibin] = i; } } for (i = nlocal-1; i >= 0; i--) { @@ -103,10 +105,8 @@ void NBinSSA::bin_atoms_setup(int nall) if (mbins > maxhead_ssa) { maxhead_ssa = mbins; - memory->destroy(gbinhead_ssa); memory->destroy(binhead_ssa); memory->create(binhead_ssa,maxhead_ssa,"binhead_ssa"); - memory->create(gbinhead_ssa,maxhead_ssa,"gbinhead_ssa"); } if (nall > maxbin_ssa) { @@ -125,7 +125,6 @@ bigint NBinSSA::memory_usage() if (maxbin_ssa) bytes += memory->usage(bins_ssa,maxbin_ssa); if (maxhead_ssa) { bytes += memory->usage(binhead_ssa,maxhead_ssa); - bytes += memory->usage(gbinhead_ssa,maxhead_ssa); } return bytes; } diff --git a/src/USER-DPD/nbin_ssa.h b/src/USER-DPD/nbin_ssa.h index f0699b3a7a..5a2562d305 100644 --- a/src/USER-DPD/nbin_ssa.h +++ b/src/USER-DPD/nbin_ssa.h @@ -32,7 +32,7 @@ class NBinSSA : public NBinStandard { int *bins_ssa; // index of next atom in each bin int maxbin_ssa; // size of bins_ssa array int *binhead_ssa; // index of 1st local atom in each bin - int *gbinhead_ssa; // index of 1st ghost atom in each bin + int gairhead_ssa[9]; // index of 1st ghost atom in each AIR int maxhead_ssa; // size of binhead_ssa and gbinhead_ssa arrays NBinSSA(class LAMMPS *); diff --git a/src/USER-DPD/npair_half_bin_newton_ssa.cpp b/src/USER-DPD/npair_half_bin_newton_ssa.cpp index fd67b66e9b..4c9dc95308 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.cpp +++ b/src/USER-DPD/npair_half_bin_newton_ssa.cpp @@ -32,12 +32,6 @@ using namespace LAMMPS_NS; -// allocate space for static class variable -// prototype for non-class function - -static int *ssaAIRptr; -static int cmp_ssaAIR(const void *, const void *); - /* ---------------------------------------------------------------------- */ NPairHalfBinNewtonSSA::NPairHalfBinNewtonSSA(LAMMPS *lmp) : NPair(lmp) {} @@ -64,9 +58,7 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) tagint **special = 
atom->special; int **nspecial = atom->nspecial; int nlocal = atom->nlocal; - int nall = nlocal + atom->nghost; if (includegroup) nlocal = atom->nfirst; - int *ssaAIR = atom->ssaAIR; int *molindex = atom->molindex; int *molatom = atom->molatom; @@ -89,16 +81,18 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) if (!nb_ssa) error->one(FLERR, "NBin wasn't a NBinSSA object"); int *bins_ssa = nb_ssa->bins_ssa; int *binhead_ssa = nb_ssa->binhead_ssa; - int *gbinhead_ssa = nb_ssa->gbinhead_ssa; + int *gairhead_ssa = &(nb_ssa->gairhead_ssa[0]); int inum = 0; + int gnum = 0; + int xbin,ybin,zbin,xbin2,ybin2,zbin2; + int **stencilxyz = ns_ssa->stencilxyz; ipage->reset(); // loop over owned atoms, storing half of the neighbors for (i = 0; i < nlocal; i++) { - int AIRct[8] = { 0 }; n = 0; neighptr = ipage->vget(); @@ -175,51 +169,6 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) } } } - AIRct[0] = n; - - // loop over AIR ghost atoms in all bins in "full" stencil - // Note: the non-AIR ghost atoms have already been filtered out - // That is a significant time savings because of the "full" stencil - // Note2: only non-pure locals can have ghosts as neighbors - - if (ssaAIR[i] == 1) for (k = 0; k < nstencil_full; k++) { - for (j = gbinhead_ssa[ibin+stencil[k]]; j >= 0; - j = bins_ssa[j]) { - - jtype = type[j]; - if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; - - delx = xtmp - x[j][0]; - dely = ytmp - x[j][1]; - delz = ztmp - x[j][2]; - rsq = delx*delx + dely*dely + delz*delz; - - if (rsq <= cutneighsq[itype][jtype]) { - if (molecular) { - if (!moltemplate) - which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >= 0) - which = find_special(onemols[imol]->special[iatom], - onemols[imol]->nspecial[iatom], - tag[j]-tagprev); - else which = 0; - if (which == 0) { - neighptr[n++] = j; - ++(AIRct[ssaAIR[j] - 1]); - } else if (domain->minimum_image_check(delx,dely,delz)) { - neighptr[n++] = j; - ++(AIRct[ssaAIR[j] - 1]); - } else if (which > 
0) { - neighptr[n++] = j ^ (which << SBBITS); - ++(AIRct[ssaAIR[j] - 1]); - } - } else { - neighptr[n++] = j; - ++(AIRct[ssaAIR[j] - 1]); - } - } - } - } ilist[inum++] = i; firstneigh[i] = neighptr; @@ -227,34 +176,74 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) ipage->vgot(n); if (ipage->status()) error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); - - // sort the ghosts in the neighbor list by their ssaAIR number - - ssaAIRptr = atom->ssaAIR; - qsort(&(neighptr[AIRct[0]]), n - AIRct[0], sizeof(int), cmp_ssaAIR); - - // do a prefix sum on the counts to turn them into indexes - - list->ndxAIR_ssa[i][0] = AIRct[0]; - for (int ndx = 1; ndx < 8; ++ndx) { - list->ndxAIR_ssa[i][ndx] = AIRct[ndx] + list->ndxAIR_ssa[i][ndx - 1]; - } } list->inum = inum; + + // loop over AIR ghost atoms, storing their local neighbors + // since these are ghosts, must check if stencil bin is out of bounds + for (int airnum = 2; airnum <= 8; airnum++) { + for (i = gairhead_ssa[airnum]; i >= 0; i = bins_ssa[i]) { + n = 0; + neighptr = ipage->vget(); + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + if (moltemplate) { + imol = molindex[i]; + iatom = molatom[i]; + tagprev = tag[i] - iatom - 1; + } + + ibin = coord2bin(x[i],xbin,ybin,zbin); + + // loop over AIR ghost atoms in all bins in "full" stencil + // Note: the non-AIR ghost atoms have already been filtered out + for (k = 0; k < nstencil_full; k++) { + xbin2 = xbin + stencilxyz[k][0]; + ybin2 = ybin + stencilxyz[k][1]; + zbin2 = zbin + stencilxyz[k][2]; + // since we only care about ghost to local neighbors, these "bounds" could be inset + if (xbin2 < 0 || xbin2 >= mbinx || + ybin2 < 0 || ybin2 >= mbiny || + zbin2 < 0 || zbin2 >= mbinz) continue; + for (j = binhead_ssa[ibin+stencil[k]]; j >= 0; j = bins_ssa[j]) { + + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = 
delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + if (!moltemplate) + which = find_special(special[i],nspecial[i],tag[j]); + else if (imol >= 0) + which = find_special(onemols[imol]->special[iatom], + onemols[imol]->nspecial[iatom], + tag[j]-tagprev); + else which = 0; + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + } + + if (n > 0) ilist[inum + (gnum++)] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + ipage->vgot(n); + if (ipage->status()) + error->one(FLERR,"Neighbor (ghost) list overflow, boost neigh_modify one"); + } + } + list->gnum = gnum; } - -/* ---------------------------------------------------------------------- - comparison function invoked by qsort() - accesses static class member ssaAIRptr, set before call to qsort() -------------------------------------------------------------------------- */ - -static int cmp_ssaAIR(const void *iptr, const void *jptr) -{ - int i = NEIGHMASK & *((int *) iptr); - int j = NEIGHMASK & *((int *) jptr); - if (ssaAIRptr[i] < ssaAIRptr[j]) return -1; - if (ssaAIRptr[i] > ssaAIRptr[j]) return 1; - return 0; -} - diff --git a/src/USER-DPD/npair_half_bin_newton_ssa.h b/src/USER-DPD/npair_half_bin_newton_ssa.h index 13347b33b0..c9ccbc4bd9 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.h +++ b/src/USER-DPD/npair_half_bin_newton_ssa.h @@ -15,7 +15,7 @@ NPairStyle(half/bin/newton/ssa, NPairHalfBinNewtonSSA, - NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_SSA) + NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_SSA | NP_GHOST) #else diff --git a/src/USER-DPD/npair_halffull_newton_ssa.cpp b/src/USER-DPD/npair_halffull_newton_ssa.cpp index 2c9de3e50f..d0be1685b6 100644 --- a/src/USER-DPD/npair_halffull_newton_ssa.cpp +++ b/src/USER-DPD/npair_halffull_newton_ssa.cpp @@ -64,6 +64,10 @@ void 
NPairHalffullNewtonSSA::build(NeighList *list) int inum_full = list->listfull->inum; int inum = 0; + + error->one(FLERR,"NPairHalffullNewtonSSA not yet implemented for ghosts with neighbors."); + return; + ipage->reset(); // loop over parent full list diff --git a/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp b/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp index df379a109a..254339bffc 100644 --- a/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp +++ b/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp @@ -46,8 +46,12 @@ void NStencilHalfBin2dNewtonSSA::create() for (j = 0; j <= sy; j++) for (i = -sx; i <= sx; i++) if (j > 0 || (j == 0 && i > 0)) - if (bin_distance(i,j,0) < cutneighmaxsq) + if (bin_distance(i,j,0) < cutneighmaxsq) { + stencilxyz[pos][0] = i; + stencilxyz[pos][1] = j; + stencilxyz[pos][2] = 0; stencil[pos++] = j*mbinx + i; + } nstencil_half = pos; // record where normal half stencil ends @@ -56,8 +60,12 @@ void NStencilHalfBin2dNewtonSSA::create() for (j = -sy; j <= 0; j++) for (i = -sx; i <= sx; i++) { if (j == 0 && i > 0) continue; - if (bin_distance(i,j,0) < cutneighmaxsq) + if (bin_distance(i,j,0) < cutneighmaxsq) { + stencilxyz[pos][0] = i; + stencilxyz[pos][1] = j; + stencilxyz[pos][2] = 0; stencil[pos++] = j*mbinx + i; + } } nstencil = pos; // record where full stencil ends diff --git a/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.h b/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.h index 30901bb3e2..1d5cc3f6b2 100644 --- a/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.h +++ b/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.h @@ -15,7 +15,7 @@ NStencilStyle(half/bin/2d/newton/ssa, NStencilHalfBin2dNewtonSSA, - NS_HALF | NS_BIN | NS_2D | NS_NEWTON | NS_SSA | NS_ORTHO) + NS_HALF | NS_BIN | NS_2D | NS_NEWTON | NS_SSA | NS_ORTHO | NS_GHOST) #else diff --git a/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp b/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp index 76c9931ab2..1e2c18c66a 100644 --- a/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp 
+++ b/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp @@ -47,8 +47,12 @@ void NStencilHalfBin3dNewtonSSA::create() for (j = -sy; j <= sy; j++) for (i = -sx; i <= sx; i++) if (k > 0 || j > 0 || (j == 0 && i > 0)) - if (bin_distance(i,j,k) < cutneighmaxsq) + if (bin_distance(i,j,k) < cutneighmaxsq) { + stencilxyz[pos][0] = i; + stencilxyz[pos][1] = j; + stencilxyz[pos][2] = k; stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; + } nstencil_half = pos; // record where normal half stencil ends @@ -57,8 +61,12 @@ void NStencilHalfBin3dNewtonSSA::create() for (k = -sz; k < 0; k++) for (j = -sy; j <= sy; j++) for (i = -sx; i <= sx; i++) - if (bin_distance(i,j,k) < cutneighmaxsq) + if (bin_distance(i,j,k) < cutneighmaxsq) { + stencilxyz[pos][0] = i; + stencilxyz[pos][1] = j; + stencilxyz[pos][2] = k; stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; + } // For k==0, make sure to skip already included bins @@ -66,8 +74,12 @@ void NStencilHalfBin3dNewtonSSA::create() for (j = -sy; j <= 0; j++) for (i = -sx; i <= sx; i++) { if (j == 0 && i > 0) continue; - if (bin_distance(i,j,k) < cutneighmaxsq) + if (bin_distance(i,j,k) < cutneighmaxsq) { + stencilxyz[pos][0] = i; + stencilxyz[pos][1] = j; + stencilxyz[pos][2] = k; stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; + } } nstencil = pos; // record where full stencil ends diff --git a/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.h b/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.h index 7765b256d3..450a696e46 100644 --- a/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.h +++ b/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.h @@ -15,7 +15,7 @@ NStencilStyle(half/bin/3d/newton/ssa, NStencilHalfBin3dNewtonSSA, - NS_HALF | NS_BIN | NS_3D | NS_NEWTON | NS_SSA | NS_ORTHO) + NS_HALF | NS_BIN | NS_3D | NS_NEWTON | NS_SSA | NS_ORTHO | NS_GHOST) #else diff --git a/src/USER-DPD/nstencil_ssa.h b/src/USER-DPD/nstencil_ssa.h index 9fcd19ee26..e6dfce60f4 100644 --- a/src/USER-DPD/nstencil_ssa.h +++ b/src/USER-DPD/nstencil_ssa.h @@ -20,7 +20,7 @@ namespace 
LAMMPS_NS { class NStencilSSA : public NStencil { public: - NStencilSSA(class LAMMPS *lmp) : NStencil(lmp) { } + NStencilSSA(class LAMMPS *lmp) : NStencil(lmp) { xyzflag = 1; } ~NStencilSSA() {} virtual void create() = 0; From 638448676404468bda5253124d27ccf3c13a043e Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Thu, 26 Jan 2017 13:12:28 -0500 Subject: [PATCH 126/267] USER-DPD: Copy inline coord2bin() functions from nbin_kokkos into nbin_ssa --- src/USER-DPD/nbin_ssa.cpp | 5 ++- src/USER-DPD/nbin_ssa.h | 72 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/src/USER-DPD/nbin_ssa.cpp b/src/USER-DPD/nbin_ssa.cpp index c2d780bac6..82cf6e7cac 100644 --- a/src/USER-DPD/nbin_ssa.cpp +++ b/src/USER-DPD/nbin_ssa.cpp @@ -61,6 +61,9 @@ void NBinSSA::bin_atoms() last_bin = update->ntimestep; + bboxlo_[0] = bboxlo[0]; bboxlo_[1] = bboxlo[1]; bboxlo_[2] = bboxlo[2]; + bboxhi_[0] = bboxhi[0]; bboxhi_[1] = bboxhi[1]; bboxhi_[2] = bboxhi[2]; + for (i = 0; i < 9; i++) { gairhead_ssa[i] = -1; } @@ -91,7 +94,7 @@ void NBinSSA::bin_atoms() } } for (i = nlocal-1; i >= 0; i--) { - ibin = coord2bin(x[i]); + ibin = coord2bin(x[i][0], x[i][1], x[i][2]); bins_ssa[i] = binhead_ssa[ibin]; binhead_ssa[ibin] = i; } diff --git a/src/USER-DPD/nbin_ssa.h b/src/USER-DPD/nbin_ssa.h index 5a2562d305..c39d7c7bce 100644 --- a/src/USER-DPD/nbin_ssa.h +++ b/src/USER-DPD/nbin_ssa.h @@ -42,6 +42,78 @@ class NBinSSA : public NBinStandard { void bin_atoms(); bigint memory_usage(); + + inline + int coord2bin(const double & x,const double & y,const double & z) const + { + int ix,iy,iz; + + if (x >= bboxhi_[0]) + ix = static_cast ((x-bboxhi_[0])*bininvx) + nbinx; + else if (x >= bboxlo_[0]) { + ix = static_cast ((x-bboxlo_[0])*bininvx); + ix = MIN(ix,nbinx-1); + } else + ix = static_cast ((x-bboxlo_[0])*bininvx) - 1; + + if (y >= bboxhi_[1]) + iy = static_cast ((y-bboxhi_[1])*bininvy) + nbiny; + else if (y >= bboxlo_[1]) { + iy = static_cast 
((y-bboxlo_[1])*bininvy); + iy = MIN(iy,nbiny-1); + } else + iy = static_cast ((y-bboxlo_[1])*bininvy) - 1; + + if (z >= bboxhi_[2]) + iz = static_cast ((z-bboxhi_[2])*bininvz) + nbinz; + else if (z >= bboxlo_[2]) { + iz = static_cast ((z-bboxlo_[2])*bininvz); + iz = MIN(iz,nbinz-1); + } else + iz = static_cast ((z-bboxlo_[2])*bininvz) - 1; + + return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); + } + + inline + int coord2bin(const double & x,const double & y,const double & z, int* i) const + { + int ix,iy,iz; + + if (x >= bboxhi_[0]) + ix = static_cast ((x-bboxhi_[0])*bininvx) + nbinx; + else if (x >= bboxlo_[0]) { + ix = static_cast ((x-bboxlo_[0])*bininvx); + ix = MIN(ix,nbinx-1); + } else + ix = static_cast ((x-bboxlo_[0])*bininvx) - 1; + + if (y >= bboxhi_[1]) + iy = static_cast ((y-bboxhi_[1])*bininvy) + nbiny; + else if (y >= bboxlo_[1]) { + iy = static_cast ((y-bboxlo_[1])*bininvy); + iy = MIN(iy,nbiny-1); + } else + iy = static_cast ((y-bboxlo_[1])*bininvy) - 1; + + if (z >= bboxhi_[2]) + iz = static_cast ((z-bboxhi_[2])*bininvz) + nbinz; + else if (z >= bboxlo_[2]) { + iz = static_cast ((z-bboxlo_[2])*bininvz); + iz = MIN(iz,nbinz-1); + } else + iz = static_cast ((z-bboxlo_[2])*bininvz) - 1; + + i[0] = ix - mbinxlo; + i[1] = iy - mbinylo; + i[2] = iz - mbinzlo; + + return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); + } + + private: + double bboxlo_[3],bboxhi_[3]; + }; } From ff2786c86c4c3c1e103fefeccd0bdaeee826a4d4 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Thu, 26 Jan 2017 14:28:54 -0500 Subject: [PATCH 127/267] USER-DPD: Make another version of coord2bin() for nbin_ssa --- src/USER-DPD/nbin_ssa.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/USER-DPD/nbin_ssa.h b/src/USER-DPD/nbin_ssa.h index c39d7c7bce..75766ebcd2 100644 --- a/src/USER-DPD/nbin_ssa.h +++ b/src/USER-DPD/nbin_ssa.h @@ -111,6 +111,42 @@ class NBinSSA : public NBinStandard { return 
(iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); } + inline + int coord2bin(const double & x,const double & y,const double & z, int &ixo, int &iyo, int &izo) const + { + int ix,iy,iz; + + if (x >= bboxhi_[0]) + ix = static_cast ((x-bboxhi_[0])*bininvx) + nbinx; + else if (x >= bboxlo_[0]) { + ix = static_cast ((x-bboxlo_[0])*bininvx); + ix = MIN(ix,nbinx-1); + } else + ix = static_cast ((x-bboxlo_[0])*bininvx) - 1; + + if (y >= bboxhi_[1]) + iy = static_cast ((y-bboxhi_[1])*bininvy) + nbiny; + else if (y >= bboxlo_[1]) { + iy = static_cast ((y-bboxlo_[1])*bininvy); + iy = MIN(iy,nbiny-1); + } else + iy = static_cast ((y-bboxlo_[1])*bininvy) - 1; + + if (z >= bboxhi_[2]) + iz = static_cast ((z-bboxhi_[2])*bininvz) + nbinz; + else if (z >= bboxlo_[2]) { + iz = static_cast ((z-bboxlo_[2])*bininvz); + iz = MIN(iz,nbinz-1); + } else + iz = static_cast ((z-bboxlo_[2])*bininvz) - 1; + + ixo = ix - mbinxlo; + iyo = iy - mbinylo; + izo = iz - mbinzlo; + + return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); + } + private: double bboxlo_[3],bboxhi_[3]; From e42678ed517d8bc95f16da08329a2ae63bffe7bb Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Thu, 26 Jan 2017 16:20:12 -0500 Subject: [PATCH 128/267] USER-DPD: track & use the extent of the local atoms in the bins --- src/USER-DPD/nbin_ssa.cpp | 73 ++++++++++++++-------- src/USER-DPD/nbin_ssa.h | 16 +++-- src/USER-DPD/npair_half_bin_newton_ssa.cpp | 49 ++++++++++----- src/neigh_list.h | 1 + 4 files changed, 93 insertions(+), 46 deletions(-) diff --git a/src/USER-DPD/nbin_ssa.cpp b/src/USER-DPD/nbin_ssa.cpp index 82cf6e7cac..321baf771a 100644 --- a/src/USER-DPD/nbin_ssa.cpp +++ b/src/USER-DPD/nbin_ssa.cpp @@ -30,22 +30,24 @@ using namespace LAMMPS_NS; NBinSSA::NBinSSA(LAMMPS *lmp) : NBinStandard(lmp) { maxbin_ssa = 0; - bins_ssa = NULL; - maxhead_ssa = 0; - binhead_ssa = NULL; - for (int i = 0; i < 9; i++) gairhead_ssa[i] = -1; + binlist_ssa = NULL; + binct_ssa = NULL; + for (int i = 0; i < 9; i++) 
{ + gairhead_ssa[i] = -1; + gairct_ssa[i] = 0; + } } NBinSSA::~NBinSSA() { - memory->destroy(bins_ssa); - memory->destroy(binhead_ssa); + memory->destroy(binlist_ssa); + memory->destroy(binct_ssa); } /* ---------------------------------------------------------------------- bin owned and ghost atoms for the Shardlow Splitting Algorithm (SSA) - local atoms are in distinct bins (binhead_ssa) from the ghosts - ghost atoms are in distinct bins (gbinhead_ssa) from the locals + local atoms are in distinct bins (binhead[]) from the ghosts + ghost atoms are "binned" in gairhead_ssa[] instead ghosts which are not in an Active Interaction Region (AIR) are skipped ------------------------------------------------------------------------- */ @@ -58,6 +60,7 @@ void NBinSSA::bin_atoms() double **x = atom->x; int *mask = atom->mask; int *ssaAIR = atom->ssaAIR; + int xbin,ybin,zbin; last_bin = update->ntimestep; @@ -66,10 +69,13 @@ void NBinSSA::bin_atoms() for (i = 0; i < 9; i++) { gairhead_ssa[i] = -1; + gairct_ssa[i] = 0; } for (i = 0; i < mbins; i++) { - binhead_ssa[i] = -1; + binhead[i] = -1; + binlist_ssa[i] = -1; + binct_ssa[i] = 0; } // bin in reverse order so linked list will be in forward order @@ -81,23 +87,34 @@ void NBinSSA::bin_atoms() ibin = ssaAIR[i]; if (ibin < 2) continue; // skip ghost atoms not in AIR if (mask[i] & bitmask) { - bins_ssa[i] = gairhead_ssa[ibin]; + bins[i] = gairhead_ssa[ibin]; gairhead_ssa[ibin] = i; + ++(gairct_ssa[ibin]); } } } else { for (i = nall-1; i >= nlocal; i--) { ibin = ssaAIR[i]; if (ibin < 2) continue; // skip ghost atoms not in AIR - bins_ssa[i] = gairhead_ssa[ibin]; + bins[i] = gairhead_ssa[ibin]; gairhead_ssa[ibin] = i; + ++(gairct_ssa[ibin]); } } for (i = nlocal-1; i >= 0; i--) { - ibin = coord2bin(x[i][0], x[i][1], x[i][2]); - bins_ssa[i] = binhead_ssa[ibin]; - binhead_ssa[ibin] = i; + ibin = coord2bin(x[i][0], x[i][1], x[i][2], xbin, ybin, zbin); + // Find the bounding box of the local atoms in the bins + if (xbin < lbinxlo) 
lbinxlo = xbin; + if (xbin >= lbinxhi) lbinxhi = xbin + 1; + if (ybin < lbinylo) lbinylo = ybin; + if (ybin >= lbinyhi) lbinyhi = ybin + 1; + if (zbin < lbinzlo) lbinzlo = zbin; + if (zbin >= lbinzhi) lbinzhi = zbin + 1; + bins[i] = binhead[ibin]; + binhead[ibin] = i; + ++(binct_ssa[ibin]); } + } /* ---------------------------------------------------------------------- */ @@ -106,17 +123,21 @@ void NBinSSA::bin_atoms_setup(int nall) { NBinStandard::bin_atoms_setup(nall); // Setup the parent class's data too - if (mbins > maxhead_ssa) { - maxhead_ssa = mbins; - memory->destroy(binhead_ssa); - memory->create(binhead_ssa,maxhead_ssa,"binhead_ssa"); + if (mbins > maxbin_ssa) { + maxbin_ssa = mbins; + memory->destroy(binlist_ssa); + memory->destroy(binct_ssa); + memory->create(binlist_ssa,maxbin_ssa,"binlist_ssa"); + memory->create(binct_ssa,maxbin_ssa,"binct_ssa"); } - if (nall > maxbin_ssa) { - maxbin_ssa = nall; - memory->destroy(bins_ssa); - memory->create(bins_ssa,maxbin_ssa,"bins_ssa"); - } + // Clear the local bin extent bounding box. 
+ lbinxlo = mbinx - 1; // Safe to = stencil->sx + 1 + lbinylo = mbiny - 1; // Safe to = stencil->sy + 1 + lbinzlo = mbinz - 1; // Safe to = stencil->sz + 1 + lbinxhi = 0; // Safe to = mbinx - stencil->sx - 1 + lbinyhi = 0; // Safe to = mbiny - stencil->sy - 1 + lbinzhi = 0; // Safe to = mbinz - stencil->sz - 1 } /* ---------------------------------------------------------------------- */ @@ -125,9 +146,9 @@ bigint NBinSSA::memory_usage() { bigint bytes = NBinStandard::memory_usage(); // Count the parent's usage too - if (maxbin_ssa) bytes += memory->usage(bins_ssa,maxbin_ssa); - if (maxhead_ssa) { - bytes += memory->usage(binhead_ssa,maxhead_ssa); + if (maxbin_ssa) { + bytes += memory->usage(binlist_ssa,maxbin_ssa); + bytes += memory->usage(binct_ssa,maxbin_ssa); } return bytes; } diff --git a/src/USER-DPD/nbin_ssa.h b/src/USER-DPD/nbin_ssa.h index 75766ebcd2..48694370b9 100644 --- a/src/USER-DPD/nbin_ssa.h +++ b/src/USER-DPD/nbin_ssa.h @@ -29,11 +29,19 @@ namespace LAMMPS_NS { class NBinSSA : public NBinStandard { public: - int *bins_ssa; // index of next atom in each bin - int maxbin_ssa; // size of bins_ssa array - int *binhead_ssa; // index of 1st local atom in each bin + int *binlist_ssa; // index in neighlist of 1st local atom in each bin + int *binct_ssa; // count of local atoms in each bin int gairhead_ssa[9]; // index of 1st ghost atom in each AIR - int maxhead_ssa; // size of binhead_ssa and gbinhead_ssa arrays + int gairct_ssa[9]; // count of ghost atoms in each AIR + int maxbin_ssa; // size of binlist_ssa and binct_ssa arrays + + // Bounds of the local atoms in the binhead array + int lbinxlo; // lowest local bin x-dim coordinate + int lbinylo; // lowest local bin y-dim coordinate + int lbinzlo; // lowest local bin z-dim coordinate + int lbinxhi; // highest local bin x-dim coordinate + int lbinyhi; // highest local bin y-dim coordinate + int lbinzhi; // highest local bin z-dim coordinate NBinSSA(class LAMMPS *); ~NBinSSA(); diff --git 
a/src/USER-DPD/npair_half_bin_newton_ssa.cpp b/src/USER-DPD/npair_half_bin_newton_ssa.cpp index 4c9dc95308..f0860cba4b 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.cpp +++ b/src/USER-DPD/npair_half_bin_newton_ssa.cpp @@ -79,20 +79,32 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) NBinSSA *nb_ssa = dynamic_cast(nb); if (!nb_ssa) error->one(FLERR, "NBin wasn't a NBinSSA object"); - int *bins_ssa = nb_ssa->bins_ssa; - int *binhead_ssa = nb_ssa->binhead_ssa; + int *bins = nb_ssa->bins; + int *binhead = nb_ssa->binhead; + int *binlist_ssa = nb_ssa->binlist_ssa; + int *binct_ssa = nb_ssa->binct_ssa; int *gairhead_ssa = &(nb_ssa->gairhead_ssa[0]); int inum = 0; int gnum = 0; int xbin,ybin,zbin,xbin2,ybin2,zbin2; int **stencilxyz = ns_ssa->stencilxyz; + int lbinxlo = nb_ssa->lbinxlo; + int lbinxhi = nb_ssa->lbinxhi; + int lbinylo = nb_ssa->lbinylo; + int lbinyhi = nb_ssa->lbinyhi; + int lbinzlo = nb_ssa->lbinzlo; + int lbinzhi = nb_ssa->lbinzhi; ipage->reset(); - // loop over owned atoms, storing half of the neighbors - - for (i = 0; i < nlocal; i++) { + // loop over bins with local atoms, storing half of the neighbors + for (zbin = lbinzlo; zbin < lbinzhi; zbin++) { + for (ybin = lbinylo; ybin < lbinyhi; ybin++) { + for (xbin = lbinxlo; xbin < lbinxhi; xbin++) { + ibin = zbin*mbiny*mbinx + ybin*mbinx + xbin; + binlist_ssa[ibin] = inum; // record where ibin starts in ilist + for (i = binhead[ibin]; i >= 0; i = bins[i]) { n = 0; neighptr = ipage->vget(); @@ -109,7 +121,7 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) // loop over rest of local atoms in i's bin // just store them, since j is beyond i in linked list - for (j = bins_ssa[i]; j >= 0; j = bins_ssa[j]) { + for (j = bins[i]; j >= 0; j = bins[j]) { jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; @@ -136,13 +148,11 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) } } - ibin = coord2bin(x[i]); - // loop over all local atoms in other bins in "half" stencil 
for (k = 0; k < nstencil_half; k++) { - for (j = binhead_ssa[ibin+stencil[k]]; j >= 0; - j = bins_ssa[j]) { + for (j = binhead[ibin+stencil[k]]; j >= 0; + j = bins[j]) { jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; @@ -177,13 +187,20 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) if (ipage->status()) error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); } + // verify count of atoms in ibin + if (binct_ssa[ibin] != (inum - binlist_ssa[ibin])) + error->one(FLERR,"binct_ssa didn't agree with lenght in ilist"); + } + } + } list->inum = inum; // loop over AIR ghost atoms, storing their local neighbors // since these are ghosts, must check if stencil bin is out of bounds for (int airnum = 2; airnum <= 8; airnum++) { - for (i = gairhead_ssa[airnum]; i >= 0; i = bins_ssa[i]) { + list->AIRct_ssa[airnum - 1] = nb_ssa->gairct_ssa[airnum]; + for (i = gairhead_ssa[airnum]; i >= 0; i = bins[i]) { n = 0; neighptr = ipage->vget(); @@ -205,11 +222,11 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) xbin2 = xbin + stencilxyz[k][0]; ybin2 = ybin + stencilxyz[k][1]; zbin2 = zbin + stencilxyz[k][2]; - // since we only care about ghost to local neighbors, these "bounds" could be inset - if (xbin2 < 0 || xbin2 >= mbinx || - ybin2 < 0 || ybin2 >= mbiny || - zbin2 < 0 || zbin2 >= mbinz) continue; - for (j = binhead_ssa[ibin+stencil[k]]; j >= 0; j = bins_ssa[j]) { + // Skip it if this bin is outside the extent of local bins + if (xbin2 < lbinxlo || xbin2 >= lbinxhi || + ybin2 < lbinylo || ybin2 >= lbinyhi || + zbin2 < lbinzlo || zbin2 >= lbinzhi) continue; + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; diff --git a/src/neigh_list.h b/src/neigh_list.h index 9a77a0311d..7649245e99 100644 --- a/src/neigh_list.h +++ b/src/neigh_list.h @@ -80,6 +80,7 @@ class NeighList : protected Pointers { // USER-DPD package and Shardlow Splitting 
Algorithm (SSA) support + int AIRct_ssa[8]; // count of how many atoms in each AIR uint16_t (*ndxAIR_ssa)[8]; // for each atom, last neighbor index of each AIR // methods From e9d46f4e7acb79d57c982f59cbcd335e96beb10e Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 27 Jan 2017 12:31:13 -0500 Subject: [PATCH 129/267] USER-DPD: Correct an error message typo. --- src/USER-DPD/npair_half_bin_newton_ssa.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/USER-DPD/npair_half_bin_newton_ssa.cpp b/src/USER-DPD/npair_half_bin_newton_ssa.cpp index f0860cba4b..b9306ee3b1 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.cpp +++ b/src/USER-DPD/npair_half_bin_newton_ssa.cpp @@ -189,7 +189,7 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) } // verify count of atoms in ibin if (binct_ssa[ibin] != (inum - binlist_ssa[ibin])) - error->one(FLERR,"binct_ssa didn't agree with lenght in ilist"); + error->one(FLERR,"binct_ssa didn't agree with length in ilist"); } } } From fb279a87f5eac44e02319e4464a05fa62aa87794 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 27 Jan 2017 13:24:46 -0500 Subject: [PATCH 130/267] USER-DPD: properly compute AIRct_ssa values, and use them in fix_shardlow. 
Eliminates last use of per-atom ssaAIR values within initial_integrate() --- src/USER-DPD/fix_shardlow.cpp | 36 ++++++++++++++-------- src/USER-DPD/npair_half_bin_newton_ssa.cpp | 8 +++-- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/src/USER-DPD/fix_shardlow.cpp b/src/USER-DPD/fix_shardlow.cpp index 56597697f7..9253d17317 100644 --- a/src/USER-DPD/fix_shardlow.cpp +++ b/src/USER-DPD/fix_shardlow.cpp @@ -538,24 +538,34 @@ void FixShardlow::initial_integrate(int vflag) dtsqrt = sqrt(update->dt); ii = 0; - //Loop over all 14 directions (8 stages) - for (airnum = 1; airnum <=8; airnum++){ + // process neighbors in the local AIR + while (ii < inum) { + i = ilist[ii]; + int len = list->numneigh[i]; + if (len > 0) { + if (useDPDE) ssa_update_dpde(i, &(list->firstneigh[i][0]), len); + else ssa_update_dpd(i, &(list->firstneigh[i][0]), len); + } + ii++; + } - if (airnum > 1) { - // Communicate the updated velocities to all nodes - comm->forward_comm_fix(this); + ii = inum; + //Loop over all 13 outward directions (7 stages) + for (airnum = 1; airnum <=7; airnum++){ + int ct = list->AIRct_ssa[airnum]; - if(useDPDE){ - // Zero out the ghosts' uCond & uMech to be used as delta accumulators - memset(&(atom->uCond[nlocal]), 0, sizeof(double)*nghost); - memset(&(atom->uMech[nlocal]), 0, sizeof(double)*nghost); - } + // Communicate the updated velocities to all nodes + comm->forward_comm_fix(this); + + if(useDPDE){ + // Zero out the ghosts' uCond & uMech to be used as delta accumulators + memset(&(atom->uCond[nlocal]), 0, sizeof(double)*nghost); + memset(&(atom->uMech[nlocal]), 0, sizeof(double)*nghost); } // process neighbors in this AIR - while (ii < anum) { + while (ct-- > 0) { i = ilist[ii]; - if (atom->ssaAIR[i] > airnum) break; /* done with curent AIR */ int len = list->numneigh[i]; if (len > 0) { if (useDPDE) ssa_update_dpde(i, &(list->firstneigh[i][0]), len); @@ -565,7 +575,7 @@ void FixShardlow::initial_integrate(int vflag) } // Communicate the ghost 
deltas to the atom owners - if (airnum > 1) comm->reverse_comm_fix(this); + comm->reverse_comm_fix(this); } //End Loop over all directions For airnum = Top, Top-Right, Right, Bottom-Right, Back diff --git a/src/USER-DPD/npair_half_bin_newton_ssa.cpp b/src/USER-DPD/npair_half_bin_newton_ssa.cpp index b9306ee3b1..cc107a55c4 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.cpp +++ b/src/USER-DPD/npair_half_bin_newton_ssa.cpp @@ -199,7 +199,7 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) // loop over AIR ghost atoms, storing their local neighbors // since these are ghosts, must check if stencil bin is out of bounds for (int airnum = 2; airnum <= 8; airnum++) { - list->AIRct_ssa[airnum - 1] = nb_ssa->gairct_ssa[airnum]; + int locAIRct = 0; for (i = gairhead_ssa[airnum]; i >= 0; i = bins[i]) { n = 0; neighptr = ipage->vget(); @@ -254,13 +254,17 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) } } - if (n > 0) ilist[inum + (gnum++)] = i; + if (n > 0) { + ilist[inum + (gnum++)] = i; + ++locAIRct; + } firstneigh[i] = neighptr; numneigh[i] = n; ipage->vgot(n); if (ipage->status()) error->one(FLERR,"Neighbor (ghost) list overflow, boost neigh_modify one"); } + list->AIRct_ssa[airnum - 1] = locAIRct; } list->gnum = gnum; } From 3dddeef365cfcab63e909d8388cc0f674419f6cd Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 27 Jan 2017 14:02:56 -0500 Subject: [PATCH 131/267] USER-DPD: remove unneeded gairct_ssa[] & anum vars, and some > 0 guards --- src/USER-DPD/fix_shardlow.cpp | 15 +++++---------- src/USER-DPD/nbin_ssa.cpp | 4 ---- src/USER-DPD/nbin_ssa.h | 1 - src/USER-DPD/npair_half_bin_newton_ssa.cpp | 9 ++++----- 4 files changed, 9 insertions(+), 20 deletions(-) diff --git a/src/USER-DPD/fix_shardlow.cpp b/src/USER-DPD/fix_shardlow.cpp index 9253d17317..4fa323a9d8 100644 --- a/src/USER-DPD/fix_shardlow.cpp +++ b/src/USER-DPD/fix_shardlow.cpp @@ -499,7 +499,7 @@ void FixShardlow::ssa_update_dpde( void FixShardlow::initial_integrate(int vflag) { - int 
i,ii,inum,anum; + int i,ii,inum; int *ilist; int nlocal = atom->nlocal; @@ -532,7 +532,6 @@ void FixShardlow::initial_integrate(int vflag) v_t0 = (double (*)[3]) memory->smalloc(sizeof(double)*3*nghost, "FixShardlow:v_t0"); inum = list->inum; - anum = inum + list->gnum; ilist = list->ilist; dtsqrt = sqrt(update->dt); @@ -542,10 +541,8 @@ void FixShardlow::initial_integrate(int vflag) while (ii < inum) { i = ilist[ii]; int len = list->numneigh[i]; - if (len > 0) { - if (useDPDE) ssa_update_dpde(i, &(list->firstneigh[i][0]), len); - else ssa_update_dpd(i, &(list->firstneigh[i][0]), len); - } + if (useDPDE) ssa_update_dpde(i, &(list->firstneigh[i][0]), len); + else ssa_update_dpd(i, &(list->firstneigh[i][0]), len); ii++; } @@ -567,10 +564,8 @@ void FixShardlow::initial_integrate(int vflag) while (ct-- > 0) { i = ilist[ii]; int len = list->numneigh[i]; - if (len > 0) { - if (useDPDE) ssa_update_dpde(i, &(list->firstneigh[i][0]), len); - else ssa_update_dpd(i, &(list->firstneigh[i][0]), len); - } + if (useDPDE) ssa_update_dpde(i, &(list->firstneigh[i][0]), len); + else ssa_update_dpd(i, &(list->firstneigh[i][0]), len); ii++; } diff --git a/src/USER-DPD/nbin_ssa.cpp b/src/USER-DPD/nbin_ssa.cpp index 321baf771a..7ea2117300 100644 --- a/src/USER-DPD/nbin_ssa.cpp +++ b/src/USER-DPD/nbin_ssa.cpp @@ -34,7 +34,6 @@ NBinSSA::NBinSSA(LAMMPS *lmp) : NBinStandard(lmp) binct_ssa = NULL; for (int i = 0; i < 9; i++) { gairhead_ssa[i] = -1; - gairct_ssa[i] = 0; } } @@ -69,7 +68,6 @@ void NBinSSA::bin_atoms() for (i = 0; i < 9; i++) { gairhead_ssa[i] = -1; - gairct_ssa[i] = 0; } for (i = 0; i < mbins; i++) { @@ -89,7 +87,6 @@ void NBinSSA::bin_atoms() if (mask[i] & bitmask) { bins[i] = gairhead_ssa[ibin]; gairhead_ssa[ibin] = i; - ++(gairct_ssa[ibin]); } } } else { @@ -98,7 +95,6 @@ void NBinSSA::bin_atoms() if (ibin < 2) continue; // skip ghost atoms not in AIR bins[i] = gairhead_ssa[ibin]; gairhead_ssa[ibin] = i; - ++(gairct_ssa[ibin]); } } for (i = nlocal-1; i >= 0; i--) { diff 
--git a/src/USER-DPD/nbin_ssa.h b/src/USER-DPD/nbin_ssa.h index 48694370b9..4ec376200c 100644 --- a/src/USER-DPD/nbin_ssa.h +++ b/src/USER-DPD/nbin_ssa.h @@ -32,7 +32,6 @@ class NBinSSA : public NBinStandard { int *binlist_ssa; // index in neighlist of 1st local atom in each bin int *binct_ssa; // count of local atoms in each bin int gairhead_ssa[9]; // index of 1st ghost atom in each AIR - int gairct_ssa[9]; // count of ghost atoms in each AIR int maxbin_ssa; // size of binlist_ssa and binct_ssa arrays // Bounds of the local atoms in the binhead array diff --git a/src/USER-DPD/npair_half_bin_newton_ssa.cpp b/src/USER-DPD/npair_half_bin_newton_ssa.cpp index cc107a55c4..ccc41d1fc4 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.cpp +++ b/src/USER-DPD/npair_half_bin_newton_ssa.cpp @@ -180,21 +180,20 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) } } - ilist[inum++] = i; + if (n > 0) { + ilist[inum++] = i; + } firstneigh[i] = neighptr; numneigh[i] = n; ipage->vgot(n); if (ipage->status()) error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); } - // verify count of atoms in ibin - if (binct_ssa[ibin] != (inum - binlist_ssa[ibin])) - error->one(FLERR,"binct_ssa didn't agree with length in ilist"); } } } - list->inum = inum; + list->AIRct_ssa[0] = list->inum = inum; // loop over AIR ghost atoms, storing their local neighbors // since these are ghosts, must check if stencil bin is out of bounds From f73c9a43aba9b5ff0837360ea7c3d4925906e5f6 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 27 Jan 2017 16:45:15 -0500 Subject: [PATCH 132/267] USER-DPD: remove broken code for building SSA half neighbor list from full --- src/USER-DPD/npair_halffull_newton_ssa.cpp | 136 --------------------- src/USER-DPD/npair_halffull_newton_ssa.h | 44 ------- 2 files changed, 180 deletions(-) delete mode 100644 src/USER-DPD/npair_halffull_newton_ssa.cpp delete mode 100644 src/USER-DPD/npair_halffull_newton_ssa.h diff --git 
a/src/USER-DPD/npair_halffull_newton_ssa.cpp b/src/USER-DPD/npair_halffull_newton_ssa.cpp deleted file mode 100644 index d0be1685b6..0000000000 --- a/src/USER-DPD/npair_halffull_newton_ssa.cpp +++ /dev/null @@ -1,136 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: - James Larentzos and Timothy I. Mattox (Engility Corporation) -------------------------------------------------------------------------- */ - -#include "npair_halffull_newton_ssa.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "atom.h" -#include "atom_vec.h" -#include "molecule.h" -#include "domain.h" -#include "my_page.h" -#include "error.h" - -using namespace LAMMPS_NS; - -// allocate space for static class variable -// prototype for non-class function - -static int *ssaAIRptr; -static int cmp_ssaAIR(const void *, const void *); - -/* ---------------------------------------------------------------------- */ - -NPairHalffullNewtonSSA::NPairHalffullNewtonSSA(LAMMPS *lmp) : NPair(lmp) {} - -/* ---------------------------------------------------------------------- - build half list from full list for use by Shardlow Spliting Algorithm - pair stored once if i,j are both owned and i < j - if j is ghost, only store if j coords are "above and to the right" of i - works if full list is a skip list 
-------------------------------------------------------------------------- */ - -void NPairHalffullNewtonSSA::build(NeighList *list) -{ - int i,j,ii,jj,n,jnum,joriginal; - int *neighptr,*jlist; - - int nlocal = atom->nlocal; - int *ssaAIR = atom->ssaAIR; - - int *ilist = list->ilist; - int *numneigh = list->numneigh; - int **firstneigh = list->firstneigh; - MyPage *ipage = list->ipage; - - int *ilist_full = list->listfull->ilist; - int *numneigh_full = list->listfull->numneigh; - int **firstneigh_full = list->listfull->firstneigh; - int inum_full = list->listfull->inum; - - int inum = 0; - - error->one(FLERR,"NPairHalffullNewtonSSA not yet implemented for ghosts with neighbors."); - return; - - ipage->reset(); - - // loop over parent full list - - for (ii = 0; ii < inum_full; ii++) { - int AIRct[8] = { 0 }; - n = 0; - neighptr = ipage->vget(); - - i = ilist_full[ii]; - - // loop over full neighbor list - - jlist = firstneigh_full[i]; - jnum = numneigh_full[i]; - - for (jj = 0; jj < jnum; jj++) { - joriginal = jlist[jj]; - j = joriginal & NEIGHMASK; - if (j < nlocal) { - if (i > j) continue; - ++(AIRct[0]); - } else { - if (ssaAIR[j] < 2) continue; // skip ghost atoms not in AIR - ++(AIRct[ssaAIR[j] - 1]); - } - neighptr[n++] = joriginal; - } - - ilist[inum++] = i; - firstneigh[i] = neighptr; - numneigh[i] = n; - ipage->vgot(n); - if (ipage->status()) - error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); - - // sort the locals+ghosts in the neighbor list by their ssaAIR number - - ssaAIRptr = atom->ssaAIR; - qsort(&(neighptr[0]), n, sizeof(int), cmp_ssaAIR); - - // do a prefix sum on the counts to turn them into indexes - - list->ndxAIR_ssa[i][0] = AIRct[0]; - for (int ndx = 1; ndx < 8; ++ndx) { - list->ndxAIR_ssa[i][ndx] = AIRct[ndx] + list->ndxAIR_ssa[i][ndx - 1]; - } - } - - list->inum = inum; -} - -/* ---------------------------------------------------------------------- - comparison function invoked by qsort() - accesses static class member 
ssaAIRptr, set before call to qsort() -------------------------------------------------------------------------- */ - -static int cmp_ssaAIR(const void *iptr, const void *jptr) -{ - int i = NEIGHMASK & *((int *) iptr); - int j = NEIGHMASK & *((int *) jptr); - if (ssaAIRptr[i] < ssaAIRptr[j]) return -1; - if (ssaAIRptr[i] > ssaAIRptr[j]) return 1; - return 0; -} - diff --git a/src/USER-DPD/npair_halffull_newton_ssa.h b/src/USER-DPD/npair_halffull_newton_ssa.h deleted file mode 100644 index 03903815b1..0000000000 --- a/src/USER-DPD/npair_halffull_newton_ssa.h +++ /dev/null @@ -1,44 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#ifdef NPAIR_CLASS - -NPairStyle(halffull/newton/ssa, - NPairHalffullNewtonSSA, - NP_HALF_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_NEWTON | - NP_ORTHO | NP_TRI | NP_SSA) - -#else - -#ifndef LMP_NPAIR_HALFFULL_NEWTON_SSA_H -#define LMP_NPAIR_HALFFULL_NEWTON_SSA_H - -#include "npair.h" - -namespace LAMMPS_NS { - -class NPairHalffullNewtonSSA : public NPair { - public: - NPairHalffullNewtonSSA(class LAMMPS *); - ~NPairHalffullNewtonSSA() {} - void build(class NeighList *); -}; - -} - -#endif -#endif - -/* ERROR/WARNING messages: - -*/ From 641bb4bb16c8f852af189ba1e71cd7b035e0c8e2 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 27 Jan 2017 16:47:19 -0500 Subject: [PATCH 133/267] USER-DPD: remove use of ssaAIR[], move coord2ssaAIR() to nbin_ssa.cpp Saves an int per atom and ghost, also simplifies and reduces code size. --- src/USER-DPD/fix_shardlow.cpp | 116 ---------------------------------- src/USER-DPD/fix_shardlow.h | 13 ---- src/USER-DPD/nbin_ssa.cpp | 40 +++++++++++- src/USER-DPD/nbin_ssa.h | 1 + 4 files changed, 38 insertions(+), 132 deletions(-) diff --git a/src/USER-DPD/fix_shardlow.cpp b/src/USER-DPD/fix_shardlow.cpp index 4fa323a9d8..05bf1602f9 100644 --- a/src/USER-DPD/fix_shardlow.cpp +++ b/src/USER-DPD/fix_shardlow.cpp @@ -109,26 +109,12 @@ FixShardlow::FixShardlow(LAMMPS *lmp, int narg, char **arg) : if(pairDPD == NULL && pairDPDE == NULL) error->all(FLERR,"Must use pair_style dpd/fdt or dpd/fdt/energy with fix shardlow"); - // Setup the ssaAIR array - atom->ssaAIR = NULL; - grow_arrays(atom->nmax); - memset(atom->ssaAIR, 0, sizeof(int)*atom->nlocal); - - // Setup callbacks for maintaining atom->ssaAIR[] - atom->add_callback(0); // grow (aka exchange) - atom->add_callback(1); // restart - atom->add_callback(2); // border } /* ---------------------------------------------------------------------- */ FixShardlow::~FixShardlow() { - atom->delete_callback(id, 0); - 
atom->delete_callback(id, 1); - atom->delete_callback(id, 2); - - memory->destroy(atom->ssaAIR); } /* ---------------------------------------------------------------------- */ @@ -137,7 +123,6 @@ int FixShardlow::setmask() { int mask = 0; mask |= INITIAL_INTEGRATE; - mask |= PRE_EXCHANGE | MIN_PRE_EXCHANGE; return mask; } @@ -161,27 +146,6 @@ void FixShardlow::init_list(int id, NeighList *ptr) /* ---------------------------------------------------------------------- */ -void FixShardlow::pre_exchange() -{ - memset(atom->ssaAIR, 0, sizeof(int)*atom->nlocal); -} - -/* ---------------------------------------------------------------------- */ - -void FixShardlow::setup_pre_exchange() -{ - memset(atom->ssaAIR, 0, sizeof(int)*atom->nlocal); -} - -/* ---------------------------------------------------------------------- */ - -void FixShardlow::min_pre_exchange() -{ - memset(atom->ssaAIR, 0, sizeof(int)*atom->nlocal); -} - -/* ---------------------------------------------------------------------- */ - void FixShardlow::setup(int vflag) { bool fixShardlow = false; @@ -659,91 +623,11 @@ void FixShardlow::unpack_reverse_comm(int n, int *list, double *buf) } } -/* ---------------------------------------------------------------------- - convert atom coords into the ssa active interaction region number -------------------------------------------------------------------------- */ - -int FixShardlow::coord2ssaAIR(double *x) -{ - int ix, iy, iz; - - ix = iy = iz = 0; - if (x[2] < domain->sublo[2]) iz = -1; - if (x[2] >= domain->subhi[2]) iz = 1; - if (x[1] < domain->sublo[1]) iy = -1; - if (x[1] >= domain->subhi[1]) iy = 1; - if (x[0] < domain->sublo[0]) ix = -1; - if (x[0] >= domain->subhi[0]) ix = 1; - - if(iz < 0){ - return -1; - } else if(iz == 0){ - if( iy<0 ) return -1; // bottom left/middle/right - if( (iy==0) && (ix<0) ) return -1; // left atoms - if( (iy==0) && (ix==0) ) return 0; // Locally owned atoms - if( (iy==0) && (ix>0) ) return 3; // Right atoms - if( (iy>0) && 
(ix==0) ) return 2; // Top-middle atoms - if( (iy>0) && (ix!=0) ) return 4; // Top-right and top-left atoms - } else { // iz > 0 - if((ix==0) && (iy==0)) return 5; // Back atoms - if((ix==0) && (iy!=0)) return 6; // Top-back and bottom-back atoms - if((ix!=0) && (iy==0)) return 7; // Left-back and right-back atoms - if((ix!=0) && (iy!=0)) return 8; // Back corner atoms - } - - return -2; -} - /* ---------------------------------------------------------------------- */ -void FixShardlow::grow_arrays(int nmax) -{ - memory->grow(atom->ssaAIR,nmax,"fix_shardlow:ssaAIR"); -} - -void FixShardlow::copy_arrays(int i, int j, int delflag) -{ - atom->ssaAIR[j] = atom->ssaAIR[i]; -} - -void FixShardlow::set_arrays(int i) -{ - atom->ssaAIR[i] = 0; /* coord2ssaAIR(x[i]) */ -} - -int FixShardlow::pack_border(int n, int *list, double *buf) -{ - for (int i = 0; i < n; i++) { - int j = list[i]; - if (atom->ssaAIR[j] == 0) atom->ssaAIR[j] = 1; // not purely local anymore - } - return 0; -} - -int FixShardlow::unpack_border(int n, int first, double *buf) -{ - int i,last = first + n; - for (i = first; i < last; i++) { - atom->ssaAIR[i] = coord2ssaAIR(atom->x[i]); - } - return 0; -} - -int FixShardlow::unpack_exchange(int i, double *buf) -{ - atom->ssaAIR[i] = 0; /* coord2ssaAIR(x[i]) */ - return 0; -} - -void FixShardlow::unpack_restart(int i, int nth) -{ - atom->ssaAIR[i] = 0; /* coord2ssaAIR(x[i]) */ -} - double FixShardlow::memory_usage() { double bytes = 0.0; - bytes += memory->usage(atom->ssaAIR,atom->nmax); bytes += sizeof(double)*3*atom->nghost; // v_t0[] return bytes; } diff --git a/src/USER-DPD/fix_shardlow.h b/src/USER-DPD/fix_shardlow.h index 2ffb96ae7c..6fd438b8f0 100644 --- a/src/USER-DPD/fix_shardlow.h +++ b/src/USER-DPD/fix_shardlow.h @@ -35,18 +35,6 @@ class FixShardlow : public Fix { virtual void init_list(int, class NeighList *); virtual void setup(int); virtual void initial_integrate(int); - void setup_pre_exchange(); - void pre_exchange(); - void min_pre_exchange(); 
- - void grow_arrays(int); - void copy_arrays(int, int, int); - void set_arrays(int); - - int pack_border(int, int *, double *); - int unpack_border(int, int, double *); - int unpack_exchange(int, double *); - void unpack_restart(int, int); double memory_usage(); @@ -63,7 +51,6 @@ class FixShardlow : public Fix { private: double dtsqrt; // = sqrt(update->dt); - int coord2ssaAIR(double *); // map atom coord to an AIR number void ssa_update_dpd(int, int *, int); // Constant Temperature void ssa_update_dpde(int, int *, int); // Constant Energy diff --git a/src/USER-DPD/nbin_ssa.cpp b/src/USER-DPD/nbin_ssa.cpp index 7ea2117300..25a2fb3b35 100644 --- a/src/USER-DPD/nbin_ssa.cpp +++ b/src/USER-DPD/nbin_ssa.cpp @@ -20,6 +20,7 @@ #include "atom.h" #include "update.h" #include "group.h" +#include "domain.h" #include "memory.h" #include "error.h" @@ -58,7 +59,6 @@ void NBinSSA::bin_atoms() if (includegroup) nlocal = atom->nfirst; double **x = atom->x; int *mask = atom->mask; - int *ssaAIR = atom->ssaAIR; int xbin,ybin,zbin; last_bin = update->ntimestep; @@ -82,7 +82,7 @@ void NBinSSA::bin_atoms() int bitmask = group->bitmask[includegroup]; int nowned = atom->nlocal; // NOTE: nlocal was set to atom->nfirst above for (i = nall-1; i >= nowned; i--) { - ibin = ssaAIR[i]; + ibin = coord2ssaAIR(x[i]); if (ibin < 2) continue; // skip ghost atoms not in AIR if (mask[i] & bitmask) { bins[i] = gairhead_ssa[ibin]; @@ -91,7 +91,7 @@ void NBinSSA::bin_atoms() } } else { for (i = nall-1; i >= nlocal; i--) { - ibin = ssaAIR[i]; + ibin = coord2ssaAIR(x[i]); if (ibin < 2) continue; // skip ghost atoms not in AIR bins[i] = gairhead_ssa[ibin]; gairhead_ssa[ibin] = i; @@ -148,3 +148,37 @@ bigint NBinSSA::memory_usage() } return bytes; } + +/* ---------------------------------------------------------------------- + convert atom coords into the ssa active interaction region number +------------------------------------------------------------------------- */ +int NBinSSA::coord2ssaAIR(const double 
*x) +{ + int ix, iy, iz; + + ix = iy = iz = 0; + if (x[2] < domain->sublo[2]) iz = -1; + if (x[2] >= domain->subhi[2]) iz = 1; + if (x[1] < domain->sublo[1]) iy = -1; + if (x[1] >= domain->subhi[1]) iy = 1; + if (x[0] < domain->sublo[0]) ix = -1; + if (x[0] >= domain->subhi[0]) ix = 1; + + if(iz < 0){ + return -1; + } else if(iz == 0){ + if( iy<0 ) return -1; // bottom left/middle/right + if( (iy==0) && (ix<0) ) return -1; // left atoms + if( (iy==0) && (ix==0) ) return 0; // Locally owned atoms + if( (iy==0) && (ix>0) ) return 3; // Right atoms + if( (iy>0) && (ix==0) ) return 2; // Top-middle atoms + if( (iy>0) && (ix!=0) ) return 4; // Top-right and top-left atoms + } else { // iz > 0 + if((ix==0) && (iy==0)) return 5; // Back atoms + if((ix==0) && (iy!=0)) return 6; // Top-back and bottom-back atoms + if((ix!=0) && (iy==0)) return 7; // Left-back and right-back atoms + if((ix!=0) && (iy!=0)) return 8; // Back corner atoms + } + + return -2; +} diff --git a/src/USER-DPD/nbin_ssa.h b/src/USER-DPD/nbin_ssa.h index 4ec376200c..5db5a0fa41 100644 --- a/src/USER-DPD/nbin_ssa.h +++ b/src/USER-DPD/nbin_ssa.h @@ -155,6 +155,7 @@ class NBinSSA : public NBinStandard { } private: + int coord2ssaAIR(const double *); // map atom coord to an AIR number double bboxlo_[3],bboxhi_[3]; }; From ce2da5068b6e77c5a508e554ab824dc54328bd21 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Mon, 30 Jan 2017 13:01:28 -0500 Subject: [PATCH 134/267] USER-DPD: renumber AIRs back to 1-7 for ghosts, and just 0 for locals. This removes the distinction between pure and impure locals. Pure and impure locals messed up the directionality of half neighbor lists, which turns out to be crucial to the approach for SSA with kokkos.
--- src/USER-DPD/nbin_ssa.cpp | 22 +++++++++++----------- src/USER-DPD/nbin_ssa.h | 2 +- src/USER-DPD/npair_half_bin_newton_ssa.cpp | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/USER-DPD/nbin_ssa.cpp b/src/USER-DPD/nbin_ssa.cpp index 25a2fb3b35..7e603af714 100644 --- a/src/USER-DPD/nbin_ssa.cpp +++ b/src/USER-DPD/nbin_ssa.cpp @@ -33,7 +33,7 @@ NBinSSA::NBinSSA(LAMMPS *lmp) : NBinStandard(lmp) maxbin_ssa = 0; binlist_ssa = NULL; binct_ssa = NULL; - for (int i = 0; i < 9; i++) { + for (int i = 0; i < 8; i++) { gairhead_ssa[i] = -1; } } @@ -66,7 +66,7 @@ void NBinSSA::bin_atoms() bboxlo_[0] = bboxlo[0]; bboxlo_[1] = bboxlo[1]; bboxlo_[2] = bboxlo[2]; bboxhi_[0] = bboxhi[0]; bboxhi_[1] = bboxhi[1]; bboxhi_[2] = bboxhi[2]; - for (i = 0; i < 9; i++) { + for (i = 0; i < 8; i++) { gairhead_ssa[i] = -1; } @@ -83,7 +83,7 @@ void NBinSSA::bin_atoms() int nowned = atom->nlocal; // NOTE: nlocal was set to atom->nfirst above for (i = nall-1; i >= nowned; i--) { ibin = coord2ssaAIR(x[i]); - if (ibin < 2) continue; // skip ghost atoms not in AIR + if (ibin < 1) continue; // skip ghost atoms not in AIR if (mask[i] & bitmask) { bins[i] = gairhead_ssa[ibin]; gairhead_ssa[ibin] = i; @@ -92,7 +92,7 @@ void NBinSSA::bin_atoms() } else { for (i = nall-1; i >= nlocal; i--) { ibin = coord2ssaAIR(x[i]); - if (ibin < 2) continue; // skip ghost atoms not in AIR + if (ibin < 1) continue; // skip ghost atoms not in AIR bins[i] = gairhead_ssa[ibin]; gairhead_ssa[ibin] = i; } @@ -170,14 +170,14 @@ int NBinSSA::coord2ssaAIR(const double *x) if( iy<0 ) return -1; // bottom left/middle/right if( (iy==0) && (ix<0) ) return -1; // left atoms if( (iy==0) && (ix==0) ) return 0; // Locally owned atoms - if( (iy==0) && (ix>0) ) return 3; // Right atoms - if( (iy>0) && (ix==0) ) return 2; // Top-middle atoms - if( (iy>0) && (ix!=0) ) return 4; // Top-right and top-left atoms + if( (iy==0) && (ix>0) ) return 2; // Right atoms + if( (iy>0) && (ix==0) ) return 1; // Top-middle 
atoms + if( (iy>0) && (ix!=0) ) return 3; // Top-right and top-left atoms } else { // iz > 0 - if((ix==0) && (iy==0)) return 5; // Back atoms - if((ix==0) && (iy!=0)) return 6; // Top-back and bottom-back atoms - if((ix!=0) && (iy==0)) return 7; // Left-back and right-back atoms - if((ix!=0) && (iy!=0)) return 8; // Back corner atoms + if((ix==0) && (iy==0)) return 4; // Back atoms + if((ix==0) && (iy!=0)) return 5; // Top-back and bottom-back atoms + if((ix!=0) && (iy==0)) return 6; // Left-back and right-back atoms + if((ix!=0) && (iy!=0)) return 7; // Back corner atoms } return -2; diff --git a/src/USER-DPD/nbin_ssa.h b/src/USER-DPD/nbin_ssa.h index 5db5a0fa41..f26f8c77f0 100644 --- a/src/USER-DPD/nbin_ssa.h +++ b/src/USER-DPD/nbin_ssa.h @@ -31,7 +31,7 @@ class NBinSSA : public NBinStandard { int *binlist_ssa; // index in neighlist of 1st local atom in each bin int *binct_ssa; // count of local atoms in each bin - int gairhead_ssa[9]; // index of 1st ghost atom in each AIR + int gairhead_ssa[8]; // index of 1st ghost atom in each AIR int maxbin_ssa; // size of binlist_ssa and binct_ssa arrays // Bounds of the local atoms in the binhead array diff --git a/src/USER-DPD/npair_half_bin_newton_ssa.cpp b/src/USER-DPD/npair_half_bin_newton_ssa.cpp index ccc41d1fc4..f3b7094bd8 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.cpp +++ b/src/USER-DPD/npair_half_bin_newton_ssa.cpp @@ -197,7 +197,7 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) // loop over AIR ghost atoms, storing their local neighbors // since these are ghosts, must check if stencil bin is out of bounds - for (int airnum = 2; airnum <= 8; airnum++) { + for (int airnum = 1; airnum <= 7; airnum++) { int locAIRct = 0; for (i = gairhead_ssa[airnum]; i >= 0; i = bins[i]) { n = 0; @@ -263,7 +263,7 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) if (ipage->status()) error->one(FLERR,"Neighbor (ghost) list overflow, boost neigh_modify one"); } - list->AIRct_ssa[airnum - 1] = locAIRct; + 
list->AIRct_ssa[airnum] = locAIRct; } list->gnum = gnum; } From ee83b755eae4f3167fd2ab8b50bbf86cab3407ee Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Mon, 30 Jan 2017 14:32:18 -0500 Subject: [PATCH 135/267] USER-DPD: Split the SSA stencil and neighbor list into subphases. NOTE: pair evaluation order changes, causing numerical differences! This enables processing neighbors in subphase groups that enforce a geometrical separation of pairs, allowing greater parallelism once fix_shardlow (SSA) is converted to Kokkos. --- src/USER-DPD/npair_half_bin_newton_ssa.cpp | 52 +++++---- .../nstencil_half_bin_2d_newton_ssa.cpp | 60 ++++++++-- .../nstencil_half_bin_3d_newton_ssa.cpp | 110 ++++++++++++++---- src/USER-DPD/nstencil_ssa.h | 2 +- 4 files changed, 165 insertions(+), 59 deletions(-) diff --git a/src/USER-DPD/npair_half_bin_newton_ssa.cpp b/src/USER-DPD/npair_half_bin_newton_ssa.cpp index f3b7094bd8..77b20966b0 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.cpp +++ b/src/USER-DPD/npair_half_bin_newton_ssa.cpp @@ -74,7 +74,7 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) NStencilSSA *ns_ssa = dynamic_cast(ns); if (!ns_ssa) error->one(FLERR, "NStencil wasn't a NStencilSSA object"); - int nstencil_half = ns_ssa->nstencil_half; + int *nstencil_ssa = &(ns_ssa->nstencil_ssa[0]); int nstencil_full = ns_ssa->nstencil; NBinSSA *nb_ssa = dynamic_cast(nb); @@ -150,34 +150,38 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) // loop over all local atoms in other bins in "half" stencil - for (k = 0; k < nstencil_half; k++) { - for (j = binhead[ibin+stencil[k]]; j >= 0; - j = bins[j]) { + k = 0; + for (int subphase = 0; subphase < 4; subphase++) { + for (; k < nstencil_ssa[subphase]; k++) { + for (j = binhead[ibin+stencil[k]]; j >= 0; + j = bins[j]) { - jtype = type[j]; - if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; - delx = xtmp - x[j][0]; - dely = ytmp -
x[j][1]; - delz = ztmp - x[j][2]; - rsq = delx*delx + dely*dely + delz*delz; + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; - if (rsq <= cutneighsq[itype][jtype]) { - if (molecular) { - if (!moltemplate) - which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >= 0) - which = find_special(onemols[imol]->special[iatom], - onemols[imol]->nspecial[iatom], - tag[j]-tagprev); - else which = 0; - if (which == 0) neighptr[n++] = j; - else if (domain->minimum_image_check(delx,dely,delz)) - neighptr[n++] = j; - else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); - } else neighptr[n++] = j; + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + if (!moltemplate) + which = find_special(special[i],nspecial[i],tag[j]); + else if (imol >= 0) + which = find_special(onemols[imol]->special[iatom], + onemols[imol]->nspecial[iatom], + tag[j]-tagprev); + else which = 0; + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } } } + list->ndxAIR_ssa[i][subphase] = n; // record end of this subphase } if (n > 0) { diff --git a/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp b/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp index 254339bffc..af337a38c6 100644 --- a/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp +++ b/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp @@ -42,31 +42,69 @@ NStencilHalfBin2dNewtonSSA::NStencilHalfBin2dNewtonSSA(LAMMPS *lmp) : void NStencilHalfBin2dNewtonSSA::create() { int i,j,pos = 0; - + // Subphase 0: upper right front bins (red) for (j = 0; j <= sy; j++) - for (i = -sx; i <= sx; i++) - if (j > 0 || (j == 0 && i > 0)) + for (i = 0; i <= sx; i++) + if (j > 0 || i > 0) // skip the centroid if (bin_distance(i,j,0) < cutneighmaxsq) { stencilxyz[pos][0] = i; stencilxyz[pos][1] = j; stencilxyz[pos][2] = 0; 
stencil[pos++] = j*mbinx + i; } + nstencil_ssa[0] = pos; - nstencil_half = pos; // record where normal half stencil ends - - // include additional bins for AIR ghosts only - - for (j = -sy; j <= 0; j++) - for (i = -sx; i <= sx; i++) { - if (j == 0 && i > 0) continue; + // Subphase 1: upper left front bins (light blue) + for (j = 1; j <= sy; j++) + for (i = -sx; i < 0; i++) if (bin_distance(i,j,0) < cutneighmaxsq) { stencilxyz[pos][0] = i; stencilxyz[pos][1] = j; stencilxyz[pos][2] = 0; stencil[pos++] = j*mbinx + i; } - } + nstencil_ssa[1] = pos; + + // Subphase 2: lower left front bins (blue) + nstencil_ssa[2] = pos; + + // Subphase 3: lower right front bins (yellow) + nstencil_ssa[3] = pos; + + // Now include additional bins for AIR ghosts, and impure-to-pure locals + // Subphase 4: upper right back bins (pink) + nstencil_ssa[4] = pos; + + // Subphase 5: upper left back bins (light green) + nstencil_ssa[5] = pos; + + // Subphase 6: lower left back bins (purple) + for (j = -sy; j <= 0; j++) + for (i = -sx; i < 0; i++) + if (bin_distance(i,j,0) < cutneighmaxsq) { + stencilxyz[pos][0] = i; + stencilxyz[pos][1] = j; + stencilxyz[pos][2] = 0; + stencil[pos++] = j*mbinx + i; + } + nstencil_ssa[6] = pos; + + // Subphase 7: lower right back bins (white) + for (j = -sy; j < 0; j++) + for (i = 0; i <= sx; i++) + if (bin_distance(i,j,0) < cutneighmaxsq) { + stencilxyz[pos][0] = i; + stencilxyz[pos][1] = j; + stencilxyz[pos][2] = 0; + stencil[pos++] = j*mbinx + i; + } + nstencil_ssa[7] = pos; + + // Also, include the centroid for the AIR ghosts. 
+ stencilxyz[pos][0] = 0; + stencilxyz[pos][1] = 0; + stencilxyz[pos][2] = 0; + stencil[pos++] = 0; nstencil = pos; // record where full stencil ends } diff --git a/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp b/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp index 1e2c18c66a..a2911a6d7b 100644 --- a/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp +++ b/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp @@ -42,45 +42,109 @@ NStencilHalfBin3dNewtonSSA::NStencilHalfBin3dNewtonSSA(LAMMPS *lmp) : void NStencilHalfBin3dNewtonSSA::create() { int i,j,k,pos = 0; - + // Subphase 0: upper right front bins (red) for (k = 0; k <= sz; k++) - for (j = -sy; j <= sy; j++) - for (i = -sx; i <= sx; i++) - if (k > 0 || j > 0 || (j == 0 && i > 0)) + for (j = 0; j <= sy; j++) + for (i = 0; i <= sx; i++) + if (k > 0 || j > 0 || i > 0) // skip the centroid if (bin_distance(i,j,k) < cutneighmaxsq) { stencilxyz[pos][0] = i; stencilxyz[pos][1] = j; stencilxyz[pos][2] = k; stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; } + nstencil_ssa[0] = pos; - nstencil_half = pos; // record where normal half stencil ends - - // include additional bins for AIR ghosts only - - for (k = -sz; k < 0; k++) - for (j = -sy; j <= sy; j++) - for (i = -sx; i <= sx; i++) + // Subphase 1: upper left front bins (light blue) + for (k = 0; k <= sz; k++) + for (j = 1; j <= sy; j++) + for (i = -sx; i < 0; i++) if (bin_distance(i,j,k) < cutneighmaxsq) { stencilxyz[pos][0] = i; stencilxyz[pos][1] = j; stencilxyz[pos][2] = k; stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; } + nstencil_ssa[1] = pos; - // For k==0, make sure to skip already included bins + // Subphase 2: lower left front bins (blue) + for (k = 1; k <= sz; k++) + for (j = -sy; j <= 0; j++) + for (i = -sx; i < 0; i++) + if (bin_distance(i,j,k) < cutneighmaxsq) { + stencilxyz[pos][0] = i; + stencilxyz[pos][1] = j; + stencilxyz[pos][2] = k; + stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; + } + nstencil_ssa[2] = pos; - k = 0; - for (j = -sy; j <= 0; j++) - for (i 
= -sx; i <= sx; i++) { - if (j == 0 && i > 0) continue; - if (bin_distance(i,j,k) < cutneighmaxsq) { - stencilxyz[pos][0] = i; - stencilxyz[pos][1] = j; - stencilxyz[pos][2] = k; - stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; - } - } + // Subphase 3: lower right front bins (yellow) + for (k = 1; k <= sz; k++) + for (j = -sy; j < 0; j++) + for (i = 0; i <= sx; i++) + if (bin_distance(i,j,k) < cutneighmaxsq) { + stencilxyz[pos][0] = i; + stencilxyz[pos][1] = j; + stencilxyz[pos][2] = k; + stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; + } + nstencil_ssa[3] = pos; + + // Now include additional bins for AIR ghosts, and impure-to-pure locals + // Subphase 4: upper right back bins (pink) + for (k = -sz; k < 0; k++) + for (j = 0; j <= sy; j++) + for (i = 0; i <= sx; i++) + if (bin_distance(i,j,k) < cutneighmaxsq) { + stencilxyz[pos][0] = i; + stencilxyz[pos][1] = j; + stencilxyz[pos][2] = k; + stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; + } + nstencil_ssa[4] = pos; + + // Subphase 5: upper left back bins (light green) + for (k = -sz; k < 0; k++) + for (j = 1; j <= sy; j++) + for (i = -sx; i < 0; i++) + if (bin_distance(i,j,k) < cutneighmaxsq) { + stencilxyz[pos][0] = i; + stencilxyz[pos][1] = j; + stencilxyz[pos][2] = k; + stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; + } + nstencil_ssa[5] = pos; + + // Subphase 6: lower left back bins (purple) + for (k = -sz; k <= 0; k++) + for (j = -sy; j <= 0; j++) + for (i = -sx; i < 0; i++) + if (bin_distance(i,j,k) < cutneighmaxsq) { + stencilxyz[pos][0] = i; + stencilxyz[pos][1] = j; + stencilxyz[pos][2] = k; + stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; + } + nstencil_ssa[6] = pos; + + // Subphase 7: lower right back bins (white) + for (k = -sz; k <= 0; k++) + for (j = -sy; j < 0; j++) + for (i = 0; i <= sx; i++) + if (bin_distance(i,j,k) < cutneighmaxsq) { + stencilxyz[pos][0] = i; + stencilxyz[pos][1] = j; + stencilxyz[pos][2] = k; + stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; + } + nstencil_ssa[7] = pos; + + // Also, 
include the centroid for the AIR ghosts. + stencilxyz[pos][0] = 0; + stencilxyz[pos][1] = 0; + stencilxyz[pos][2] = 0; + stencil[pos++] = 0; nstencil = pos; // record where full stencil ends } diff --git a/src/USER-DPD/nstencil_ssa.h b/src/USER-DPD/nstencil_ssa.h index e6dfce60f4..a5e3723271 100644 --- a/src/USER-DPD/nstencil_ssa.h +++ b/src/USER-DPD/nstencil_ssa.h @@ -24,7 +24,7 @@ class NStencilSSA : public NStencil { ~NStencilSSA() {} virtual void create() = 0; - int nstencil_half; // where the half stencil ends + int nstencil_ssa[8]; // last stencil index for each subphase }; } From be166cb5bf3743b0598009570d44cfe96b327979 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Mon, 30 Jan 2017 15:03:43 -0500 Subject: [PATCH 136/267] USER-DPD: Use subphases when processing AIR zero (locals) in SSA. NOTE: pair ordering was NOT changed, but tiny differences could occur. --- src/USER-DPD/fix_shardlow.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/USER-DPD/fix_shardlow.cpp b/src/USER-DPD/fix_shardlow.cpp index 05bf1602f9..4220760a9b 100644 --- a/src/USER-DPD/fix_shardlow.cpp +++ b/src/USER-DPD/fix_shardlow.cpp @@ -504,9 +504,14 @@ void FixShardlow::initial_integrate(int vflag) // process neighbors in the local AIR while (ii < inum) { i = ilist[ii]; - int len = list->numneigh[i]; - if (useDPDE) ssa_update_dpde(i, &(list->firstneigh[i][0]), len); - else ssa_update_dpd(i, &(list->firstneigh[i][0]), len); + for (int subphase = 0; subphase < 4; subphase++) { + int start = (subphase > 0) ? 
list->ndxAIR_ssa[i][subphase - 1] : 0; + int len = list->ndxAIR_ssa[i][subphase] - start; + if (len > 0) { + if (useDPDE) ssa_update_dpde(i, &(list->firstneigh[i][start]), len); + else ssa_update_dpd(i, &(list->firstneigh[i][start]), len); + } + } ii++; } From 52aaad907f356e6ab1e553512d343fcf641a547a Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Tue, 7 Feb 2017 12:18:27 -0500 Subject: [PATCH 137/267] USER-DPD: SSA with Kokkos: Reorder stencil subphases to make things easier. --- .../nstencil_half_bin_2d_newton_ssa.cpp | 16 ++++++------- .../nstencil_half_bin_3d_newton_ssa.cpp | 24 +++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp b/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp index af337a38c6..084d5b0602 100644 --- a/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp +++ b/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp @@ -65,10 +65,10 @@ void NStencilHalfBin2dNewtonSSA::create() } nstencil_ssa[1] = pos; - // Subphase 2: lower left front bins (blue) + // Subphase 2: lower right front bins (yellow) nstencil_ssa[2] = pos; - // Subphase 3: lower right front bins (yellow) + // Subphase 3: lower left front bins (blue) nstencil_ssa[3] = pos; // Now include additional bins for AIR ghosts, and impure-to-pure locals @@ -78,9 +78,9 @@ void NStencilHalfBin2dNewtonSSA::create() // Subphase 5: upper left back bins (light green) nstencil_ssa[5] = pos; - // Subphase 6: lower left back bins (purple) - for (j = -sy; j <= 0; j++) - for (i = -sx; i < 0; i++) + // Subphase 6: lower right back bins (white) + for (j = -sy; j < 0; j++) + for (i = 0; i <= sx; i++) if (bin_distance(i,j,0) < cutneighmaxsq) { stencilxyz[pos][0] = i; stencilxyz[pos][1] = j; @@ -89,9 +89,9 @@ void NStencilHalfBin2dNewtonSSA::create() } nstencil_ssa[6] = pos; - // Subphase 7: lower right back bins (white) - for (j = -sy; j < 0; j++) - for (i = 0; i <= sx; i++) + // Subphase 7: lower left back bins (purple) + for (j = -sy; j 
<= 0; j++) + for (i = -sx; i < 0; i++) if (bin_distance(i,j,0) < cutneighmaxsq) { stencilxyz[pos][0] = i; stencilxyz[pos][1] = j; diff --git a/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp b/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp index a2911a6d7b..1741a1e847 100644 --- a/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp +++ b/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp @@ -67,10 +67,10 @@ void NStencilHalfBin3dNewtonSSA::create() } nstencil_ssa[1] = pos; - // Subphase 2: lower left front bins (blue) + // Subphase 2: lower right front bins (yellow) for (k = 1; k <= sz; k++) - for (j = -sy; j <= 0; j++) - for (i = -sx; i < 0; i++) + for (j = -sy; j < 0; j++) + for (i = 0; i <= sx; i++) if (bin_distance(i,j,k) < cutneighmaxsq) { stencilxyz[pos][0] = i; stencilxyz[pos][1] = j; @@ -79,10 +79,10 @@ void NStencilHalfBin3dNewtonSSA::create() } nstencil_ssa[2] = pos; - // Subphase 3: lower right front bins (yellow) + // Subphase 3: lower left front bins (blue) for (k = 1; k <= sz; k++) - for (j = -sy; j < 0; j++) - for (i = 0; i <= sx; i++) + for (j = -sy; j <= 0; j++) + for (i = -sx; i < 0; i++) if (bin_distance(i,j,k) < cutneighmaxsq) { stencilxyz[pos][0] = i; stencilxyz[pos][1] = j; @@ -116,10 +116,10 @@ void NStencilHalfBin3dNewtonSSA::create() } nstencil_ssa[5] = pos; - // Subphase 6: lower left back bins (purple) + // Subphase 6: lower right back bins (white) for (k = -sz; k <= 0; k++) - for (j = -sy; j <= 0; j++) - for (i = -sx; i < 0; i++) + for (j = -sy; j < 0; j++) + for (i = 0; i <= sx; i++) if (bin_distance(i,j,k) < cutneighmaxsq) { stencilxyz[pos][0] = i; stencilxyz[pos][1] = j; @@ -128,10 +128,10 @@ void NStencilHalfBin3dNewtonSSA::create() } nstencil_ssa[6] = pos; - // Subphase 7: lower right back bins (white) + // Subphase 7: lower left back bins (purple) for (k = -sz; k <= 0; k++) - for (j = -sy; j < 0; j++) - for (i = 0; i <= sx; i++) + for (j = -sy; j <= 0; j++) + for (i = -sx; i < 0; i++) if (bin_distance(i,j,k) < cutneighmaxsq) { 
stencilxyz[pos][0] = i; stencilxyz[pos][1] = j; From 151b3f552bfdfcc915a4ed429b210342a3e1837f Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Tue, 7 Feb 2017 12:53:45 -0500 Subject: [PATCH 138/267] USER-DPD: Save pointer to the NPair used to create the NeighList Gives a user of NeighList access to data stored in a custom NPair --- src/neigh_list.cpp | 1 + src/neigh_list.h | 1 + src/neighbor.cpp | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/neigh_list.cpp b/src/neigh_list.cpp index edc8634373..e8fd4130fc 100644 --- a/src/neigh_list.cpp +++ b/src/neigh_list.cpp @@ -79,6 +79,7 @@ NeighList::NeighList(LAMMPS *lmp) : Pointers(lmp) // USER-DPD package ndxAIR_ssa = NULL; + np = NULL; } /* ---------------------------------------------------------------------- */ diff --git a/src/neigh_list.h b/src/neigh_list.h index 7649245e99..ea88e9b28b 100644 --- a/src/neigh_list.h +++ b/src/neigh_list.h @@ -82,6 +82,7 @@ class NeighList : protected Pointers { int AIRct_ssa[8]; // count of how many atoms in each AIR uint16_t (*ndxAIR_ssa)[8]; // for each atom, last neighbor index of each AIR + class NPair *np; // ptr to NPair instance I depend on // methods diff --git a/src/neighbor.cpp b/src/neighbor.cpp index e0b84cc410..148dcbd7e9 100644 --- a/src/neighbor.cpp +++ b/src/neighbor.cpp @@ -785,7 +785,7 @@ int Neighbor::init_pair() } PairCreator pair_creator = pairclass[flag-1]; - neigh_pair[i] = pair_creator(lmp); + lists[i]->np = neigh_pair[i] = pair_creator(lmp); neigh_pair[i]->post_constructor(requests[i]); neigh_pair[i]->istyle = flag; From ab32d136b97e0f83e4a8e21e60251394623f1787 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Tue, 7 Feb 2017 13:03:07 -0500 Subject: [PATCH 139/267] USER-DPD: SSA with Kokkos: make stencil's sx, sy, sz variables public --- src/nstencil.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nstencil.h b/src/nstencil.h index 7985d23202..a4c6a4af66 100644 --- a/src/nstencil.h +++ b/src/nstencil.h @@ -30,6 +30,7 
@@ class NStencil : protected Pointers { int *nstencil_multi; // # bins in each type-based multi stencil int **stencil_multi; // list of bin offsets in each stencil double **distsq_multi; // sq distances to bins in each stencil + int sx,sy,sz; // extent of stencil in each dim double cutoff_custom; // cutoff set by requestor @@ -64,7 +65,6 @@ class NStencil : protected Pointers { int xyzflag; // 1 if stencilxyz is allocated int maxstencil; // max size of stencil int maxstencil_multi; // max sizes of stencils - int sx,sy,sz; // extent of stencil in each dim int dimension; From 4b3197202ba2fc56f946376719bce24f1452897f Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Tue, 7 Feb 2017 13:38:49 -0500 Subject: [PATCH 140/267] USER-DPD: Rework SSA to use a new neighbor list structure, ready for Kokkos NOTE: pair evaluation order changes, causing numerical differences! Atom pair processing order is fully planned out in npair_half_bin_newton_ssa Makes the SSA neighbor list structure very different. Do not use by others! Each local is in ilist, numneigh, and firstneigh four times instead of once. Changes LAMMPS core code that had been previously changed for USER-DPD/SSA: Removes ssaAIR[] from class Atom as it is now unused. Removes ndxAIR_ssa[] from class NeighList as it is now unused. Increases length of ilist[], numneigh[], and firstneigh[] if SSA flag set. 
--- src/USER-DPD/fix_shardlow.cpp | 39 ++-- src/USER-DPD/nbin_ssa.cpp | 20 -- src/USER-DPD/nbin_ssa.h | 3 - src/USER-DPD/npair_half_bin_newton_ssa.cpp | 204 ++++++++++++++------- src/USER-DPD/npair_half_bin_newton_ssa.h | 11 +- src/atom.cpp | 2 - src/atom.h | 1 - src/neigh_list.cpp | 22 +-- src/neigh_list.h | 1 - 9 files changed, 174 insertions(+), 129 deletions(-) diff --git a/src/USER-DPD/fix_shardlow.cpp b/src/USER-DPD/fix_shardlow.cpp index 4220760a9b..4a7fff66cf 100644 --- a/src/USER-DPD/fix_shardlow.cpp +++ b/src/USER-DPD/fix_shardlow.cpp @@ -55,6 +55,7 @@ #include "pair_dpd_fdt.h" #include "pair_dpd_fdt_energy.h" #include "pair.h" +#include "npair_half_bin_newton_ssa.h" #include "citeme.h" using namespace LAMMPS_NS; @@ -500,19 +501,30 @@ void FixShardlow::initial_integrate(int vflag) dtsqrt = sqrt(update->dt); - ii = 0; + NPairHalfBinNewtonSSA *np_ssa = dynamic_cast(list->np); + if (!np_ssa) error->one(FLERR, "NPair wasn't a NPairHalfBinNewtonSSA object"); + int ssa_phaseCt = np_ssa->ssa_phaseCt; + int *ssa_phaseLen = np_ssa->ssa_phaseLen; + int **ssa_itemLoc = np_ssa->ssa_itemLoc; + int **ssa_itemLen = np_ssa->ssa_itemLen; + // process neighbors in the local AIR - while (ii < inum) { - i = ilist[ii]; - for (int subphase = 0; subphase < 4; subphase++) { - int start = (subphase > 0) ? 
list->ndxAIR_ssa[i][subphase - 1] : 0; - int len = list->ndxAIR_ssa[i][subphase] - start; - if (len > 0) { - if (useDPDE) ssa_update_dpde(i, &(list->firstneigh[i][start]), len); - else ssa_update_dpd(i, &(list->firstneigh[i][start]), len); + for (int workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) { + int workItemCt = ssa_phaseLen[workPhase]; + + for (int workItem = 0; workItem < workItemCt; ++workItem) { + int ct = ssa_itemLen[workPhase][workItem]; + ii = ssa_itemLoc[workPhase][workItem]; + + while (ct-- > 0) { + int len = list->numneigh[ii]; + if (len > 0) { + if (useDPDE) ssa_update_dpde(ilist[ii], list->firstneigh[ii], len); + else ssa_update_dpd(ilist[ii], list->firstneigh[ii], len); + } + ii++; } } - ii++; } ii = inum; @@ -531,10 +543,9 @@ void FixShardlow::initial_integrate(int vflag) // process neighbors in this AIR while (ct-- > 0) { - i = ilist[ii]; - int len = list->numneigh[i]; - if (useDPDE) ssa_update_dpde(i, &(list->firstneigh[i][0]), len); - else ssa_update_dpd(i, &(list->firstneigh[i][0]), len); + int len = list->numneigh[ii]; + if (useDPDE) ssa_update_dpde(ilist[ii], list->firstneigh[ii], len); + else ssa_update_dpd(ilist[ii], list->firstneigh[ii], len); ii++; } diff --git a/src/USER-DPD/nbin_ssa.cpp b/src/USER-DPD/nbin_ssa.cpp index 7e603af714..4c57a8e70f 100644 --- a/src/USER-DPD/nbin_ssa.cpp +++ b/src/USER-DPD/nbin_ssa.cpp @@ -30,9 +30,6 @@ using namespace LAMMPS_NS; NBinSSA::NBinSSA(LAMMPS *lmp) : NBinStandard(lmp) { - maxbin_ssa = 0; - binlist_ssa = NULL; - binct_ssa = NULL; for (int i = 0; i < 8; i++) { gairhead_ssa[i] = -1; } @@ -40,8 +37,6 @@ NBinSSA::NBinSSA(LAMMPS *lmp) : NBinStandard(lmp) NBinSSA::~NBinSSA() { - memory->destroy(binlist_ssa); - memory->destroy(binct_ssa); } /* ---------------------------------------------------------------------- @@ -72,8 +67,6 @@ void NBinSSA::bin_atoms() for (i = 0; i < mbins; i++) { binhead[i] = -1; - binlist_ssa[i] = -1; - binct_ssa[i] = 0; } // bin in reverse order so linked list will be in 
forward order @@ -108,7 +101,6 @@ void NBinSSA::bin_atoms() if (zbin >= lbinzhi) lbinzhi = zbin + 1; bins[i] = binhead[ibin]; binhead[ibin] = i; - ++(binct_ssa[ibin]); } } @@ -119,14 +111,6 @@ void NBinSSA::bin_atoms_setup(int nall) { NBinStandard::bin_atoms_setup(nall); // Setup the parent class's data too - if (mbins > maxbin_ssa) { - maxbin_ssa = mbins; - memory->destroy(binlist_ssa); - memory->destroy(binct_ssa); - memory->create(binlist_ssa,maxbin_ssa,"binlist_ssa"); - memory->create(binct_ssa,maxbin_ssa,"binct_ssa"); - } - // Clear the local bin extent bounding box. lbinxlo = mbinx - 1; // Safe to = stencil->sx + 1 lbinylo = mbiny - 1; // Safe to = stencil->sy + 1 @@ -142,10 +126,6 @@ bigint NBinSSA::memory_usage() { bigint bytes = NBinStandard::memory_usage(); // Count the parent's usage too - if (maxbin_ssa) { - bytes += memory->usage(binlist_ssa,maxbin_ssa); - bytes += memory->usage(binct_ssa,maxbin_ssa); - } return bytes; } diff --git a/src/USER-DPD/nbin_ssa.h b/src/USER-DPD/nbin_ssa.h index f26f8c77f0..2a0175081e 100644 --- a/src/USER-DPD/nbin_ssa.h +++ b/src/USER-DPD/nbin_ssa.h @@ -29,10 +29,7 @@ namespace LAMMPS_NS { class NBinSSA : public NBinStandard { public: - int *binlist_ssa; // index in neighlist of 1st local atom in each bin - int *binct_ssa; // count of local atoms in each bin int gairhead_ssa[8]; // index of 1st ghost atom in each AIR - int maxbin_ssa; // size of binlist_ssa and binct_ssa arrays // Bounds of the local atoms in the binhead array int lbinxlo; // lowest local bin x-dim coordinate diff --git a/src/USER-DPD/npair_half_bin_newton_ssa.cpp b/src/USER-DPD/npair_half_bin_newton_ssa.cpp index 77b20966b0..2c787d6398 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.cpp +++ b/src/USER-DPD/npair_half_bin_newton_ssa.cpp @@ -34,7 +34,27 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairHalfBinNewtonSSA::NPairHalfBinNewtonSSA(LAMMPS *lmp) : NPair(lmp) {} 
+NPairHalfBinNewtonSSA::NPairHalfBinNewtonSSA(LAMMPS *lmp) : NPair(lmp) +{ + ssa_maxPhaseCt = 0; + ssa_maxPhaseLen = 0; + ssa_phaseCt = 0; + ssa_phaseLen = NULL; + ssa_itemLoc = NULL; + ssa_itemLen = NULL; +} + +/* ---------------------------------------------------------------------- */ + +NPairHalfBinNewtonSSA::~NPairHalfBinNewtonSSA() +{ + ssa_maxPhaseCt = 0; + ssa_maxPhaseLen = 0; + ssa_phaseCt = 0; + memory->destroy(ssa_phaseLen); + memory->destroy(ssa_itemLoc); + memory->destroy(ssa_itemLen); +} /* ---------------------------------------------------------------------- binned neighbor list construction with full Newton's 3rd law @@ -81,8 +101,6 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) if (!nb_ssa) error->one(FLERR, "NBin wasn't a NBinSSA object"); int *bins = nb_ssa->bins; int *binhead = nb_ssa->binhead; - int *binlist_ssa = nb_ssa->binlist_ssa; - int *binct_ssa = nb_ssa->binct_ssa; int *gairhead_ssa = &(nb_ssa->gairhead_ssa[0]); int inum = 0; @@ -96,74 +114,81 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) int lbinzlo = nb_ssa->lbinzlo; int lbinzhi = nb_ssa->lbinzhi; + int sx1 = ns_ssa->sx + 1; + int sy1 = ns_ssa->sy + 1; + int sz1 = ns_ssa->sz + 1; + + ssa_phaseCt = sz1*sy1*sx1; + + xbin = (lbinxhi - lbinxlo + sx1 - 1) / sx1 + 1; + ybin = (lbinyhi - lbinylo + sy1 - 1) / sy1 + 1; + zbin = (lbinzhi - lbinzlo + sz1 - 1) / sz1 + 1; + + int phaseLenEstimate = xbin*ybin*zbin; + + if (ssa_phaseCt > ssa_maxPhaseCt) { + ssa_maxPhaseCt = ssa_phaseCt; + ssa_maxPhaseLen = 0; + memory->destroy(ssa_phaseLen); + memory->destroy(ssa_itemLoc); + memory->destroy(ssa_itemLen); + memory->create(ssa_phaseLen,ssa_maxPhaseCt,"NPairHalfBinNewtonSSA:ssa_phaseLen"); + } + + if (phaseLenEstimate > ssa_maxPhaseLen) { + ssa_maxPhaseLen = phaseLenEstimate; + memory->destroy(ssa_itemLoc); + memory->destroy(ssa_itemLen); + memory->create(ssa_itemLoc,ssa_maxPhaseCt,ssa_maxPhaseLen,"NPairHalfBinNewtonSSA:ssa_itemLoc"); + 
memory->create(ssa_itemLen,ssa_maxPhaseCt,ssa_maxPhaseLen,"NPairHalfBinNewtonSSA:ssa_itemLen"); + } + ipage->reset(); + int workPhase = 0; // loop over bins with local atoms, storing half of the neighbors - for (zbin = lbinzlo; zbin < lbinzhi; zbin++) { - for (ybin = lbinylo; ybin < lbinyhi; ybin++) { - for (xbin = lbinxlo; xbin < lbinxhi; xbin++) { - ibin = zbin*mbiny*mbinx + ybin*mbinx + xbin; - binlist_ssa[ibin] = inum; // record where ibin starts in ilist - for (i = binhead[ibin]; i >= 0; i = bins[i]) { - n = 0; - neighptr = ipage->vget(); + for (int zoff = ns_ssa->sz; zoff >= 0; --zoff) { + for (int yoff = ns_ssa->sy; yoff >= 0; --yoff) { + for (int xoff = ns_ssa->sx; xoff >= 0; --xoff) { + int workItem = 0; + for (zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) { + for (ybin = lbinylo + yoff - ns_ssa->sy; ybin < lbinyhi; ybin += sy1) { + for (xbin = lbinxlo + xoff - ns_ssa->sx; xbin < lbinxhi; xbin += sz1) { + if (workItem >= phaseLenEstimate) error->one(FLERR,"phaseLenEstimate was too small"); + ssa_itemLoc[workPhase][workItem] = inum; // record where workItem starts in ilist - itype = type[i]; - xtmp = x[i][0]; - ytmp = x[i][1]; - ztmp = x[i][2]; - if (moltemplate) { - imol = molindex[i]; - iatom = molatom[i]; - tagprev = tag[i] - iatom - 1; - } - - // loop over rest of local atoms in i's bin - // just store them, since j is beyond i in linked list - - for (j = bins[i]; j >= 0; j = bins[j]) { - - jtype = type[j]; - if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; - - delx = xtmp - x[j][0]; - dely = ytmp - x[j][1]; - delz = ztmp - x[j][2]; - rsq = delx*delx + dely*dely + delz*delz; - - if (rsq <= cutneighsq[itype][jtype]) { - if (molecular) { - if (!moltemplate) - which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >= 0) - which = find_special(onemols[imol]->special[iatom], - onemols[imol]->nspecial[iatom], - tag[j]-tagprev); - else which = 0; - if (which == 0) neighptr[n++] = j; - else if 
(domain->minimum_image_check(delx,dely,delz)) - neighptr[n++] = j; - else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); - } else neighptr[n++] = j; - } - } - - // loop over all local atoms in other bins in "half" stencil - - k = 0; for (int subphase = 0; subphase < 4; subphase++) { - for (; k < nstencil_ssa[subphase]; k++) { - for (j = binhead[ibin+stencil[k]]; j >= 0; - j = bins[j]) { + int s_ybin = ybin + ((subphase & 0x2) ? ns_ssa->sy : 0); + int s_xbin = xbin + ((subphase & 0x1) ? ns_ssa->sx : 0); + int ibin, ct; + if ((s_ybin < lbinylo) || (s_ybin >= lbinyhi)) continue; + if ((s_xbin < lbinxlo) || (s_xbin >= lbinxhi)) continue; + ibin = zbin*nb_ssa->mbiny*nb_ssa->mbinx + + s_ybin*nb_ssa->mbinx + + s_xbin; + + for (i = binhead[ibin]; i >= 0; i = bins[i]) { + n = 0; + neighptr = ipage->vget(); + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + if (moltemplate) { + imol = molindex[i]; + iatom = molatom[i]; + tagprev = tag[i] - iatom - 1; + } + // loop over rest of local atoms in i's bin if this is subphase 0 + // just store them, since j is beyond i in linked list + if (subphase == 0) for (j = bins[i]; j >= 0; j = bins[j]) { jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; - delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; - if (rsq <= cutneighsq[itype][jtype]) { if (molecular) { if (!moltemplate) @@ -180,22 +205,59 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) } else neighptr[n++] = j; } } - } - list->ndxAIR_ssa[i][subphase] = n; // record end of this subphase - } - if (n > 0) { - ilist[inum++] = i; + // loop over all local atoms in other bins in "subphase" of stencil + k = (subphase > 0) ? 
nstencil_ssa[subphase - 1] : 0; + for (; k < nstencil_ssa[subphase]; k++) { + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + if (!moltemplate) + which = find_special(special[i],nspecial[i],tag[j]); + else if (imol >= 0) + which = find_special(onemols[imol]->special[iatom], + onemols[imol]->nspecial[iatom], + tag[j]-tagprev); + else which = 0; + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + } + + if (n > 0) { + firstneigh[inum] = neighptr; + numneigh[inum] = n; + ilist[inum++] = i; + } + ipage->vgot(n); + if (ipage->status()) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } } - firstneigh[i] = neighptr; - numneigh[i] = n; - ipage->vgot(n); - if (ipage->status()) - error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + // record where workItem ends in ilist + ssa_itemLen[workPhase][workItem] = inum - ssa_itemLoc[workPhase][workItem]; + if (ssa_itemLen[workPhase][workItem] > 0) workItem++; } } } + + // record where workPhase ends + ssa_phaseLen[workPhase++] = workItem; } + } + } + + if (ssa_phaseCt != workPhase) error->one(FLERR,"ssa_phaseCt was wrong"); list->AIRct_ssa[0] = list->inum = inum; @@ -258,11 +320,11 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) } if (n > 0) { + firstneigh[inum + gnum] = neighptr; + numneigh[inum + gnum] = n; ilist[inum + (gnum++)] = i; ++locAIRct; } - firstneigh[i] = neighptr; - numneigh[i] = n; ipage->vgot(n); if (ipage->status()) error->one(FLERR,"Neighbor (ghost) list overflow, boost neigh_modify one"); diff --git 
a/src/USER-DPD/npair_half_bin_newton_ssa.h b/src/USER-DPD/npair_half_bin_newton_ssa.h index c9ccbc4bd9..ea292316ca 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.h +++ b/src/USER-DPD/npair_half_bin_newton_ssa.h @@ -28,9 +28,18 @@ namespace LAMMPS_NS { class NPairHalfBinNewtonSSA : public NPair { public: + // SSA Work plan data structures + int ssa_phaseCt; + int *ssa_phaseLen; + int **ssa_itemLoc; + int **ssa_itemLen; + NPairHalfBinNewtonSSA(class LAMMPS *); - ~NPairHalfBinNewtonSSA() {} + ~NPairHalfBinNewtonSSA(); void build(class NeighList *); + private: + int ssa_maxPhaseCt; + int ssa_maxPhaseLen; }; } diff --git a/src/atom.cpp b/src/atom.cpp index 0920dc3a02..de98b65470 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -99,7 +99,6 @@ Atom::Atom(LAMMPS *lmp) : Pointers(lmp) uCond = uMech = uChem = uCG = uCGnew = NULL; duChem = NULL; dpdTheta = NULL; - ssaAIR = NULL; // USER-SMD @@ -296,7 +295,6 @@ Atom::~Atom() memory->destroy(uCG); memory->destroy(uCGnew); memory->destroy(duChem); - memory->destroy(ssaAIR); memory->destroy(nspecial); memory->destroy(special); diff --git a/src/atom.h b/src/atom.h index de7cda06ac..745377cee1 100644 --- a/src/atom.h +++ b/src/atom.h @@ -93,7 +93,6 @@ class Atom : protected Pointers { double *duChem; double *dpdTheta; int nspecies_dpd; - int *ssaAIR; // Shardlow Splitting Algorithm Active Interaction Region number // molecular info diff --git a/src/neigh_list.cpp b/src/neigh_list.cpp index e8fd4130fc..6376637832 100644 --- a/src/neigh_list.cpp +++ b/src/neigh_list.cpp @@ -78,7 +78,7 @@ NeighList::NeighList(LAMMPS *lmp) : Pointers(lmp) // USER-DPD package - ndxAIR_ssa = NULL; + for (int i = 0; i < 8; i++) AIRct_ssa[i] = 0; np = NULL; } @@ -98,10 +98,6 @@ NeighList::~NeighList() delete [] iskip; memory->destroy(ijskip); - - if (ssa) { - memory->sfree(ndxAIR_ssa); - } } /* ---------------------------------------------------------------------- @@ -202,14 +198,16 @@ void NeighList::grow(int nlocal, int nall) if (listmiddle) 
listmiddle->grow(nlocal,nall); // skip if data structs are already big enough - - if (ghost) { + if (ssa) { + if ((nlocal * 3) + nall <= maxatom) return; + } else if (ghost) { if (nall <= maxatom) return; } else { if (nlocal <= maxatom) return; } - maxatom = atom->nmax; + if (ssa) maxatom = (nlocal * 3) + nall; + else maxatom = atom->nmax; memory->destroy(ilist); memory->destroy(numneigh); @@ -223,12 +221,6 @@ void NeighList::grow(int nlocal, int nall) firstdouble = (double **) memory->smalloc(maxatom*sizeof(double *), "neighlist:firstdouble"); } - - if (ssa) { - if (ndxAIR_ssa) memory->sfree(ndxAIR_ssa); - ndxAIR_ssa = (uint16_t (*)[8]) memory->smalloc(sizeof(uint16_t)*8*maxatom, - "neighlist:ndxAIR_ssa"); - } } /* ---------------------------------------------------------------------- @@ -305,7 +297,5 @@ bigint NeighList::memory_usage() } } - if (ndxAIR_ssa) bytes += sizeof(uint16_t) * 8 * maxatom; - return bytes; } diff --git a/src/neigh_list.h b/src/neigh_list.h index ea88e9b28b..bef512512c 100644 --- a/src/neigh_list.h +++ b/src/neigh_list.h @@ -81,7 +81,6 @@ class NeighList : protected Pointers { // USER-DPD package and Shardlow Splitting Algorithm (SSA) support int AIRct_ssa[8]; // count of how many atoms in each AIR - uint16_t (*ndxAIR_ssa)[8]; // for each atom, last neighbor index of each AIR class NPair *np; // ptr to NPair instance I depend on // methods From e0bafa499d55d0f273b23adf87cbaf824d5f9f11 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Wed, 15 Feb 2017 15:03:40 -0500 Subject: [PATCH 141/267] indentation fixes in npair_kokkos.cpp, plus a comment question --- src/KOKKOS/npair_kokkos.cpp | 38 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index f49e44c352..4f17835717 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -170,7 +170,7 @@ void NPairKokkos::build(NeighList *list_) data.special_flag[2] = 
special_flag[2]; data.special_flag[3] = special_flag[3]; - if(list->d_neighbors.dimension_0()d_neighbors.dimension_0()d_neighbors = typename ArrayTypes::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs); list->d_numneigh = typename ArrayTypes::t_int_1d("numneigh", nall*1.1); data.neigh_list.d_neighbors = list->d_neighbors; @@ -179,10 +179,10 @@ void NPairKokkos::build(NeighList *list_) data.h_resize()=1; while(data.h_resize()) { data.h_new_maxneighs() = list->maxneighs; - data.h_resize() = 0; + data.h_resize() = 0; - Kokkos::deep_copy(data.resize, data.h_resize); - Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs); + Kokkos::deep_copy(data.resize, data.h_resize); + Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs); #ifdef KOKKOS_HAVE_CUDA #define BINS_PER_BLOCK 2 const int factor = atoms_per_bin<64?2:1; @@ -191,27 +191,27 @@ void NPairKokkos::build(NeighList *list_) const int factor = 1; #endif -if (GHOST) { - NPairKokkosBuildFunctorGhost f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); - Kokkos::parallel_for(nall, f); -} else { - if (newton_pair) { - NPairKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); + if (GHOST) { + NPairKokkosBuildFunctorGhost f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); + Kokkos::parallel_for(nall, f); + } else { + if (newton_pair) { + NPairKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); #ifdef KOKKOS_HAVE_CUDA - Kokkos::parallel_for(config, f); + Kokkos::parallel_for(config, f); #else - Kokkos::parallel_for(nall, f); + Kokkos::parallel_for(nall, f); #endif - } else { - NPairKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); + } else { + NPairKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); #ifdef KOKKOS_HAVE_CUDA - Kokkos::parallel_for(config, f); + Kokkos::parallel_for(config, f); #else - Kokkos::parallel_for(nall, f); + Kokkos::parallel_for(nall, f); #endif - } -} - DeviceType::fence(); + } + } + 
DeviceType::fence(); deep_copy(data.h_resize, data.resize); if(data.h_resize()) { From 5289ec0b39f2c6600da1c246bcb59c2a668e5b56 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 17 Feb 2017 18:39:04 -0500 Subject: [PATCH 142/267] cleanup: remove unused binatomsItem() declaration in npair_kokkos.h --- src/KOKKOS/npair_kokkos.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index 54726cb971..87fa0b8aee 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -277,9 +277,6 @@ class NeighborKokkosExecute void build_ItemCuda(typename Kokkos::TeamPolicy::member_type dev) const; #endif - KOKKOS_INLINE_FUNCTION - void binatomsItem(const int &i) const; - KOKKOS_INLINE_FUNCTION int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const { From c2ee3285fc79729d03e458ca2d5118699bd40c7a Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 17 Feb 2017 21:54:41 -0500 Subject: [PATCH 143/267] USER-DPD: change nstencil_ssa[] to eliminate a corner case Saves a conditional inside an NPairHalfBinNewtonSSA::build() inner loop --- src/USER-DPD/npair_half_bin_newton_ssa.cpp | 3 +-- .../nstencil_half_bin_2d_newton_ssa.cpp | 17 +++++++++-------- .../nstencil_half_bin_3d_newton_ssa.cpp | 17 +++++++++-------- src/USER-DPD/nstencil_ssa.h | 3 ++- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/USER-DPD/npair_half_bin_newton_ssa.cpp b/src/USER-DPD/npair_half_bin_newton_ssa.cpp index 2c787d6398..8d260dd2be 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.cpp +++ b/src/USER-DPD/npair_half_bin_newton_ssa.cpp @@ -207,8 +207,7 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) } // loop over all local atoms in other bins in "subphase" of stencil - k = (subphase > 0) ? 
nstencil_ssa[subphase - 1] : 0; - for (; k < nstencil_ssa[subphase]; k++) { + for (k = nstencil_ssa[subphase]; k < nstencil_ssa[subphase+1]; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; diff --git a/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp b/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp index 084d5b0602..5df65918d3 100644 --- a/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp +++ b/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp @@ -42,6 +42,7 @@ NStencilHalfBin2dNewtonSSA::NStencilHalfBin2dNewtonSSA(LAMMPS *lmp) : void NStencilHalfBin2dNewtonSSA::create() { int i,j,pos = 0; + nstencil_ssa[0] = 0; // redundant info, but saves a conditional // Subphase 0: upper right front bins (red) for (j = 0; j <= sy; j++) for (i = 0; i <= sx; i++) @@ -52,8 +53,8 @@ void NStencilHalfBin2dNewtonSSA::create() stencilxyz[pos][2] = 0; stencil[pos++] = j*mbinx + i; } - nstencil_ssa[0] = pos; + nstencil_ssa[1] = pos; // Subphase 1: upper left front bins (light blue) for (j = 1; j <= sy; j++) for (i = -sx; i < 0; i++) @@ -63,21 +64,21 @@ void NStencilHalfBin2dNewtonSSA::create() stencilxyz[pos][2] = 0; stencil[pos++] = j*mbinx + i; } - nstencil_ssa[1] = pos; - // Subphase 2: lower right front bins (yellow) nstencil_ssa[2] = pos; + // Subphase 2: lower right front bins (yellow) - // Subphase 3: lower left front bins (blue) nstencil_ssa[3] = pos; + // Subphase 3: lower left front bins (blue) + nstencil_ssa[4] = pos; // record end of half stencil // Now include additional bins for AIR ghosts, and impure-to-pure locals // Subphase 4: upper right back bins (pink) - nstencil_ssa[4] = pos; + // nstencil_ssa[5] = pos; // Subphase 5: upper left back bins (light green) - nstencil_ssa[5] = pos; + // nstencil_ssa[6] = pos; // Subphase 6: lower right back bins (white) for (j = -sy; j < 0; j++) for (i = 0; i <= sx; i++) @@ -87,8 +88,8 @@ void NStencilHalfBin2dNewtonSSA::create() 
stencilxyz[pos][2] = 0; stencil[pos++] = j*mbinx + i; } - nstencil_ssa[6] = pos; + // nstencil_ssa[7] = pos; // Subphase 7: lower left back bins (purple) for (j = -sy; j <= 0; j++) for (i = -sx; i < 0; i++) @@ -98,7 +99,7 @@ void NStencilHalfBin2dNewtonSSA::create() stencilxyz[pos][2] = 0; stencil[pos++] = j*mbinx + i; } - nstencil_ssa[7] = pos; + // nstencil_ssa[8] = pos; // Also, include the centroid for the AIR ghosts. stencilxyz[pos][0] = 0; diff --git a/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp b/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp index 1741a1e847..3b1c85bdc1 100644 --- a/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp +++ b/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp @@ -42,6 +42,7 @@ NStencilHalfBin3dNewtonSSA::NStencilHalfBin3dNewtonSSA(LAMMPS *lmp) : void NStencilHalfBin3dNewtonSSA::create() { int i,j,k,pos = 0; + nstencil_ssa[0] = 0; // redundant info, but saves a conditional // Subphase 0: upper right front bins (red) for (k = 0; k <= sz; k++) for (j = 0; j <= sy; j++) @@ -53,8 +54,8 @@ void NStencilHalfBin3dNewtonSSA::create() stencilxyz[pos][2] = k; stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; } - nstencil_ssa[0] = pos; + nstencil_ssa[1] = pos; // Subphase 1: upper left front bins (light blue) for (k = 0; k <= sz; k++) for (j = 1; j <= sy; j++) @@ -65,8 +66,8 @@ void NStencilHalfBin3dNewtonSSA::create() stencilxyz[pos][2] = k; stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; } - nstencil_ssa[1] = pos; + nstencil_ssa[2] = pos; // Subphase 2: lower right front bins (yellow) for (k = 1; k <= sz; k++) for (j = -sy; j < 0; j++) @@ -77,8 +78,8 @@ void NStencilHalfBin3dNewtonSSA::create() stencilxyz[pos][2] = k; stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; } - nstencil_ssa[2] = pos; + nstencil_ssa[3] = pos; // Subphase 3: lower left front bins (blue) for (k = 1; k <= sz; k++) for (j = -sy; j <= 0; j++) @@ -89,8 +90,8 @@ void NStencilHalfBin3dNewtonSSA::create() stencilxyz[pos][2] = k; stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; } - 
nstencil_ssa[3] = pos; + nstencil_ssa[4] = pos; // record end of half stencil // Now include additional bins for AIR ghosts, and impure-to-pure locals // Subphase 4: upper right back bins (pink) for (k = -sz; k < 0; k++) @@ -102,8 +103,8 @@ void NStencilHalfBin3dNewtonSSA::create() stencilxyz[pos][2] = k; stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; } - nstencil_ssa[4] = pos; + // nstencil_ssa[5] = pos; // Subphase 5: upper left back bins (light green) for (k = -sz; k < 0; k++) for (j = 1; j <= sy; j++) @@ -114,8 +115,8 @@ void NStencilHalfBin3dNewtonSSA::create() stencilxyz[pos][2] = k; stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; } - nstencil_ssa[5] = pos; + // nstencil_ssa[6] = pos; // Subphase 6: lower right back bins (white) for (k = -sz; k <= 0; k++) for (j = -sy; j < 0; j++) @@ -126,8 +127,8 @@ void NStencilHalfBin3dNewtonSSA::create() stencilxyz[pos][2] = k; stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; } - nstencil_ssa[6] = pos; + // nstencil_ssa[7] = pos; // Subphase 7: lower left back bins (purple) for (k = -sz; k <= 0; k++) for (j = -sy; j <= 0; j++) @@ -138,7 +139,7 @@ void NStencilHalfBin3dNewtonSSA::create() stencilxyz[pos][2] = k; stencil[pos++] = k*mbiny*mbinx + j*mbinx + i; } - nstencil_ssa[7] = pos; + //nstencil_ssa[8] = pos; // Also, include the centroid for the AIR ghosts. stencilxyz[pos][0] = 0; diff --git a/src/USER-DPD/nstencil_ssa.h b/src/USER-DPD/nstencil_ssa.h index a5e3723271..f6f91fefde 100644 --- a/src/USER-DPD/nstencil_ssa.h +++ b/src/USER-DPD/nstencil_ssa.h @@ -24,7 +24,8 @@ class NStencilSSA : public NStencil { ~NStencilSSA() {} virtual void create() = 0; - int nstencil_ssa[8]; // last stencil index for each subphase + // first stencil index for each subphase, with last index at end + int nstencil_ssa[5]; }; } From d1a0a3e1c369254f173845ca6c4200f956eed0f5 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 17 Feb 2017 22:00:56 -0500 Subject: [PATCH 144/267] USER-DPD: first attempt at nbin_ssa_kokkos... It compiles! 
--- src/KOKKOS/nbin_ssa_kokkos.cpp | 233 +++++++++++++++++++++++++++++++++ src/KOKKOS/nbin_ssa_kokkos.h | 193 +++++++++++++++++++++++++++ 2 files changed, 426 insertions(+) create mode 100644 src/KOKKOS/nbin_ssa_kokkos.cpp create mode 100644 src/KOKKOS/nbin_ssa_kokkos.h diff --git a/src/KOKKOS/nbin_ssa_kokkos.cpp b/src/KOKKOS/nbin_ssa_kokkos.cpp new file mode 100644 index 0000000000..6ed8e9f3e4 --- /dev/null +++ b/src/KOKKOS/nbin_ssa_kokkos.cpp @@ -0,0 +1,233 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: + James Larentzos (ARL) and Timothy I. 
Mattox (Engility Corporation) +------------------------------------------------------------------------- */ + +#include "nbin_ssa_kokkos.h" +#include "neighbor.h" +#include "atom_kokkos.h" +#include "group.h" +#include "domain.h" +#include "comm.h" +#include "update.h" +#include "error.h" +#include "atom_masks.h" + +// #include "memory.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +NBinSSAKokkos::NBinSSAKokkos(LAMMPS *lmp) : NBinStandard(lmp) +{ + atoms_per_bin = ghosts_per_gbin = 16; + + d_resize = typename AT::t_int_scalar("NBinSSAKokkos::d_resize"); +#ifndef KOKKOS_USE_CUDA_UVM + h_resize = Kokkos::create_mirror_view(d_resize); +#else + h_resize = d_resize; +#endif + h_resize() = 1; + + k_gbincount = DAT::tdual_int_1d("NBinSSAKokkos::gbincount",8); + gbincount = k_gbincount.view(); +} + +/* ---------------------------------------------------------------------- */ + +template +void NBinSSAKokkos::bin_atoms_setup(int nall) +{ + if (mbins > (int) k_bins.d_view.dimension_0()) { + k_bins = DAT::tdual_int_2d("NBinSSAKokkos::bins",mbins,atoms_per_bin); + bins = k_bins.view(); + + k_bincount = DAT::tdual_int_1d("NBinSSAKokkos::bincount",mbins); + bincount = k_bincount.view(); + } + + ghosts_per_gbin = atom->nghost / 7; // estimate needed size + + if (ghosts_per_gbin > (int) k_gbins.d_view.dimension_1()) { + k_gbins = DAT::tdual_int_2d("NBinSSAKokkos::gbins",8,ghosts_per_gbin); + gbins = k_gbins.view(); + } + + // Clear the local bin extent bounding box. 
+ h_lbinxlo() = mbinx - 1; // Safe to = stencil->sx + 1 + h_lbinylo() = mbiny - 1; // Safe to = stencil->sy + 1 + h_lbinzlo() = mbinz - 1; // Safe to = stencil->sz + 1 + h_lbinxhi() = 0; // Safe to = mbinx - stencil->sx - 1 + h_lbinyhi() = 0; // Safe to = mbiny - stencil->sy - 1 + h_lbinzhi() = 0; // Safe to = mbinz - stencil->sz - 1 + deep_copy(d_lbinxlo, h_lbinxlo); + deep_copy(d_lbinylo, h_lbinylo); + deep_copy(d_lbinzlo, h_lbinzlo); + deep_copy(d_lbinxhi, h_lbinxhi); + deep_copy(d_lbinyhi, h_lbinyhi); + deep_copy(d_lbinzhi, h_lbinzhi); +} + +/* ---------------------------------------------------------------------- + bin owned and ghost atoms for the Shardlow Splitting Algorithm (SSA) + local atoms are in distinct bins (binhead[]) from the ghosts + ghost atoms are "binned" in gairhead_ssa[] instead + ghosts which are not in an Active Interaction Region (AIR) are skipped +------------------------------------------------------------------------- */ + +template +void NBinSSAKokkos::bin_atoms() +{ + last_bin = update->ntimestep; + + int i; + + // bin the ghost atoms + h_resize() = 1; + while(h_resize() > 0) { + h_resize() = 0; + deep_copy(d_resize, h_resize); + + for (int i = 0; i < 8; i++) { + k_gbincount.h_view(i) = 0; + } + k_gbincount.modify(); + k_gbincount.sync(); + DeviceType::fence(); // FIXME? + + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); + x = atomKK->k_x.view(); + + // I don't think these two lines need to be repeated here... 
- TIM 20170216 + sublo_[0] = domain->sublo[0]; + sublo_[1] = domain->sublo[1]; + sublo_[2] = domain->sublo[2]; + subhi_[0] = domain->subhi[0]; + subhi_[1] = domain->subhi[1]; + subhi_[2] = domain->subhi[2]; + + NPairSSAKokkosBinGhostsFunctor f(*this); + + Kokkos::parallel_for(atom->nghost, f); + DeviceType::fence(); + + deep_copy(h_resize, d_resize); + if(h_resize()) { + k_gbincount.modify(); + k_gbincount.sync(); + for (i = 1; i < 8; i++) { + if (k_gbincount.h_view(i) > ghosts_per_gbin) { + ghosts_per_gbin = k_gbincount.h_view(i); + } + } + k_gbins = DAT::tdual_int_2d("gbins", 8, ghosts_per_gbin); + gbins = k_gbins.view(); + } + } + c_gbins = gbins; // gbins won't change until the next bin_atoms + + // bin the local atoms + h_resize() = 1; + while(h_resize() > 0) { + h_resize() = 0; + deep_copy(d_resize, h_resize); + + MemsetZeroFunctor f_zero; + f_zero.ptr = (void*) k_bincount.view().ptr_on_device(); + Kokkos::parallel_for(mbins, f_zero); + DeviceType::fence(); + + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); + x = atomKK->k_x.view(); + + // I don't think these two lines need to be repeated here... 
- TIM 20170216 + bboxlo_[0] = bboxlo[0]; bboxlo_[1] = bboxlo[1]; bboxlo_[2] = bboxlo[2]; + bboxhi_[0] = bboxhi[0]; bboxhi_[1] = bboxhi[1]; bboxhi_[2] = bboxhi[2]; + + NPairSSAKokkosBinAtomsFunctor f(*this); + + Kokkos::parallel_for(atom->nlocal, f); + DeviceType::fence(); + + deep_copy(h_resize, d_resize); + if(h_resize()) { + + atoms_per_bin += 16; + k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin); + bins = k_bins.view(); + } + } + deep_copy(h_lbinxlo, d_lbinxlo); + deep_copy(h_lbinylo, d_lbinylo); + deep_copy(h_lbinzlo, d_lbinzlo); + deep_copy(h_lbinxhi, d_lbinxhi); + deep_copy(h_lbinyhi, d_lbinyhi); + deep_copy(h_lbinzhi, d_lbinzhi); + c_bins = bins; // bins won't change until the next bin_atoms +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void NBinSSAKokkos::binGhostsItem(const int &i_) const +{ + const int i = i_ + atom->nlocal; + const int iAIR = coord2ssaAIR(x(i, 0), x(i, 1), x(i, 2)); + if (iAIR > 0) { // include only ghost atoms in an AIR + const int ac = Kokkos::atomic_fetch_add(&gbincount[iAIR], (int)1); + if(ac < (int) gbins.dimension_1()) { + gbins(iAIR, ac) = i; + } else { + d_resize() = 1; + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void NBinSSAKokkos::binAtomsItem(const int &i) const +{ + int loc[3]; + const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2), &(loc[0])); + + // Find the bounding box of the local atoms in the bins + if (loc[0] < d_lbinxlo()) Kokkos::atomic_fetch_min(&d_lbinxlo(),loc[0]); + if (loc[0] >= d_lbinxhi()) Kokkos::atomic_fetch_max(&d_lbinxhi(),loc[0] + 1); + if (loc[1] < d_lbinylo()) Kokkos::atomic_fetch_min(&d_lbinylo(),loc[1]); + if (loc[1] >= d_lbinyhi()) Kokkos::atomic_fetch_max(&d_lbinyhi(),loc[1] + 1); + if (loc[2] < d_lbinzlo()) Kokkos::atomic_fetch_min(&d_lbinzlo(),loc[2]); + if (loc[2] >= d_lbinzhi()) Kokkos::atomic_fetch_max(&d_lbinzhi(),loc[2] + 1); + 
+ const int ac = Kokkos::atomic_fetch_add(&(bincount[ibin]), (int)1); + if(ac < (int) bins.dimension_1()) { + bins(ibin, ac) = i; + } else { + d_resize() = 1; + } +} + +namespace LAMMPS_NS { +template class NBinSSAKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class NBinSSAKokkos; +#endif +} diff --git a/src/KOKKOS/nbin_ssa_kokkos.h b/src/KOKKOS/nbin_ssa_kokkos.h new file mode 100644 index 0000000000..a16cb2d0b7 --- /dev/null +++ b/src/KOKKOS/nbin_ssa_kokkos.h @@ -0,0 +1,193 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef NBIN_CLASS + +NBinStyle(ssa/kk/host, + NBinSSAKokkos, + NB_SSA | NB_KOKKOS_HOST) + +NBinStyle(ssa/kk/device, + NBinSSAKokkos, + NB_SSA | NB_KOKKOS_DEVICE) + +#else + +#ifndef LMP_NBIN_SSA_KOKKOS_H +#define LMP_NBIN_SSA_KOKKOS_H + +#include "nbin_standard.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +class NBinSSAKokkos : public NBinStandard { + public: + typedef ArrayTypes AT; + + NBinSSAKokkos(class LAMMPS *); + ~NBinSSAKokkos() {} + void bin_atoms_setup(int); + void bin_atoms(); + + int atoms_per_bin; + DAT::tdual_int_1d k_bincount; + DAT::tdual_int_2d k_bins; + typename AT::t_int_1d bincount; + typename AT::t_int_2d bins; + typename AT::t_int_2d_const c_bins; + + int ghosts_per_gbin; + DAT::tdual_int_1d k_gbincount; + DAT::tdual_int_2d k_gbins; + typename AT::t_int_1d gbincount; + typename AT::t_int_2d gbins; + typename AT::t_int_2d_const c_gbins; + + typename AT::t_int_scalar d_resize; + typename ArrayTypes::t_int_scalar h_resize; + typename AT::t_x_array_randomread x; + + // Bounds of the local atoms in the bins array + typename AT::t_int_scalar d_lbinxlo; // lowest local bin x-dim coordinate + typename AT::t_int_scalar d_lbinylo; // lowest local bin y-dim coordinate + typename AT::t_int_scalar d_lbinzlo; // lowest local bin z-dim coordinate + typename AT::t_int_scalar d_lbinxhi; // highest local bin x-dim coordinate + typename AT::t_int_scalar d_lbinyhi; // highest local bin y-dim coordinate + typename AT::t_int_scalar d_lbinzhi; // highest local bin z-dim coordinate + typename ArrayTypes::t_int_scalar h_lbinxlo; + typename ArrayTypes::t_int_scalar h_lbinylo; + typename ArrayTypes::t_int_scalar h_lbinzlo; + typename ArrayTypes::t_int_scalar h_lbinxhi; + typename ArrayTypes::t_int_scalar h_lbinyhi; + typename ArrayTypes::t_int_scalar h_lbinzhi; + + + KOKKOS_INLINE_FUNCTION + void binAtomsItem(const int &i) const; + + KOKKOS_INLINE_FUNCTION + void 
binGhostsItem(const int &i) const; + +/* ---------------------------------------------------------------------- + convert atom coords into the ssa active interaction region number +------------------------------------------------------------------------- */ + KOKKOS_INLINE_FUNCTION + int coord2ssaAIR(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const + { + int ix, iy, iz; + ix = iy = iz = 0; + if (z < sublo_[2]) iz = -1; + if (z >= subhi_[2]) iz = 1; + if (y < sublo_[1]) iy = -1; + if (y >= subhi_[1]) iy = 1; + if (x < sublo_[0]) ix = -1; + if (x >= subhi_[0]) ix = 1; + if(iz < 0){ + return -1; + } else if(iz == 0){ + if( iy<0 ) return -1; // bottom left/middle/right + if( (iy==0) && (ix<0) ) return -1; // left atoms + if( (iy==0) && (ix==0) ) return 0; // Locally owned atoms + if( (iy==0) && (ix>0) ) return 2; // Right atoms + if( (iy>0) && (ix==0) ) return 1; // Top-middle atoms + if( (iy>0) && (ix!=0) ) return 3; // Top-right and top-left atoms + } else { // iz > 0 + if((ix==0) && (iy==0)) return 4; // Back atoms + if((ix==0) && (iy!=0)) return 5; // Top-back and bottom-back atoms + if((ix!=0) && (iy==0)) return 6; // Left-back and right-back atoms + if((ix!=0) && (iy!=0)) return 7; // Back corner atoms + } + return -2; + } + + KOKKOS_INLINE_FUNCTION + int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const + { + int ix,iy,iz; + + if (x >= bboxhi_[0]) + ix = static_cast ((x-bboxhi_[0])*bininvx) + nbinx; + else if (x >= bboxlo_[0]) { + ix = static_cast ((x-bboxlo_[0])*bininvx); + ix = MIN(ix,nbinx-1); + } else + ix = static_cast ((x-bboxlo_[0])*bininvx) - 1; + + if (y >= bboxhi_[1]) + iy = static_cast ((y-bboxhi_[1])*bininvy) + nbiny; + else if (y >= bboxlo_[1]) { + iy = static_cast ((y-bboxlo_[1])*bininvy); + iy = MIN(iy,nbiny-1); + } else + iy = static_cast ((y-bboxlo_[1])*bininvy) - 1; + + if (z >= bboxhi_[2]) + iz = static_cast ((z-bboxhi_[2])*bininvz) + nbinz; + else if (z >= bboxlo_[2]) { + iz = static_cast 
((z-bboxlo_[2])*bininvz); + iz = MIN(iz,nbinz-1); + } else + iz = static_cast ((z-bboxlo_[2])*bininvz) - 1; + + i[0] = ix - mbinxlo; + i[1] = iy - mbinylo; + i[2] = iz - mbinzlo; + + return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); + } + + private: + double bboxlo_[3],bboxhi_[3]; + double sublo_[3], subhi_[3]; +}; + +template +struct NPairSSAKokkosBinGhostsFunctor { + typedef DeviceType device_type; + + const NBinSSAKokkos c; + + NPairSSAKokkosBinGhostsFunctor(const NBinSSAKokkos &_c): + c(_c) {}; + ~NPairSSAKokkosBinGhostsFunctor() {} + KOKKOS_INLINE_FUNCTION + void operator() (const int & i) const { + c.binGhostsItem(i); + } +}; + +template +struct NPairSSAKokkosBinAtomsFunctor { + typedef DeviceType device_type; + + const NBinSSAKokkos c; + + NPairSSAKokkosBinAtomsFunctor(const NBinSSAKokkos &_c): + c(_c) {}; + ~NPairSSAKokkosBinAtomsFunctor() {} + KOKKOS_INLINE_FUNCTION + void operator() (const int & i) const { + c.binAtomsItem(i); + } +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +*/ From 7feb6c2853b6f2b8f67f84bda07cd9d0ab287e8b Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 17 Feb 2017 22:41:32 -0500 Subject: [PATCH 145/267] USER-DPD: fix a bug in AtomVecDPDKokkos::unpack_restart() --- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 820f11c215..f46f284f14 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -1668,7 +1668,7 @@ int AtomVecDPDKokkos::unpack_restart(double *buf) double **extra = atom->extra; if (atom->nextra_store) { - int size = static_cast (ubuf(buf[m++]).i) - m; + int size = static_cast (buf[0]) - m; for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++]; } From 37810bdc530209dc776f79bdbadf41c232919d04 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 17 Feb 2017 23:06:53 -0500 Subject: [PATCH 146/267] USER-DPD: move centroid bin 
of stencil_ssa to the first slot. Eliminates a special case version of a loop just for Subphase 0. NOTE: pair evaluation order changes, causing numerical differences! This changed the order that close neighbors of ghosts are processed. --- src/USER-DPD/npair_half_bin_newton_ssa.cpp | 32 +++---------------- .../nstencil_half_bin_2d_newton_ssa.cpp | 14 ++++---- .../nstencil_half_bin_3d_newton_ssa.cpp | 14 ++++---- 3 files changed, 21 insertions(+), 39 deletions(-) diff --git a/src/USER-DPD/npair_half_bin_newton_ssa.cpp b/src/USER-DPD/npair_half_bin_newton_ssa.cpp index 8d260dd2be..14095bf349 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.cpp +++ b/src/USER-DPD/npair_half_bin_newton_ssa.cpp @@ -180,35 +180,13 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) iatom = molatom[i]; tagprev = tag[i] - iatom - 1; } - // loop over rest of local atoms in i's bin if this is subphase 0 - // just store them, since j is beyond i in linked list - if (subphase == 0) for (j = bins[i]; j >= 0; j = bins[j]) { - jtype = type[j]; - if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; - delx = xtmp - x[j][0]; - dely = ytmp - x[j][1]; - delz = ztmp - x[j][2]; - rsq = delx*delx + dely*dely + delz*delz; - if (rsq <= cutneighsq[itype][jtype]) { - if (molecular) { - if (!moltemplate) - which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >= 0) - which = find_special(onemols[imol]->special[iatom], - onemols[imol]->nspecial[iatom], - tag[j]-tagprev); - else which = 0; - if (which == 0) neighptr[n++] = j; - else if (domain->minimum_image_check(delx,dely,delz)) - neighptr[n++] = j; - else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); - } else neighptr[n++] = j; - } - } - // loop over all local atoms in other bins in "subphase" of stencil + // loop over all local atoms in the current stencil "subphase" for (k = nstencil_ssa[subphase]; k < nstencil_ssa[subphase+1]; k++) { - for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + const int jbin = 
ibin+stencil[k]; + if (jbin != ibin) j = binhead[jbin]; + else j = bins[i]; // same bin as i, so start just past i in the bin + for (; j >= 0; j = bins[j]) { jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; delx = xtmp - x[j][0]; diff --git a/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp b/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp index 5df65918d3..451381c104 100644 --- a/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp +++ b/src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp @@ -43,6 +43,14 @@ void NStencilHalfBin2dNewtonSSA::create() { int i,j,pos = 0; nstencil_ssa[0] = 0; // redundant info, but saves a conditional + + // Include the centroid at the start. + // It will be handled as part of Subphase 0. + stencilxyz[pos][0] = 0; + stencilxyz[pos][1] = 0; + stencilxyz[pos][2] = 0; + stencil[pos++] = 0; + // Subphase 0: upper right front bins (red) for (j = 0; j <= sy; j++) for (i = 0; i <= sx; i++) @@ -101,11 +109,5 @@ void NStencilHalfBin2dNewtonSSA::create() } // nstencil_ssa[8] = pos; - // Also, include the centroid for the AIR ghosts. - stencilxyz[pos][0] = 0; - stencilxyz[pos][1] = 0; - stencilxyz[pos][2] = 0; - stencil[pos++] = 0; - nstencil = pos; // record where full stencil ends } diff --git a/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp b/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp index 3b1c85bdc1..cdd3b8856f 100644 --- a/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp +++ b/src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp @@ -43,6 +43,14 @@ void NStencilHalfBin3dNewtonSSA::create() { int i,j,k,pos = 0; nstencil_ssa[0] = 0; // redundant info, but saves a conditional + + // Include the centroid at the start. + // It will be handled as part of Subphase 0. 
+ stencilxyz[pos][0] = 0; + stencilxyz[pos][1] = 0; + stencilxyz[pos][2] = 0; + stencil[pos++] = 0; + // Subphase 0: upper right front bins (red) for (k = 0; k <= sz; k++) for (j = 0; j <= sy; j++) @@ -141,11 +149,5 @@ void NStencilHalfBin3dNewtonSSA::create() } //nstencil_ssa[8] = pos; - // Also, include the centroid for the AIR ghosts. - stencilxyz[pos][0] = 0; - stencilxyz[pos][1] = 0; - stencilxyz[pos][2] = 0; - stencil[pos++] = 0; - nstencil = pos; // record where full stencil ends } From 19ffe5931529b9c49cf9bc656c2e9fa01248aa3c Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Sat, 18 Feb 2017 00:32:14 -0500 Subject: [PATCH 147/267] USER-DPD: fix typo in NPairHalfBinNewtonSSA::build(): sz1 instead of sx1 Luckily, no real change, since sz1 and sx1 are normally identical. --- src/USER-DPD/npair_half_bin_newton_ssa.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/USER-DPD/npair_half_bin_newton_ssa.cpp b/src/USER-DPD/npair_half_bin_newton_ssa.cpp index 14095bf349..ab439d3731 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.cpp +++ b/src/USER-DPD/npair_half_bin_newton_ssa.cpp @@ -153,7 +153,7 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) int workItem = 0; for (zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) { for (ybin = lbinylo + yoff - ns_ssa->sy; ybin < lbinyhi; ybin += sy1) { - for (xbin = lbinxlo + xoff - ns_ssa->sx; xbin < lbinxhi; xbin += sz1) { + for (xbin = lbinxlo + xoff - ns_ssa->sx; xbin < lbinxhi; xbin += sx1) { if (workItem >= phaseLenEstimate) error->one(FLERR,"phaseLenEstimate was too small"); ssa_itemLoc[workPhase][workItem] = inum; // record where workItem starts in ilist From 5c6e7b12c647a21d45e8d6460e53a5ff64f277d6 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Sat, 18 Feb 2017 01:09:02 -0500 Subject: [PATCH 148/267] BUGFIX: fix a copy-o in build_Item_Ghost(): xbin2, etc. should be an int xbin2, ybin2, and zbin2 are temporary integer bin coordinates, not floats! 
--- src/KOKKOS/npair_kokkos.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 4f17835717..c750918695 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -724,9 +724,9 @@ void NeighborKokkosExecute:: const int ybin = binxyz[1]; const int zbin = binxyz[2]; for (int k = 0; k < nstencil; k++) { - const X_FLOAT xbin2 = xbin + stencilxyz(k,0); - const X_FLOAT ybin2 = ybin + stencilxyz(k,1); - const X_FLOAT zbin2 = zbin + stencilxyz(k,2); + const int xbin2 = xbin + stencilxyz(k,0); + const int ybin2 = ybin + stencilxyz(k,1); + const int zbin2 = zbin + stencilxyz(k,2); if (xbin2 < 0 || xbin2 >= mbinx || ybin2 < 0 || ybin2 >= mbiny || zbin2 < 0 || zbin2 >= mbinz) continue; From 01d0a5c4a210617fb47b9aa85a79eeccd715fa5f Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Sat, 18 Feb 2017 01:38:55 -0500 Subject: [PATCH 149/267] BUGFIX: use Kokkos::atomic_fetch_max() to avoid a race on new_maxneighs --- src/KOKKOS/npair_kokkos.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index c750918695..5bfa147def 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -422,10 +422,10 @@ void NeighborKokkosExecute:: neigh_list.d_numneigh(i) = n; - if(n >= neigh_list.maxneighs) { + if(n > neigh_list.maxneighs) { resize() = 1; - if(n >= new_maxneighs()) new_maxneighs() = n; + if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n); } neigh_list.d_ilist(i) = i; @@ -632,10 +632,10 @@ void NeighborKokkosExecute::build_ItemCuda(typename Kokkos::TeamPoli neigh_list.d_ilist(i) = i; } - if(n >= neigh_list.maxneighs) { + if(n > neigh_list.maxneighs) { resize() = 1; - if(n >= new_maxneighs()) new_maxneighs() = n; + if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n); } } } @@ -755,10 +755,10 @@ void NeighborKokkosExecute:: neigh_list.d_numneigh(i) 
= n; - if(n >= neigh_list.maxneighs) { + if(n > neigh_list.maxneighs) { resize() = 1; - if(n >= new_maxneighs()) new_maxneighs() = n; + if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n); } neigh_list.d_ilist(i) = i; } From 8065d967612a04ce0f6e3f8286d7c66fe1e48f0d Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Sat, 18 Feb 2017 03:14:32 -0500 Subject: [PATCH 150/267] USER-DPD: first attempt at npair_ssa_kokkos... It compiles! --- src/KOKKOS/npair_ssa_kokkos.cpp | 539 ++++++++++++++++++++++++++++++++ src/KOKKOS/npair_ssa_kokkos.h | 334 ++++++++++++++++++++ 2 files changed, 873 insertions(+) create mode 100644 src/KOKKOS/npair_ssa_kokkos.cpp create mode 100644 src/KOKKOS/npair_ssa_kokkos.h diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp new file mode 100644 index 0000000000..752fc0c938 --- /dev/null +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -0,0 +1,539 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: + James Larentzos and Timothy I. 
Mattox (Engility Corporation) +------------------------------------------------------------------------- */ + +#include "npair_ssa_kokkos.h" +#include "neigh_list.h" +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "domain_kokkos.h" +#include "neighbor_kokkos.h" +#include "nbin_ssa_kokkos.h" +#include "nstencil_ssa.h" +#include "error.h" + +namespace LAMMPS_NS { + +/* ---------------------------------------------------------------------- */ + +template +NPairSSAKokkos::NPairSSAKokkos(LAMMPS *lmp) : NPair(lmp), ssa_phaseCt(27) +{ +} + +/* ---------------------------------------------------------------------- + copy needed info from Neighbor class to this build class + ------------------------------------------------------------------------- */ + +template +void NPairSSAKokkos::copy_neighbor_info() +{ + NPair::copy_neighbor_info(); + + NeighborKokkos* neighborKK = (NeighborKokkos*) neighbor; + + // general params + + k_cutneighsq = neighborKK->k_cutneighsq; + + // exclusion info + + k_ex1_type = neighborKK->k_ex1_type; + k_ex2_type = neighborKK->k_ex2_type; + k_ex_type = neighborKK->k_ex_type; + k_ex1_group = neighborKK->k_ex1_group; + k_ex2_group = neighborKK->k_ex2_group; + k_ex1_bit = neighborKK->k_ex1_bit; + k_ex2_bit = neighborKK->k_ex2_bit; + k_ex_mol_group = neighborKK->k_ex_mol_group; + k_ex_mol_bit = neighborKK->k_ex_mol_bit; +} + +/* ---------------------------------------------------------------------- + copy per-atom and per-bin vectors from NBinSSAKokkos class to this build class + ------------------------------------------------------------------------- */ + +template +void NPairSSAKokkos::copy_bin_info() +{ + NPair::copy_bin_info(); + + NBinSSAKokkos* nbKK = dynamic_cast*>(nb); + if (!nbKK) error->one(FLERR, "NBin wasn't a NBinSSAKokkos object"); + + atoms_per_bin = nbKK->atoms_per_bin; + k_bincount = nbKK->k_bincount; + k_bins = nbKK->k_bins; + + ghosts_per_gbin = nbKK->ghosts_per_gbin; + k_gbincount = nbKK->k_gbincount; + k_gbins = 
nbKK->k_gbins; + + lbinxlo = nbKK->d_lbinxlo(); + lbinxhi = nbKK->d_lbinxhi(); + lbinylo = nbKK->d_lbinylo(); + lbinyhi = nbKK->d_lbinyhi(); + lbinzlo = nbKK->d_lbinzlo(); + lbinzhi = nbKK->d_lbinzhi(); +} + +/* ---------------------------------------------------------------------- + copy needed info from NStencil class to this build class + ------------------------------------------------------------------------- */ + +template +void NPairSSAKokkos::copy_stencil_info() +{ + NPair::copy_stencil_info(); + + nstencil = ns->nstencil; + + int maxstencil = ns->get_maxstencil(); + + k_stencil = DAT::tdual_int_1d("NPairSSAKokkos:stencil",maxstencil); + for (int k = 0; k < maxstencil; k++) { + k_stencil.h_view(k) = ns->stencil[k]; + } + k_stencil.modify(); + k_stencil.sync(); + k_stencilxyz = DAT::tdual_int_1d_3("NPairSSAKokkos:stencilxyz",maxstencil); + for (int k = 0; k < maxstencil; k++) { + k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0]; + k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1]; + k_stencilxyz.h_view(k,2) = ns->stencilxyz[k][2]; + } + k_stencilxyz.modify(); + k_stencilxyz.sync(); + + NStencilSSA *ns_ssa = dynamic_cast(ns); + if (!ns_ssa) error->one(FLERR, "NStencil wasn't a NStencilSSA object"); + + k_nstencil_ssa = DAT::tdual_int_1d("NPairSSAKokkos:nstencil_ssa",8); + for (int k = 0; k < 8; ++k) { + k_nstencil_ssa.h_view(k) = ns_ssa->nstencil_ssa[k]; + } + k_nstencil_ssa.modify(); + k_nstencil_ssa.sync(); + sx1 = ns_ssa->sx + 1; + sy1 = ns_ssa->sy + 1; + sz1 = ns_ssa->sz + 1; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int NPairSSAKokkosExecute::find_special(const int &i, const int &j) const +{ + const int n1 = nspecial(i,0); + const int n2 = nspecial(i,1); + const int n3 = nspecial(i,2); + + for (int k = 0; k < n3; k++) { + if (special(i,k) == tag(j)) { + if (k < n1) { + if (special_flag[1] == 0) return -1; + else if (special_flag[1] == 1) return 0; + else return 1; + } else if (k < 
n2) { + if (special_flag[2] == 0) return -1; + else if (special_flag[2] == 1) return 0; + else return 2; + } else { + if (special_flag[3] == 0) return -1; + else if (special_flag[3] == 1) return 0; + else return 3; + } + } + } + return 0; +}; + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int NPairSSAKokkosExecute::exclusion(const int &i,const int &j, + const int &itype,const int &jtype) const +{ + int m; + + if (nex_type && ex_type(itype,jtype)) return 1; + + if (nex_group) { + for (m = 0; m < nex_group; m++) { + if (mask(i) & ex1_bit(m) && mask(j) & ex2_bit(m)) return 1; + if (mask(i) & ex2_bit(m) && mask(j) & ex1_bit(m)) return 1; + } + } + + if (nex_mol) { + for (m = 0; m < nex_mol; m++) + if (mask(i) & ex_mol_bit(m) && mask(j) & ex_mol_bit(m) && + molecule(i) == molecule(j)) return 1; + } + + return 0; +} + +/* ---------------------------------------------------------------------- */ + + +/* ---------------------------------------------------------------------- + binned neighbor list construction with full Newton's 3rd law + for use by Shardlow Spliting Algorithm + each owned atom i checks its own bin and other bins in Newton stencil + every pair stored exactly once by some processor +------------------------------------------------------------------------- */ + +template +void NPairSSAKokkos::build(NeighList *list_) +{ + NeighListKokkos* list = (NeighListKokkos*) list_; + const int nlocal = includegroup?atom->nfirst:atom->nlocal; + const int nl_size = (nlocal + atom->nghost) * 4; + list->grow(nl_size); // Make special larger SSA neighbor list + + ssa_phaseCt = sz1*sy1*sx1; + + int xbin = (lbinxhi - lbinxlo + sx1 - 1) / sx1 + 1; + int ybin = (lbinyhi - lbinylo + sy1 - 1) / sy1 + 1; + int zbin = (lbinzhi - lbinzlo + sz1 - 1) / sz1 + 1; + int phaseLenEstimate = xbin*ybin*zbin; + + if (ssa_phaseCt > (int) k_ssa_phaseLen.dimension_0()) { + k_ssa_phaseLen = 
DAT::tdual_int_1d("NPairSSAKokkos:ssa_phaseLen",ssa_phaseCt); + ssa_phaseLen = k_ssa_phaseLen.view(); + } + if ((ssa_phaseCt > (int) k_ssa_itemLoc.dimension_0()) || + (phaseLenEstimate > (int) k_ssa_itemLoc.dimension_1())) { + k_ssa_itemLoc = DAT::tdual_int_2d("NPairSSAKokkos::ssa_itemLoc",ssa_phaseCt,phaseLenEstimate); + ssa_itemLoc = k_ssa_itemLoc.view(); + k_ssa_itemLen = DAT::tdual_int_2d("NPairSSAKokkos::ssa_itemLen",ssa_phaseCt,phaseLenEstimate); + ssa_itemLen = k_ssa_itemLen.view(); + } + + NPairSSAKokkosExecute + data(*list, + k_cutneighsq.view(), + k_bincount.view(), + k_bins.view(), + k_gbincount.view(), + k_gbins.view(), + lbinxlo, lbinxhi, lbinylo, lbinyhi, lbinzlo, lbinzhi, + nstencil, sx1, sy1, sz1, + k_stencil.view(), + k_stencilxyz.view(), + k_nstencil_ssa.view(), + ssa_phaseCt, + k_ssa_phaseLen.view(), + k_ssa_itemLoc.view(), + k_ssa_itemLen.view(), + nlocal, + atomKK->k_x.view(), + atomKK->k_type.view(), + atomKK->k_mask.view(), + atomKK->k_molecule.view(), + atomKK->k_tag.view(), + atomKK->k_special.view(), + atomKK->k_nspecial.view(), + atomKK->molecular, + nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo, + bininvx,bininvy,bininvz, + exclude, nex_type, + k_ex1_type.view(), + k_ex2_type.view(), + k_ex_type.view(), + nex_group, + k_ex1_group.view(), + k_ex2_group.view(), + k_ex1_bit.view(), + k_ex2_bit.view(), + nex_mol, + k_ex_mol_group.view(), + k_ex_mol_bit.view(), + bboxhi,bboxlo, + domain->xperiodic,domain->yperiodic,domain->zperiodic, + domain->xprd_half,domain->yprd_half,domain->zprd_half); + + k_cutneighsq.sync(); + k_ex1_type.sync(); + k_ex2_type.sync(); + k_ex_type.sync(); + k_ex1_group.sync(); + k_ex2_group.sync(); + k_ex1_bit.sync(); + k_ex2_bit.sync(); + k_ex_mol_group.sync(); + k_ex_mol_bit.sync(); + k_bincount.sync(); + k_bins.sync(); + k_gbincount.sync(); + k_gbins.sync(); + atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK); + + data.special_flag[0] = special_flag[0]; + 
data.special_flag[1] = special_flag[1]; + data.special_flag[2] = special_flag[2]; + data.special_flag[3] = special_flag[3]; + + data.h_resize()=1; + while(data.h_resize()) { + data.h_new_maxneighs() = list->maxneighs; + data.h_resize() = 0; + + Kokkos::deep_copy(data.resize, data.h_resize); + Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs); + +#ifdef NOTYET + NPairSSAKokkosBuildFunctor f(data,atoms_per_bin*5*sizeof(X_FLOAT)); + Kokkos::parallel_for(nall, f); +#endif + data.build_locals(); + data.build_ghosts(); + + DeviceType::fence(); + deep_copy(data.h_resize, data.resize); + + if(data.h_resize()) { + deep_copy(data.h_new_maxneighs, data.new_maxneighs); + list->maxneighs = data.h_new_maxneighs() * 1.2; + list->d_neighbors = typename ArrayTypes::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs); + data.neigh_list.d_neighbors = list->d_neighbors; + data.neigh_list.maxneighs = list->maxneighs; + } + } + + k_ssa_phaseLen.modify(); + k_ssa_itemLoc.modify(); + k_ssa_itemLen.modify(); + + list->k_ilist.template modify(); +} + + +template +void NPairSSAKokkosExecute::build_locals() +{ + int n = 0; + int which = 0; + int inum = 0; + + int workPhase = 0; + // loop over bins with local atoms, storing half of the neighbors + for (int zoff = sz1 - 1; zoff >= 0; --zoff) { + for (int yoff = sy1 - 1; yoff >= 0; --yoff) { + for (int xoff = sx1 - 1; xoff >= 0; --xoff) { + int workItem = 0; + for (int zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) { + for (int ybin = lbinylo + yoff - sy1 + 1; ybin < lbinyhi; ybin += sy1) { + for (int xbin = lbinxlo + xoff - sx1 + 1; xbin < lbinxhi; xbin += sx1) { +// if (workItem >= phaseLenEstimate) error->one(FLERR,"phaseLenEstimate was too small"); + d_ssa_itemLoc(workPhase, workItem) = inum; // record where workItem starts in ilist + + for (int subphase = 0; subphase < 4; subphase++) { + int s_ybin = ybin + ((subphase & 0x2) ? sy1 - 1 : 0); + int s_xbin = xbin + ((subphase & 0x1) ? 
sx1 - 1 : 0); + if ((s_ybin < lbinylo) || (s_ybin >= lbinyhi)) continue; + if ((s_xbin < lbinxlo) || (s_xbin >= lbinxhi)) continue; + + int ibin = zbin*mbiny*mbinx + s_ybin*mbinx + s_xbin; + for (int il = 0; il < c_bincount(ibin); ++il) { + const int i = c_bins(ibin, il); + n = 0; + + const AtomNeighbors neighbors_i = neigh_list.get_neighbors(inum); + const X_FLOAT xtmp = x(i, 0); + const X_FLOAT ytmp = x(i, 1); + const X_FLOAT ztmp = x(i, 2); + const int itype = type(i); + + const typename ArrayTypes::t_int_1d_const_um stencil + = d_stencil; + + // loop over all local atoms in the current stencil "subphase" + for (int k = d_nstencil_ssa(subphase); k < d_nstencil_ssa(subphase+1); k++) { + const int jbin = ibin+stencil(k); + int jl; + if (jbin != ibin) jl = 0; + else jl = il + 1; // same bin as i, so start just past i in the bin + for (; jl < c_bincount(jbin); ++jl) { + const int j = c_bins(jbin, jl); + const int jtype = type(j); + if(exclude && exclusion(i,j,itype,jtype)) continue; + + const X_FLOAT delx = xtmp - x(j, 0); + const X_FLOAT dely = ytmp - x(j, 1); + const X_FLOAT delz = ztmp - x(j, 2); + const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; + if(rsq <= cutneighsq(itype,jtype)) { + if (molecular) { + if (!moltemplate) + which = find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0){ + if(n 0) { + if(n 0) { + neigh_list.d_numneigh(inum) = n; + neigh_list.d_ilist(inum++) = i; + if(n > neigh_list.maxneighs) { + resize() = 1; + if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n); + } + } + } + } + // record where workItem ends in ilist + d_ssa_itemLen(workPhase,workItem) = inum - d_ssa_itemLoc(workPhase,workItem); + if (d_ssa_itemLen(workPhase,workItem) > 0) workItem++; + } + } + } + + // record where workPhase ends + d_ssa_phaseLen(workPhase++) = workItem; + } + } + } + +//FIXME if 
(ssa_phaseCt != workPhase) error->one(FLERR,"ssa_phaseCt was wrong"); + + neigh_list.inum = inum; //FIXME +} + + +template +void NPairSSAKokkosExecute::build_ghosts() +{ + int n = 0; + int which = 0; + int inum = neigh_list.inum; + int gnum = 0; + neigh_list.AIRct_ssa[0] = inum; //FIXME + + // loop over AIR ghost atoms, storing their local neighbors + // since these are ghosts, must check if stencil bin is out of bounds + for (int airnum = 1; airnum <= 7; airnum++) { + int locAIRct = 0; + for (int il = 0; il < c_gbincount(airnum); ++il) { + const int i = c_gbins(airnum, il); + n = 0; + + const AtomNeighbors neighbors_i = neigh_list.get_neighbors(inum + gnum); + const X_FLOAT xtmp = x(i, 0); + const X_FLOAT ytmp = x(i, 1); + const X_FLOAT ztmp = x(i, 2); + const int itype = type(i); + + const typename ArrayTypes::t_int_1d_const_um stencil + = d_stencil; + + int loc[3]; + const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2), &(loc[0])); + + // loop over AIR ghost atoms in all bins in "full" stencil + // Note: the non-AIR ghost atoms have already been filtered out + for (int k = 0; k < nstencil; k++) { + int xbin2 = loc[0] + d_stencilxyz(k,0); + int ybin2 = loc[1] + d_stencilxyz(k,1); + int zbin2 = loc[2] + d_stencilxyz(k,2); + // Skip it if this bin is outside the extent of local bins + if (xbin2 < lbinxlo || xbin2 >= lbinxhi || + ybin2 < lbinylo || ybin2 >= lbinyhi || + zbin2 < lbinzlo || zbin2 >= lbinzhi) continue; + const int jbin = ibin+stencil(k); + for (int jl = 0; jl < c_bincount(jbin); ++jl) { + const int j = c_bins(jbin, jl); + const int jtype = type(j); + if(exclude && exclusion(i,j,itype,jtype)) continue; + + const X_FLOAT delx = xtmp - x(j, 0); + const X_FLOAT dely = ytmp - x(j, 1); + const X_FLOAT delz = ztmp - x(j, 2); + const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; + if(rsq <= cutneighsq(itype,jtype)) { + if (molecular) { + if (!moltemplate) + which = find_special(i,j); + /* else if (imol >= 0) */ + /* which = 
find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0){ + if(n 0) { + if(n 0) { + neigh_list.d_numneigh(inum + gnum) = n; + neigh_list.d_ilist(inum + (gnum++)) = i; + if(n > neigh_list.maxneighs) { + resize() = 1; + if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n); + } + ++locAIRct; + } + } + neigh_list.AIRct_ssa[airnum] = locAIRct; //FIXME + } + neigh_list.gnum = gnum; //FIXME +} + +} + +namespace LAMMPS_NS { +template class NPairSSAKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class NPairSSAKokkos; +#endif +} diff --git a/src/KOKKOS/npair_ssa_kokkos.h b/src/KOKKOS/npair_ssa_kokkos.h new file mode 100644 index 0000000000..a656fe32ba --- /dev/null +++ b/src/KOKKOS/npair_ssa_kokkos.h @@ -0,0 +1,334 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef NPAIR_CLASS + +typedef NPairSSAKokkos NPairSSAKokkosHost; +NPairStyle(half/bin/newton/ssa/kk/host, + NPairSSAKokkosHost, + NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_SSA | NP_GHOST | NP_KOKKOS_HOST) + +typedef NPairSSAKokkos NPairSSAKokkosDevice; +NPairStyle(half/bin/newton/ssa/kk/device, + NPairSSAKokkosDevice, + NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_SSA | NP_GHOST | NP_KOKKOS_DEVICE) + +#else + +#ifndef LMP_NPAIR_SSA_KOKKOS_H +#define LMP_NPAIR_SSA_KOKKOS_H + +#include "npair.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template +class NPairSSAKokkos : public NPair { + public: + typedef ArrayTypes AT; + + // SSA Work plan data structures + int ssa_phaseCt; + DAT::tdual_int_1d k_ssa_phaseLen; + DAT::tdual_int_2d k_ssa_itemLoc; + DAT::tdual_int_2d k_ssa_itemLen; + typename AT::t_int_1d ssa_phaseLen; + typename AT::t_int_2d ssa_itemLoc; + typename AT::t_int_2d ssa_itemLen; + + NPairSSAKokkos(class LAMMPS *); + ~NPairSSAKokkos() {} + void copy_neighbor_info(); + void copy_bin_info(); + void copy_stencil_info(); + void build(class NeighList *); + private: + // data from Neighbor class + + DAT::tdual_xfloat_2d k_cutneighsq; + + // exclusion data from Neighbor class + + DAT::tdual_int_1d k_ex1_type,k_ex2_type; + DAT::tdual_int_2d k_ex_type; + DAT::tdual_int_1d k_ex1_group,k_ex2_group; + DAT::tdual_int_1d k_ex1_bit,k_ex2_bit; + DAT::tdual_int_1d k_ex_mol_group; + DAT::tdual_int_1d k_ex_mol_bit; + + // data from NBinSSA class + + int atoms_per_bin; + DAT::tdual_int_1d k_bincount; + DAT::tdual_int_2d k_bins; + int ghosts_per_gbin; + DAT::tdual_int_1d k_gbincount; + DAT::tdual_int_2d k_gbins; + int lbinxlo, lbinxhi, lbinylo, lbinyhi, lbinzlo, lbinzhi; + + // data from NStencilSSA class + + int nstencil; + DAT::tdual_int_1d k_stencil; // # of J neighs for each I + DAT::tdual_int_1d_3 k_stencilxyz; + DAT::tdual_int_1d k_nstencil_ssa; + int sx1, sy1, sz1; +}; + 
+template +class NPairSSAKokkosExecute +{ + typedef ArrayTypes AT; + + public: + NeighListKokkos neigh_list; + + // data from Neighbor class + + const typename AT::t_xfloat_2d_randomread cutneighsq; + + // exclusion data from Neighbor class + + const int exclude; + + const int nex_type; + const typename AT::t_int_1d_const ex1_type,ex2_type; + const typename AT::t_int_2d_const ex_type; + + const int nex_group; + const typename AT::t_int_1d_const ex1_group,ex2_group; + const typename AT::t_int_1d_const ex1_bit,ex2_bit; + + const int nex_mol; + const typename AT::t_int_1d_const ex_mol_group; + const typename AT::t_int_1d_const ex_mol_bit; + + // data from NBinSSA class + + const typename AT::t_int_1d bincount; + const typename AT::t_int_1d_const c_bincount; + typename AT::t_int_2d bins; + typename AT::t_int_2d_const c_bins; + const typename AT::t_int_1d gbincount; + const typename AT::t_int_1d_const c_gbincount; + typename AT::t_int_2d gbins; + typename AT::t_int_2d_const c_gbins; + const int lbinxlo, lbinxhi, lbinylo, lbinyhi, lbinzlo, lbinzhi; + + + // data from NStencil class + + const int nstencil; + const int sx1, sy1, sz1; + typename AT::t_int_1d d_stencil; // # of J neighs for each I + typename AT::t_int_1d_3 d_stencilxyz; + typename AT::t_int_1d d_nstencil_ssa; + + // data from Atom class + + const typename AT::t_x_array_randomread x; + const typename AT::t_int_1d_const type,mask; + const typename AT::t_tagint_1d_const molecule; + const typename AT::t_tagint_1d_const tag; + const typename AT::t_tagint_2d_const special; + const typename AT::t_int_2d_const nspecial; + const int molecular; + int moltemplate; + + int special_flag[4]; + + const int nbinx,nbiny,nbinz; + const int mbinx,mbiny,mbinz; + const int mbinxlo,mbinylo,mbinzlo; + const X_FLOAT bininvx,bininvy,bininvz; + X_FLOAT bboxhi[3],bboxlo[3]; + + const int nlocal; + + typename AT::t_int_scalar resize; + typename AT::t_int_scalar new_maxneighs; + typename ArrayTypes::t_int_scalar h_resize; + typename 
ArrayTypes::t_int_scalar h_new_maxneighs; + + const int xperiodic, yperiodic, zperiodic; + const int xprd_half, yprd_half, zprd_half; + + // SSA Work plan data structures + int ssa_phaseCt; + typename AT::t_int_1d d_ssa_phaseLen; + typename AT::t_int_2d d_ssa_itemLoc; + typename AT::t_int_2d d_ssa_itemLen; + + NPairSSAKokkosExecute( + const NeighListKokkos &_neigh_list, + const typename AT::t_xfloat_2d_randomread &_cutneighsq, + const typename AT::t_int_1d &_bincount, + const typename AT::t_int_2d &_bins, + const typename AT::t_int_1d &_gbincount, + const typename AT::t_int_2d &_gbins, + const int _lbinxlo, const int _lbinxhi, + const int _lbinylo, const int _lbinyhi, + const int _lbinzlo, const int _lbinzhi, + const int _nstencil, const int _sx1, const int _sy1, const int _sz1, + const typename AT::t_int_1d &_d_stencil, + const typename AT::t_int_1d_3 &_d_stencilxyz, + const typename AT::t_int_1d &_d_nstencil_ssa, + const int _ssa_phaseCt, + const typename AT::t_int_1d &_d_ssa_phaseLen, + const typename AT::t_int_2d &_d_ssa_itemLoc, + const typename AT::t_int_2d &_d_ssa_itemLen, + const int _nlocal, + const typename AT::t_x_array_randomread &_x, + const typename AT::t_int_1d_const &_type, + const typename AT::t_int_1d_const &_mask, + const typename AT::t_tagint_1d_const &_molecule, + const typename AT::t_tagint_1d_const &_tag, + const typename AT::t_tagint_2d_const &_special, + const typename AT::t_int_2d_const &_nspecial, + const int &_molecular, + const int & _nbinx,const int & _nbiny,const int & _nbinz, + const int & _mbinx,const int & _mbiny,const int & _mbinz, + const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo, + const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz, + const int & _exclude,const int & _nex_type, + const typename AT::t_int_1d_const & _ex1_type, + const typename AT::t_int_1d_const & _ex2_type, + const typename AT::t_int_2d_const & _ex_type, + const int & _nex_group, + const typename AT::t_int_1d_const & 
_ex1_group, + const typename AT::t_int_1d_const & _ex2_group, + const typename AT::t_int_1d_const & _ex1_bit, + const typename AT::t_int_1d_const & _ex2_bit, + const int & _nex_mol, + const typename AT::t_int_1d_const & _ex_mol_group, + const typename AT::t_int_1d_const & _ex_mol_bit, + const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo, + const int & _xperiodic, const int & _yperiodic, const int & _zperiodic, + const int & _xprd_half, const int & _yprd_half, const int & _zprd_half): + neigh_list(_neigh_list), cutneighsq(_cutneighsq), + bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins), + gbincount(_gbincount),c_gbincount(_gbincount),gbins(_gbins),c_gbins(_gbins), + lbinxlo(_lbinxlo),lbinxhi(_lbinxhi), + lbinylo(_lbinylo),lbinyhi(_lbinyhi), + lbinzlo(_lbinzlo),lbinzhi(_lbinzhi), + nstencil(_nstencil),sx1(_sx1),sy1(_sy1),sz1(_sz1), + d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz),d_nstencil_ssa(_d_nstencil_ssa), + ssa_phaseCt(_ssa_phaseCt), + d_ssa_phaseLen(_d_ssa_phaseLen), + d_ssa_itemLoc(_d_ssa_itemLoc), + d_ssa_itemLen(_d_ssa_itemLen), + nlocal(_nlocal), + x(_x),type(_type),mask(_mask),molecule(_molecule), + tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular), + nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz), + mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz), + mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo), + bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz), + exclude(_exclude),nex_type(_nex_type), + ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type), + nex_group(_nex_group), + ex1_group(_ex1_group),ex2_group(_ex2_group), + ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol), + ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit), + xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic), + xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) { + + if (molecular == 2) moltemplate = 1; + else moltemplate = 0; + + bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2]; + bboxhi[0] 
= _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2]; + + resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize"); +#ifndef KOKKOS_USE_CUDA_UVM + h_resize = Kokkos::create_mirror_view(resize); +#else + h_resize = resize; +#endif + h_resize() = 1; + new_maxneighs = typename AT:: + t_int_scalar("NeighborKokkosFunctor::new_maxneighs"); +#ifndef KOKKOS_USE_CUDA_UVM + h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs); +#else + h_new_maxneighs = new_maxneighs; +#endif + h_new_maxneighs() = neigh_list.maxneighs; + }; + + ~NPairSSAKokkosExecute() {neigh_list.clean_copy();}; + + void build_locals(); + void build_ghosts(); + + KOKKOS_INLINE_FUNCTION + int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const + { + int ix,iy,iz; + + if (x >= bboxhi[0]) + ix = static_cast ((x-bboxhi[0])*bininvx) + nbinx; + else if (x >= bboxlo[0]) { + ix = static_cast ((x-bboxlo[0])*bininvx); + ix = MIN(ix,nbinx-1); + } else + ix = static_cast ((x-bboxlo[0])*bininvx) - 1; + + if (y >= bboxhi[1]) + iy = static_cast ((y-bboxhi[1])*bininvy) + nbiny; + else if (y >= bboxlo[1]) { + iy = static_cast ((y-bboxlo[1])*bininvy); + iy = MIN(iy,nbiny-1); + } else + iy = static_cast ((y-bboxlo[1])*bininvy) - 1; + + if (z >= bboxhi[2]) + iz = static_cast ((z-bboxhi[2])*bininvz) + nbinz; + else if (z >= bboxlo[2]) { + iz = static_cast ((z-bboxlo[2])*bininvz); + iz = MIN(iz,nbinz-1); + } else + iz = static_cast ((z-bboxlo[2])*bininvz) - 1; + + i[0] = ix - mbinxlo; + i[1] = iy - mbinylo; + i[2] = iz - mbinzlo; + + return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); + } + + KOKKOS_INLINE_FUNCTION + int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const; + + KOKKOS_INLINE_FUNCTION + int find_special(const int &i, const int &j) const; + + KOKKOS_INLINE_FUNCTION + int minimum_image_check(double dx, double dy, double dz) const { + if (xperiodic && fabs(dx) > xprd_half) return 1; + if (yperiodic && fabs(dy) > yprd_half) 
return 1; + if (zperiodic && fabs(dz) > zprd_half) return 1; + return 0; + } + +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +*/ From b27cc8f474e1a1284d242d209e5fa3ba0e77c5f7 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Mon, 20 Feb 2017 14:09:11 -0500 Subject: [PATCH 151/267] USER-DPD: use LAMBDA instead of functor for ghost binning in nbin_ssa_kokkos --- src/KOKKOS/nbin_ssa_kokkos.cpp | 32 +++++++++++--------------------- src/KOKKOS/nbin_ssa_kokkos.h | 18 ------------------ 2 files changed, 11 insertions(+), 39 deletions(-) diff --git a/src/KOKKOS/nbin_ssa_kokkos.cpp b/src/KOKKOS/nbin_ssa_kokkos.cpp index 6ed8e9f3e4..32a77119de 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.cpp +++ b/src/KOKKOS/nbin_ssa_kokkos.cpp @@ -122,9 +122,17 @@ void NBinSSAKokkos::bin_atoms() subhi_[1] = domain->subhi[1]; subhi_[2] = domain->subhi[2]; - NPairSSAKokkosBinGhostsFunctor f(*this); - - Kokkos::parallel_for(atom->nghost, f); + Kokkos::parallel_for(Kokkos::RangePolicy(atom->nlocal,atom->nlocal+atom->nghost), KOKKOS_LAMBDA (const int i) { + const int iAIR = coord2ssaAIR(x(i, 0), x(i, 1), x(i, 2)); + if (iAIR > 0) { // include only ghost atoms in an AIR + const int ac = Kokkos::atomic_fetch_add(&gbincount[iAIR], (int)1); + if(ac < (int) gbins.dimension_1()) { + gbins(iAIR, ac) = i; + } else { + d_resize() = 1; + } + } + }); DeviceType::fence(); deep_copy(h_resize, d_resize); @@ -184,24 +192,6 @@ void NBinSSAKokkos::bin_atoms() /* ---------------------------------------------------------------------- */ -template -KOKKOS_INLINE_FUNCTION -void NBinSSAKokkos::binGhostsItem(const int &i_) const -{ - const int i = i_ + atom->nlocal; - const int iAIR = coord2ssaAIR(x(i, 0), x(i, 1), x(i, 2)); - if (iAIR > 0) { // include only ghost atoms in an AIR - const int ac = Kokkos::atomic_fetch_add(&gbincount[iAIR], (int)1); - if(ac < (int) gbins.dimension_1()) { - gbins(iAIR, ac) = i; - } else { - d_resize() = 1; - } - } -} - -/* 
---------------------------------------------------------------------- */ - template KOKKOS_INLINE_FUNCTION void NBinSSAKokkos::binAtomsItem(const int &i) const diff --git a/src/KOKKOS/nbin_ssa_kokkos.h b/src/KOKKOS/nbin_ssa_kokkos.h index a16cb2d0b7..488c1034f5 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.h +++ b/src/KOKKOS/nbin_ssa_kokkos.h @@ -77,9 +77,6 @@ class NBinSSAKokkos : public NBinStandard { KOKKOS_INLINE_FUNCTION void binAtomsItem(const int &i) const; - KOKKOS_INLINE_FUNCTION - void binGhostsItem(const int &i) const; - /* ---------------------------------------------------------------------- convert atom coords into the ssa active interaction region number ------------------------------------------------------------------------- */ @@ -153,21 +150,6 @@ class NBinSSAKokkos : public NBinStandard { double sublo_[3], subhi_[3]; }; -template -struct NPairSSAKokkosBinGhostsFunctor { - typedef DeviceType device_type; - - const NBinSSAKokkos c; - - NPairSSAKokkosBinGhostsFunctor(const NBinSSAKokkos &_c): - c(_c) {}; - ~NPairSSAKokkosBinGhostsFunctor() {} - KOKKOS_INLINE_FUNCTION - void operator() (const int & i) const { - c.binGhostsItem(i); - } -}; - template struct NPairSSAKokkosBinAtomsFunctor { typedef DeviceType device_type; From 1db62a57b5ddbd579f1040d977ee659b2c377f89 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 24 Feb 2017 13:17:49 -0500 Subject: [PATCH 152/267] USER-DPD: pair_dpd_fdt_energy_kokkos: enable STACKPARAMS specialization --- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 117 ++++++++++++++-------- src/KOKKOS/pair_dpd_fdt_energy_kokkos.h | 24 ++--- 2 files changed, 89 insertions(+), 52 deletions(-) diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index 84a489bcc3..aaf638fac3 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -49,7 +49,6 @@ PairDPDfdtEnergyKokkos::PairDPDfdtEnergyKokkos(LAMMPS *lmp) : execution_space = 
ExecutionSpaceFromDevice::space; datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; - STACKPARAMS = 0; } /* ---------------------------------------------------------------------- */ @@ -171,21 +170,41 @@ void PairDPDfdtEnergyKokkos::compute(int eflag_in, int vflag_in) if (splitFDT_flag) { if (!a0_is_zero) { - if (neighflag == HALF) { - if (newton_pair) { - if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); - else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); - } else { - if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); - else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + if(atom->ntypes > MAX_TYPES_STACKPARAMS) { + if (neighflag == HALF) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } } - } else if (neighflag == HALFTHREAD) { - if (newton_pair) { - if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); - else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); - } else { - if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); - else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (neighflag == HALF) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + 
} else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } } } } @@ -209,21 +228,41 @@ void PairDPDfdtEnergyKokkos::compute(int eflag_in, int vflag_in) // loop over neighbors of my atoms - if (neighflag == HALF) { - if (newton_pair) { - if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); - else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); - } else { - if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); - else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + if(atom->ntypes > MAX_TYPES_STACKPARAMS) { + if (neighflag == HALF) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } } - } else if (neighflag == HALFTHREAD) { - if (newton_pair) { - if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); - else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); - } else { - if (evflag) 
Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); - else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (neighflag == HALF) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } } } @@ -270,9 +309,9 @@ void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyZero, con } template -template +template KOKKOS_INLINE_FUNCTION -void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeSplit, const int &ii, EV_FLOAT& ev) const { +void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeSplit, const int &ii, EV_FLOAT& ev) const { // The f array is atomic for Half/Thread neighbor style Kokkos::View::value> > a_f = f; @@ -346,17 +385,17 @@ void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeSp } template -template +template KOKKOS_INLINE_FUNCTION -void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeSplit, const int &ii) const { +void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeSplit, const int &ii) const { EV_FLOAT ev; - this->template operator()(TagPairDPDfdtEnergyComputeSplit(), ii, ev); + this->template operator()(TagPairDPDfdtEnergyComputeSplit(), ii, ev); } template -template +template KOKKOS_INLINE_FUNCTION -void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeNoSplit, const int &ii, EV_FLOAT& ev) const { +void 
PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeNoSplit, const int &ii, EV_FLOAT& ev) const { // These array are atomic for Half/Thread neighbor style Kokkos::View::value> > a_f = f; @@ -503,11 +542,11 @@ void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeNo } template -template +template KOKKOS_INLINE_FUNCTION -void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeNoSplit, const int &ii) const { +void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeNoSplit, const int &ii) const { EV_FLOAT ev; - this->template operator()(TagPairDPDfdtEnergyComputeNoSplit(), ii, ev); + this->template operator()(TagPairDPDfdtEnergyComputeNoSplit(), ii, ev); } /* ---------------------------------------------------------------------- diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h index 7d1749eb94..9689712273 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h @@ -32,10 +32,10 @@ namespace LAMMPS_NS { struct TagPairDPDfdtEnergyZero{}; -template +template struct TagPairDPDfdtEnergyComputeSplit{}; -template +template struct TagPairDPDfdtEnergyComputeNoSplit{}; template @@ -54,21 +54,21 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { KOKKOS_INLINE_FUNCTION void operator()(TagPairDPDfdtEnergyZero, const int&) const; - template + template KOKKOS_INLINE_FUNCTION - void operator()(TagPairDPDfdtEnergyComputeSplit, const int&, EV_FLOAT&) const; + void operator()(TagPairDPDfdtEnergyComputeSplit, const int&, EV_FLOAT&) const; - template + template KOKKOS_INLINE_FUNCTION - void operator()(TagPairDPDfdtEnergyComputeSplit, const int&) const; + void operator()(TagPairDPDfdtEnergyComputeSplit, const int&) const; - template + template KOKKOS_INLINE_FUNCTION - void operator()(TagPairDPDfdtEnergyComputeNoSplit, const int&, EV_FLOAT&) const; + void operator()(TagPairDPDfdtEnergyComputeNoSplit, const int&, EV_FLOAT&) const; - template + template 
KOKKOS_INLINE_FUNCTION - void operator()(TagPairDPDfdtEnergyComputeNoSplit, const int&) const; + void operator()(TagPairDPDfdtEnergyComputeNoSplit, const int&) const; template KOKKOS_INLINE_FUNCTION @@ -92,7 +92,6 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { protected: int eflag,vflag; int nlocal,neighflag; - int STACKPARAMS; double dtinvsqrt; double boltz,ftm2v; double special_lj[4]; @@ -102,11 +101,10 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { Kokkos::DualView k_params; typename Kokkos::DualView::t_dev_const_um params; - // hardwired to space for 15 atom types + // hardwired to space for MAX_TYPES_STACKPARAMS (12) atom types params_dpd m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; - F_FLOAT m_cut[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; typename ArrayTypes::t_x_array_randomread x; typename ArrayTypes::t_x_array c_x; typename ArrayTypes::t_v_array_randomread v; From aecafecaa2f89f6db8c90ec5af0db429e736b82e Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 24 Feb 2017 13:21:26 -0500 Subject: [PATCH 153/267] USER-DPD: fix missing host prefixes in AtomVecDPDKokkos::pack_comm --- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index f46f284f14..18f63599e4 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -563,10 +563,10 @@ int AtomVecDPDKokkos::pack_comm(int n, int *list, double *buf, buf[m++] = h_x(j,0); buf[m++] = h_x(j,1); buf[m++] = h_x(j,2); - buf[m++] = dpdTheta[j]; - buf[m++] = uCond[j]; - buf[m++] = uMech[j]; - buf[m++] = uChem[j]; + buf[m++] = h_dpdTheta[j]; + buf[m++] = h_uCond[j]; + buf[m++] = h_uMech[j]; + buf[m++] = h_uChem[j]; } } else { if (domain->triclinic == 0) { From 2f04e87d0794c66e9fbe0073690e64f7353cfcec Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: 
Fri, 24 Feb 2017 13:24:18 -0500 Subject: [PATCH 154/267] USER-DPD: make PairDPDfdtEnergyKokkos's rand_pool public so it can be reused --- src/KOKKOS/pair_dpd_fdt_energy_kokkos.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h index 9689712273..deb264c37e 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h @@ -89,6 +89,15 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { DAT::tdual_efloat_1d k_duCond,k_duMech; + Kokkos::Random_XorShift64_Pool rand_pool; + typedef typename Kokkos::Random_XorShift64_Pool::generator_type rand_type; + + // RandPoolWrap rand_pool; + // typedef RandWrap rand_type; + + typename ArrayTypes::tdual_ffloat_2d k_cutsq; + typename ArrayTypes::t_ffloat_2d d_cutsq; + protected: int eflag,vflag; int nlocal,neighflag; @@ -125,15 +134,6 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { typename AT::t_int_1d_randomread d_ilist; typename AT::t_int_1d_randomread d_numneigh; - typename ArrayTypes::tdual_ffloat_2d k_cutsq; - typename ArrayTypes::t_ffloat_2d d_cutsq; - - /**/Kokkos::Random_XorShift64_Pool rand_pool; - typedef typename Kokkos::Random_XorShift64_Pool::generator_type rand_type;/**/ - - /**RandPoolWrap rand_pool; - typedef RandWrap rand_type;/**/ - friend void pair_virial_fdotr_compute(PairDPDfdtEnergyKokkos*); }; From a341a6bca927e84a9fc947e402459466ceded503 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 24 Feb 2017 13:27:27 -0500 Subject: [PATCH 155/267] USER-DPD: make locals & ghosts use similar SSA work plan data structure Kokkos SSA won't use AIRct_ssa[], but still used for non-Kokkos for now. 
--- src/KOKKOS/npair_ssa_kokkos.cpp | 31 +++++++++++++++++++++++++------ src/KOKKOS/npair_ssa_kokkos.h | 20 ++++++++++++++++++++ 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index 752fc0c938..c70fd0087e 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -31,7 +31,7 @@ namespace LAMMPS_NS { /* ---------------------------------------------------------------------- */ template -NPairSSAKokkos::NPairSSAKokkos(LAMMPS *lmp) : NPair(lmp), ssa_phaseCt(27) +NPairSSAKokkos::NPairSSAKokkos(LAMMPS *lmp) : NPair(lmp), ssa_phaseCt(27), ssa_gphaseCt(7) { } @@ -214,6 +214,7 @@ void NPairSSAKokkos::build(NeighList *list_) int ybin = (lbinyhi - lbinylo + sy1 - 1) / sy1 + 1; int zbin = (lbinzhi - lbinzlo + sz1 - 1) / sz1 + 1; int phaseLenEstimate = xbin*ybin*zbin; + int gphaseLenEstimate = 1; //FIXME make this 4 eventually if (ssa_phaseCt > (int) k_ssa_phaseLen.dimension_0()) { k_ssa_phaseLen = DAT::tdual_int_1d("NPairSSAKokkos:ssa_phaseLen",ssa_phaseCt); @@ -227,6 +228,18 @@ void NPairSSAKokkos::build(NeighList *list_) ssa_itemLen = k_ssa_itemLen.view(); } + if (ssa_gphaseCt > (int) k_ssa_gphaseLen.dimension_0()) { + k_ssa_gphaseLen = DAT::tdual_int_1d("NPairSSAKokkos:ssa_gphaseLen",ssa_gphaseCt); + ssa_gphaseLen = k_ssa_gphaseLen.view(); + } + if ((ssa_gphaseCt > (int) k_ssa_gitemLoc.dimension_0()) || + (gphaseLenEstimate > (int) k_ssa_gitemLoc.dimension_1())) { + k_ssa_gitemLoc = DAT::tdual_int_2d("NPairSSAKokkos::ssa_gitemLoc",ssa_gphaseCt,gphaseLenEstimate); + ssa_gitemLoc = k_ssa_gitemLoc.view(); + k_ssa_gitemLen = DAT::tdual_int_2d("NPairSSAKokkos::ssa_gitemLen",ssa_gphaseCt,gphaseLenEstimate); + ssa_gitemLen = k_ssa_gitemLen.view(); + } + NPairSSAKokkosExecute data(*list, k_cutneighsq.view(), @@ -243,6 +256,10 @@ void NPairSSAKokkos::build(NeighList *list_) k_ssa_phaseLen.view(), k_ssa_itemLoc.view(), k_ssa_itemLen.view(), + ssa_gphaseCt, + 
k_ssa_gphaseLen.view(), + k_ssa_gitemLoc.view(), + k_ssa_gitemLen.view(), nlocal, atomKK->k_x.view(), atomKK->k_type.view(), @@ -444,12 +461,13 @@ void NPairSSAKokkosExecute::build_ghosts() int which = 0; int inum = neigh_list.inum; int gnum = 0; - neigh_list.AIRct_ssa[0] = inum; //FIXME // loop over AIR ghost atoms, storing their local neighbors // since these are ghosts, must check if stencil bin is out of bounds - for (int airnum = 1; airnum <= 7; airnum++) { - int locAIRct = 0; + for (int workPhase = 0; workPhase < ssa_gphaseCt; workPhase++) { + int airnum = workPhase + 1; + int workItem = 0; //FIXME for now, there is only 1 workItem for each ghost AIR + d_ssa_gitemLoc(workPhase, workItem) = inum + gnum; // record where workItem starts in ilist for (int il = 0; il < c_gbincount(airnum); ++il) { const int i = c_gbins(airnum, il); n = 0; @@ -521,10 +539,11 @@ void NPairSSAKokkosExecute::build_ghosts() resize() = 1; if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n); } - ++locAIRct; } } - neigh_list.AIRct_ssa[airnum] = locAIRct; //FIXME + // record where workItem ends in ilist + d_ssa_gitemLen(workPhase,workItem) = inum + gnum - d_ssa_gitemLoc(workPhase,workItem); + if (d_ssa_gitemLen(workPhase,workItem) > 0) workItem++; } neigh_list.gnum = gnum; //FIXME } diff --git a/src/KOKKOS/npair_ssa_kokkos.h b/src/KOKKOS/npair_ssa_kokkos.h index a656fe32ba..e38d648984 100644 --- a/src/KOKKOS/npair_ssa_kokkos.h +++ b/src/KOKKOS/npair_ssa_kokkos.h @@ -47,6 +47,14 @@ class NPairSSAKokkos : public NPair { typename AT::t_int_2d ssa_itemLoc; typename AT::t_int_2d ssa_itemLen; + const int ssa_gphaseCt; + DAT::tdual_int_1d k_ssa_gphaseLen; + DAT::tdual_int_2d k_ssa_gitemLoc; + DAT::tdual_int_2d k_ssa_gitemLen; + typename AT::t_int_1d ssa_gphaseLen; + typename AT::t_int_2d ssa_gitemLoc; + typename AT::t_int_2d ssa_gitemLen; + NPairSSAKokkos(class LAMMPS *); ~NPairSSAKokkos() {} void copy_neighbor_info(); @@ -169,6 +177,10 @@ class NPairSSAKokkosExecute typename 
AT::t_int_1d d_ssa_phaseLen; typename AT::t_int_2d d_ssa_itemLoc; typename AT::t_int_2d d_ssa_itemLen; + int ssa_gphaseCt; + typename AT::t_int_1d d_ssa_gphaseLen; + typename AT::t_int_2d d_ssa_gitemLoc; + typename AT::t_int_2d d_ssa_gitemLen; NPairSSAKokkosExecute( const NeighListKokkos &_neigh_list, @@ -188,6 +200,10 @@ class NPairSSAKokkosExecute const typename AT::t_int_1d &_d_ssa_phaseLen, const typename AT::t_int_2d &_d_ssa_itemLoc, const typename AT::t_int_2d &_d_ssa_itemLen, + const int _ssa_gphaseCt, + const typename AT::t_int_1d &_d_ssa_gphaseLen, + const typename AT::t_int_2d &_d_ssa_gitemLoc, + const typename AT::t_int_2d &_d_ssa_gitemLen, const int _nlocal, const typename AT::t_x_array_randomread &_x, const typename AT::t_int_1d_const &_type, @@ -228,6 +244,10 @@ class NPairSSAKokkosExecute d_ssa_phaseLen(_d_ssa_phaseLen), d_ssa_itemLoc(_d_ssa_itemLoc), d_ssa_itemLen(_d_ssa_itemLen), + ssa_gphaseCt(_ssa_gphaseCt), + d_ssa_gphaseLen(_d_ssa_gphaseLen), + d_ssa_gitemLoc(_d_ssa_gitemLoc), + d_ssa_gitemLen(_d_ssa_gitemLen), nlocal(_nlocal), x(_x),type(_type),mask(_mask),molecule(_molecule), tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular), From f7a48719adba859eede3808e69556f1e33e4dbf0 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 24 Feb 2017 13:35:48 -0500 Subject: [PATCH 156/267] USER-DPD: first attempt at fix_shardlow_kokkos... It compiles! 
--- src/KOKKOS/fix_shardlow_kokkos.cpp | 718 +++++++++++++++++++++++++++++ src/KOKKOS/fix_shardlow_kokkos.h | 154 +++++++ 2 files changed, 872 insertions(+) create mode 100644 src/KOKKOS/fix_shardlow_kokkos.cpp create mode 100644 src/KOKKOS/fix_shardlow_kokkos.h diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp new file mode 100644 index 0000000000..7b2810bb4c --- /dev/null +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -0,0 +1,718 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: + James Larentzos (U.S. Army Research Laboratory) + and Timothy I. Mattox (Engility Corporation) + + Martin Lisal (Institute of Chemical Process Fundamentals + of the Czech Academy of Sciences and J. E. Purkinje University) + + John Brennan, Joshua Moore and William Mattson (Army Research Lab) + + Please cite the related publications: + J. P. Larentzos, J. K. Brennan, J. D. Moore, M. Lisal, W. D. Mattson, + "Parallel implementation of isothermal and isoenergetic Dissipative + Particle Dynamics using Shardlow-like splitting algorithms", + Computer Physics Communications, 2014, 185, pp 1987--1998. + + M. Lisal, J. K. Brennan, J. 
Bonet Avalos, "Dissipative particle dynamics + at isothermal, isobaric, isoenergetic, and isoenthalpic conditions using + Shardlow-like splitting algorithms", Journal of Chemical Physics, 2011, + 135, 204105. +------------------------------------------------------------------------- */ + +#include +#include +#include +#include "fix_shardlow_kokkos.h" +#include "atom.h" +#include "atom_masks.h" +#include "atom_kokkos.h" +#include "force.h" +#include "update.h" +#include "respa.h" +#include "error.h" +#include +#include "atom_vec.h" +#include "comm.h" +#include "neighbor.h" +#include "neigh_list_kokkos.h" +#include "neigh_request.h" +#include "random_mars.h" +#include "memory.h" +#include "domain.h" +#include "modify.h" +// #include "pair_dpd_fdt.h" +#include "pair_dpd_fdt_energy_kokkos.h" +#include "pair.h" +#include "npair_ssa_kokkos.h" +#include "citeme.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +#define EPSILON 1.0e-10 +#define EPSILON_SQUARED ((EPSILON) * (EPSILON)) + + +/* ---------------------------------------------------------------------- */ + +template +FixShardlowKokkos::FixShardlowKokkos(LAMMPS *lmp, int narg, char **arg) : + FixShardlow(lmp, narg, arg), k_pairDPDE(NULL), ghostmax(0), nlocal(0) , nghost(0) +{ + kokkosable = 1; +// atomKK = (AtomKokkos *) atom; +// execution_space = ExecutionSpaceFromDevice::space; + +// datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK; +// datamask_modify = Q_MASK | X_MASK; + + if (narg != 3) error->all(FLERR,"Illegal fix shardlow command"); + +// k_pairDPD = NULL; + k_pairDPDE = NULL; +// k_pairDPD = (PairDPDfdtKokkos *) force->pair_match("dpd/fdt",1); + k_pairDPDE = (PairDPDfdtEnergyKokkos *) force->pair_match("dpd/fdt/energy/kk",1); + +// if(k_pairDPDE){ + comm_forward = 3; + comm_reverse = 5; + p_rand_pool = &(k_pairDPDE->rand_pool); +// } else { +// comm_forward = 3; +// comm_reverse = 3; +// p_rand_pool = &(k_pairDPD->rand_pool); +// } + + + if(/* k_pairDPD == NULL &&*/ 
k_pairDPDE == NULL) + error->all(FLERR,"Must use pair_style "/*"dpd/fdt/kk or "*/"dpd/fdt/energy/kk with fix shardlow/kk"); + +} + +/* ---------------------------------------------------------------------- */ + +template +FixShardlowKokkos::~FixShardlowKokkos() +{ + ghostmax = 0; +} + +/* ---------------------------------------------------------------------- */ + +template +int FixShardlowKokkos::setmask() +{ + int mask = 0; + mask |= INITIAL_INTEGRATE | PRE_NEIGHBOR; + return mask; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixShardlowKokkos::init() +{ + FixShardlow::init(); + + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same::value && + !Kokkos::Impl::is_same::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same::value; + +// neighbor->requests[irequest]->pair = 0; +// neighbor->requests[irequest]->fix = 1; +// neighbor->requests[irequest]->ghost= 1; +// neighbor->requests[irequest]->ssa = 1; + + int ntypes = atom->ntypes; + k_params = Kokkos::DualView + ("FixShardlowKokkos::params",ntypes+1,ntypes+1); + params = k_params.template view(); +//FIXME either create cutsq and fill it in, or just point to pairDPD's... 
+// memory->destroy(cutsq); //FIXME +// memory->create_kokkos(k_cutsq,cutsq,ntypes+1,ntypes+1,"FixShardlowKokkos:cutsq"); + d_cutsq = k_pairDPDE->k_cutsq.template view(); //FIXME + + const double boltz2 = 2.0*force->boltz; + for (int i = 1; i <= ntypes; i++) { + for (int j = i; j <= ntypes; j++) { + F_FLOAT cutone = k_pairDPDE->cut[i][j]; +// k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone; //FIXME + if (cutone > EPSILON) k_params.h_view(i,j).cutinv = 1.0/cutone; + else k_params.h_view(i,j).cutinv = FLT_MAX; + k_params.h_view(i,j).halfsigma = 0.5*k_pairDPDE->sigma[i][j]; + k_params.h_view(i,j).kappa = k_pairDPDE->kappa[i][j]; + k_params.h_view(i,j).alpha = sqrt(boltz2*k_pairDPDE->kappa[i][j]); + + k_params.h_view(j,i) = k_params.h_view(i,j); + + if(ik_cutsq.h_view(i,j); + } + } + } + + // k_cutsq.template modify(); + k_params.template modify(); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixShardlowKokkos::init_list(int id, NeighList *ptr) +{ + k_list = static_cast*>(ptr); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixShardlowKokkos::pre_neighbor() +{ + // NOTE: this logic is specific to orthogonal boxes, not triclinic + + // Enforce the constraint that ghosts must be contained in the nearest sub-domains + double bbx = domain->subhi[0] - domain->sublo[0]; + double bby = domain->subhi[1] - domain->sublo[1]; + double bbz = domain->subhi[2] - domain->sublo[2]; + + double rcut = 2.0*neighbor->cutneighmax; + + if (domain->triclinic) + error->all(FLERR,"Fix shardlow does not yet support triclinic geometries"); + + if(rcut >= bbx || rcut >= bby || rcut>= bbz ) + { + char fmt[] = {"Shardlow algorithm requires sub-domain length > 2*(rcut+skin). 
Either reduce the number of processors requested, or change the cutoff/skin: rcut= %e bbx= %e bby= %e bbz= %e\n"}; + char *msg = (char *) malloc(sizeof(fmt) + 4*15); + sprintf(msg, fmt, rcut, bbx, bby, bbz); + error->one(FLERR, msg); + } + + nlocal = atomKK->nlocal; + nghost = atomKK->nghost; + + // Allocate memory for h_v_t0 to hold the initial velocities for the ghosts + if (nghost > ghostmax) { + ghostmax = nghost; + k_v_t0 = DAT::tdual_v_array("FixShardlowKokkos:v_t0", ghostmax); + // d_v_t0 = k_v_t0.template view(); + h_v_t0 = k_v_t0.h_view; + } + + // Setup views of relevant data + x = atomKK->k_x.template view(); + v = atomKK->k_v.template view(); + h_v = atomKK->k_v.h_view; + uCond = atomKK->k_uCond.template view(); + h_uCond = atomKK->k_uCond.h_view; + uMech = atomKK->k_uMech.template view(); + h_uMech = atomKK->k_uMech.h_view; + type = atomKK->k_type.view(); + if (atomKK->rmass) { + massPerI = true; + masses = atomKK->k_rmass.view(); + } else { + massPerI = false; + masses = atomKK->k_mass.view(); + } +// if(k_pairDPDE){ + dpdTheta = atomKK->k_dpdTheta.view(); + +//} else { +//} +} + +template +void FixShardlowKokkos::setup_pre_neighbor() +{ + pre_neighbor(); +} + +/* ---------------------------------------------------------------------- */ + +#ifdef NOTNOW +/* ---------------------------------------------------------------------- + Perform the stochastic integration and Shardlow update for constant temperature + Allow for both per-type and per-atom mass + + NOTE: only implemented for orthogonal boxes, not triclinic +------------------------------------------------------------------------- */ +template +template +void FixShardlowKokkos::ssa_update_dpd( + int start_ii, int count +) +{ + rand_type rand_gen = p_rand_pool->get_state(); + + const double theta_ij_inv = 1.0/k_pairDPD->temperature; // independent of i,j + const double boltz_inv = 1.0/force->boltz; + const double ftm2v = force->ftm2v; + const double dt = update->dt; + int ct = count; + int ii = 
start_ii; + + while (ct-- > 0) { + const int i = d_ilist(ii); + const int jlen = d_numneigh(ii); + + const double xtmp = x(i, 0); + const double ytmp = x(i, 1); + const double ztmp = x(i, 2); + + // load velocity for i from memory + double vxi = v(i, 0); + double vyi = v(i, 1); + double vzi = v(i, 2); + + const int itype = type(i); + + const double mass_i = masses(massPerI ? i : itype); + const double massinv_i = 1.0 / mass_i; + + // Loop over Directional Neighbors only + for (int jj = 0; jj < jlen; jj++) { + const int j = d_neighbors(ii,jj) & NEIGHMASK; + int jtype = type[j]; + + const X_FLOAT delx = xtmp - x(j, 0); + const X_FLOAT dely = ytmp - x(j, 1); + const X_FLOAT delz = ztmp - x(j, 2); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + // NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test + if ((rsq < STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype)) + && (rsq >= EPSILON_SQUARED)) { + double r = sqrt(rsq); + double rinv = 1.0/r; + double delx_rinv = delx*rinv; + double dely_rinv = dely*rinv; + double delz_rinv = delz*rinv; + + double wr = 1.0 - r*(STACKPARAMS?m_params[itype][jtype].cutinv:params(itype,jtype).cutinv); + double wdt = wr*wr*dt; + + double halfsigma_ij = STACKPARAMS?m_params[itype][jtype].halfsigma:params(itype,jtype).halfsigma; + double halfgamma_ij = halfsigma_ij*halfsigma_ij*boltz_inv*theta_ij_inv; + + double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v * pRNG->gaussian(); + + const double mass_j = masses(massPerI ? 
j : jtype); + double massinv_j = 1.0 / mass_j; + + double gammaFactor = halfgamma_ij*wdt*ftm2v; + double inv_1p_mu_gammaFactor = 1.0/(1.0 + (massinv_i + massinv_j)*gammaFactor); + + double vxj = v(j, 0); + double vyj = v(j, 1); + double vzj = v(j, 2); + + // Compute the initial velocity difference between atom i and atom j + double delvx = vxi - vxj; + double delvy = vyi - vyj; + double delvz = vzi - vzj; + double dot_rinv = (delx_rinv*delvx + dely_rinv*delvy + delz_rinv*delvz); + + // Compute momentum change between t and t+dt + double factorA = sigmaRand - gammaFactor*dot_rinv; + + // Update the velocity on i + vxi += delx_rinv*factorA*massinv_i; + vyi += dely_rinv*factorA*massinv_i; + vzi += delz_rinv*factorA*massinv_i; + + // Update the velocity on j + vxj -= delx_rinv*factorA*massinv_j; + vyj -= dely_rinv*factorA*massinv_j; + vzj -= delz_rinv*factorA*massinv_j; + + //ii. Compute the new velocity diff + delvx = vxi - vxj; + delvy = vyi - vyj; + delvz = vzi - vzj; + dot_rinv = delx_rinv*delvx + dely_rinv*delvy + delz_rinv*delvz; + + // Compute the new momentum change between t and t+dt + double factorB = (sigmaRand - gammaFactor*dot_rinv)*inv_1p_mu_gammaFactor; + + // Update the velocity on i + vxi += delx_rinv*factorB*massinv_i; + vyi += dely_rinv*factorB*massinv_i; + vzi += delz_rinv*factorB*massinv_i; + + // Update the velocity on j + vxj -= delx_rinv*factorB*massinv_j; + vyj -= dely_rinv*factorB*massinv_j; + vzj -= delz_rinv*factorB*massinv_j; + + // Store updated velocity for j + v(j, 0) = vxj; + v(j, 1) = vyj; + v(j, 2) = vzj; + } + } + // store updated velocity for i + v(i, 0) = vxi; + v(i, 1) = vyi; + v(i, 2) = vzi; + } + + p_rand_pool->free_state(rand_gen); +} +#endif + +/* ---------------------------------------------------------------------- + Perform the stochastic integration and Shardlow update for constant energy + Allow for both per-type and per-atom mass + + NOTE: only implemented for orthogonal boxes, not triclinic 
+------------------------------------------------------------------------- */ +template +template +void FixShardlowKokkos::ssa_update_dpde( + int start_ii, int count +) +{ + rand_type rand_gen = p_rand_pool->get_state(); + + const double boltz_inv = 1.0/force->boltz; + const double ftm2v = force->ftm2v; + const double dt = update->dt; + int ct = count; + int ii = start_ii; + + while (ct-- > 0) { + const int i = d_ilist(ii); + const int jlen = d_numneigh(ii); + + const double xtmp = x(i, 0); + const double ytmp = x(i, 1); + const double ztmp = x(i, 2); + + // load velocity for i from memory + double vxi = v(i, 0); + double vyi = v(i, 1); + double vzi = v(i, 2); + + double uMech_i = uMech(i); + double uCond_i = uCond(i); + const int itype = type(i); + + const double theta_i_inv = 1.0/dpdTheta(i); + const double mass_i = masses(massPerI ? i : itype); + const double massinv_i = 1.0 / mass_i; + const double mass_i_div_neg4_ftm2v = mass_i*(-0.25)/ftm2v; + + // Loop over Directional Neighbors only + for (int jj = 0; jj < jlen; jj++) { + const int j = d_neighbors(ii,jj) & NEIGHMASK; + const int jtype = type(j); + + const X_FLOAT delx = xtmp - x(j, 0); + const X_FLOAT dely = ytmp - x(j, 1); + const X_FLOAT delz = ztmp - x(j, 2); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + // NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test; the ?: cutoff select is parenthesized because ?: binds looser than < + if ((rsq < (STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype))) + && (rsq >= EPSILON_SQUARED)) { + double r = sqrt(rsq); + double rinv = 1.0/r; + double delx_rinv = delx*rinv; + double dely_rinv = dely*rinv; + double delz_rinv = delz*rinv; + + double wr = 1.0 - r*(STACKPARAMS?m_params[itype][jtype].cutinv:params(itype,jtype).cutinv); + double wdt = wr*wr*dt; + + // Compute the current temperature + double theta_j_inv = 1.0/dpdTheta(j); + double theta_ij_inv = 0.5*(theta_i_inv + theta_j_inv); + + double halfsigma_ij = STACKPARAMS?m_params[itype][jtype].halfsigma:params(itype,jtype).halfsigma; + double halfgamma_ij =
halfsigma_ij*halfsigma_ij*boltz_inv*theta_ij_inv; + + double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v * rand_gen.normal(); + + const double mass_j = masses(massPerI ? j : jtype); + double mass_ij_div_neg4_ftm2v = mass_j*mass_i_div_neg4_ftm2v; + double massinv_j = 1.0 / mass_j; + + // Compute uCond + double kappa_ij = STACKPARAMS?m_params[itype][jtype].kappa:params(itype,jtype).kappa; + double alpha_ij = STACKPARAMS?m_params[itype][jtype].alpha:params(itype,jtype).alpha; + double del_uCond = alpha_ij*wr*dtsqrt * rand_gen.normal(); + + del_uCond += kappa_ij*(theta_i_inv - theta_j_inv)*wdt; + uCond[j] -= del_uCond; + uCond_i += del_uCond; + + double gammaFactor = halfgamma_ij*wdt*ftm2v; + double inv_1p_mu_gammaFactor = 1.0/(1.0 + (massinv_i + massinv_j)*gammaFactor); + + double vxj = v(j, 0); + double vyj = v(j, 1); + double vzj = v(j, 2); + double dot4 = vxj*vxj + vyj*vyj + vzj*vzj; + double dot3 = vxi*vxi + vyi*vyi + vzi*vzi; + + // Compute the initial velocity difference between atom i and atom j + double delvx = vxi - vxj; + double delvy = vyi - vyj; + double delvz = vzi - vzj; + double dot_rinv = (delx_rinv*delvx + dely_rinv*delvy + delz_rinv*delvz); + + // Compute momentum change between t and t+dt + double factorA = sigmaRand - gammaFactor*dot_rinv; + + // Update the velocity on i + vxi += delx_rinv*factorA*massinv_i; + vyi += dely_rinv*factorA*massinv_i; + vzi += delz_rinv*factorA*massinv_i; + + // Update the velocity on j + vxj -= delx_rinv*factorA*massinv_j; + vyj -= dely_rinv*factorA*massinv_j; + vzj -= delz_rinv*factorA*massinv_j; + + //ii. 
Compute the new velocity diff + delvx = vxi - vxj; + delvy = vyi - vyj; + delvz = vzi - vzj; + dot_rinv = delx_rinv*delvx + dely_rinv*delvy + delz_rinv*delvz; + + // Compute the new momentum change between t and t+dt + double factorB = (sigmaRand - gammaFactor*dot_rinv)*inv_1p_mu_gammaFactor; + + // Update the velocity on i + vxi += delx_rinv*factorB*massinv_i; + vyi += dely_rinv*factorB*massinv_i; + vzi += delz_rinv*factorB*massinv_i; + double partial_uMech = (vxi*vxi + vyi*vyi + vzi*vzi - dot3)*massinv_j; + + // Update the velocity on j + vxj -= delx_rinv*factorB*massinv_j; + vyj -= dely_rinv*factorB*massinv_j; + vzj -= delz_rinv*factorB*massinv_j; + partial_uMech += (vxj*vxj + vyj*vyj + vzj*vzj - dot4)*massinv_i; + + // Store updated velocity for j + v(j, 0) = vxj; + v(j, 1) = vyj; + v(j, 2) = vzj; + + // Compute uMech + double del_uMech = partial_uMech*mass_ij_div_neg4_ftm2v; + uMech_i += del_uMech; + uMech(j) += del_uMech; + } + } + // store updated velocity for i + v(i, 0) = vxi; + v(i, 1) = vyi; + v(i, 2) = vzi; + // store updated uMech and uCond for i + uMech(i) = uMech_i; + uCond(i) = uCond_i; + ii++; + } + + p_rand_pool->free_state(rand_gen); +} + + +template +void FixShardlowKokkos::initial_integrate(int vflag) +{ + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + + k_list->clean_copy(); + //cleanup_copy(); + copymode = 1; + + dtsqrt = sqrt(update->dt); + + NPairSSAKokkos *np_ssa = dynamic_cast*>(list->np); + if (!np_ssa) error->one(FLERR, "NPair wasn't a NPairSSAKokkos object"); + ssa_phaseCt = np_ssa->ssa_phaseCt; + ssa_phaseLen = np_ssa->ssa_phaseLen; + ssa_itemLoc = np_ssa->ssa_itemLoc; + ssa_itemLen = np_ssa->ssa_itemLen; + ssa_gphaseCt = np_ssa->ssa_gphaseCt; + ssa_gphaseLen = np_ssa->ssa_gphaseLen; + ssa_gitemLoc = np_ssa->ssa_gitemLoc; + ssa_gitemLen = np_ssa->ssa_gitemLen; + + // process neighbors in the local AIR + for (int workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) { + int 
workItemCt = ssa_phaseLen[workPhase]; + + if(atom->ntypes > MAX_TYPES_STACKPARAMS) { + Kokkos::parallel_for(workItemCt, KOKKOS_LAMBDA (const int workItem ) { + int ct = ssa_itemLen(workPhase, workItem); + int ii = ssa_itemLoc(workPhase, workItem); + ssa_update_dpde(ii, ct); + }); + } else { + Kokkos::parallel_for(workItemCt, KOKKOS_LAMBDA (const int workItem ) { + int ct = ssa_itemLen(workPhase, workItem); + int ii = ssa_itemLoc(workPhase, workItem); + ssa_update_dpde(ii, ct); + }); + } + } + + //Loop over all 13 outward directions (7 stages) + for (int workPhase = 0; workPhase < ssa_gphaseCt; ++workPhase) { + // int airnum = workPhase + 1; + int workItemCt = ssa_gphaseLen[workPhase]; + + // Communicate the updated velocities to all nodes + comm->forward_comm_fix(this); + + if(k_pairDPDE){ + // Zero out the ghosts' uCond & uMech to be used as delta accumulators +// memset(&(atom->uCond[nlocal]), 0, sizeof(double)*nghost); +// memset(&(atom->uMech[nlocal]), 0, sizeof(double)*nghost); + + Kokkos::parallel_for(Kokkos::RangePolicy(nlocal,nlocal+nghost), KOKKOS_LAMBDA (const int i) { + uCond(i) = 0.0; + uMech(i) = 0.0; + }); + DeviceType::fence(); + } + + // process neighbors in this AIR + if(atom->ntypes > MAX_TYPES_STACKPARAMS) { + Kokkos::parallel_for(workItemCt, KOKKOS_LAMBDA (const int workItem ) { + int ct = ssa_gitemLen(workPhase, workItem); + int ii = ssa_gitemLoc(workPhase, workItem); + ssa_update_dpde(ii, ct); + }); + } else { + Kokkos::parallel_for(workItemCt, KOKKOS_LAMBDA (const int workItem ) { + int ct = ssa_gitemLen(workPhase, workItem); + int ii = ssa_gitemLoc(workPhase, workItem); + ssa_update_dpde(ii, ct); + }); + } + + // Communicate the ghost deltas to the atom owners + comm->reverse_comm_fix(this); + + } //End Loop over all directions For airnum = Top, Top-Right, Right, Bottom-Right, Back + +} + +/* ---------------------------------------------------------------------- */ + +template +int FixShardlowKokkos::pack_forward_comm(int n, int *list, 
double *buf, int pbc_flag, int *pbc) +{ + int ii,jj,m; + + m = 0; + for (ii = 0; ii < n; ii++) { + jj = list[ii]; + buf[m++] = h_v(jj, 0); + buf[m++] = h_v(jj, 1); + buf[m++] = h_v(jj, 2); + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixShardlowKokkos::unpack_forward_comm(int n, int first, double *buf) +{ + int ii,m,last; + + m = 0; + last = first + n ; + for (ii = first; ii < last; ii++) { + h_v_t0(ii - nlocal, 0) = h_v(ii, 0) = buf[m++]; + h_v_t0(ii - nlocal, 1) = h_v(ii, 1) = buf[m++]; + h_v_t0(ii - nlocal, 2) = h_v(ii, 2) = buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +int FixShardlowKokkos::pack_reverse_comm(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + buf[m++] = h_v(i, 0) - h_v_t0(i - nlocal, 0); + buf[m++] = h_v(i, 1) - h_v_t0(i - nlocal, 1); + buf[m++] = h_v(i, 2) - h_v_t0(i - nlocal, 2); + if(k_pairDPDE){ + buf[m++] = h_uCond(i); // for ghosts, this is an accumulated delta + buf[m++] = h_uMech(i); // for ghosts, this is an accumulated delta + } + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixShardlowKokkos::unpack_reverse_comm(int n, int *list, double *buf) +{ + int i,j,m; + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + + h_v(j, 0) += buf[m++]; + h_v(j, 1) += buf[m++]; + h_v(j, 2) += buf[m++]; + if(k_pairDPDE){ + h_uCond(j) += buf[m++]; // add in the accumulated delta + h_uMech(j) += buf[m++]; // add in the accumulated delta + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +double FixShardlowKokkos::memory_usage() +{ + double bytes = 0.0; + bytes += sizeof(double)*3*ghostmax; // v_t0[] + return bytes; +} + +namespace LAMMPS_NS { +template class FixShardlowKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class 
FixShardlowKokkos; +#endif +} diff --git a/src/KOKKOS/fix_shardlow_kokkos.h b/src/KOKKOS/fix_shardlow_kokkos.h new file mode 100644 index 0000000000..08d9034fdf --- /dev/null +++ b/src/KOKKOS/fix_shardlow_kokkos.h @@ -0,0 +1,154 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(shardlow/kk,FixShardlowKokkos) +FixStyle(shardlow/kk/device,FixShardlowKokkos) +FixStyle(shardlow/kk/host,FixShardlowKokkos) + +#else + +#ifndef LMP_FIX_SHARDLOW_KOKKOS_H +#define LMP_FIX_SHARDLOW_KOKKOS_H + +#include "float.h" +#include "fix_shardlow.h" +#include "kokkos_type.h" +#include "neigh_list_kokkos.h" +#include "pair_dpd_fdt_energy_kokkos.h" + +namespace LAMMPS_NS { + +template +class FixShardlowKokkos : public FixShardlow { + public: + typedef ArrayTypes AT; + NeighListKokkos *k_list; // The SSA specific neighbor list + + FixShardlowKokkos(class LAMMPS *, int, char **); + ~FixShardlowKokkos(); + int setmask(); + virtual void init(); + virtual void init_list(int, class NeighList *); + virtual void initial_integrate(int); + void setup_pre_neighbor(); + void pre_neighbor(); + + double memory_usage(); + + int pack_reverse_comm(int, int, double *); + void unpack_reverse_comm(int, int *, double *); + int pack_forward_comm(int , int *, double *, int, int *); + void unpack_forward_comm(int , int , double *); + + struct params_ssa { + KOKKOS_INLINE_FUNCTION + 
params_ssa(){cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;}; + KOKKOS_INLINE_FUNCTION + params_ssa(int i){cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;}; + F_FLOAT cutinv,halfsigma,kappa,alpha; + }; + + protected: +// class PairDPDfdt *pairDPD; + PairDPDfdtEnergyKokkos *k_pairDPDE; + Kokkos::Random_XorShift64_Pool *p_rand_pool; + typedef typename Kokkos::Random_XorShift64_Pool::generator_type rand_type; + + Kokkos::DualView k_params; + typename Kokkos::DualView::t_dev_const_um params; + // hardwired to space for MAX_TYPES_STACKPARAMS (12) atom types + params_ssa m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + + F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + typename ArrayTypes::t_ffloat_2d d_cutsq; + + typename DAT::tdual_v_array k_v_t0; + // typename AT::t_v_array d_v_t0; v_t0 only used in comm routines (on host) + typename HAT::t_v_array h_v_t0; + + typename AT::t_x_array x; + typename AT::t_v_array v; + typename HAT::t_v_array h_v; + typename AT::t_efloat_1d uCond, uMech; + typename HAT::t_efloat_1d h_uCond, h_uMech; + typename AT::t_int_1d type; + bool massPerI; + typename AT::t_float_1d_randomread masses; + typename AT::t_efloat_1d dpdTheta; + + double dtsqrt; // = sqrt(update->dt); + int ghostmax; + int nlocal, nghost; + + typename AT::t_neighbors_2d d_neighbors; + typename AT::t_int_1d_randomread d_ilist, d_numneigh; + + int ssa_phaseCt; + typename AT::t_int_1d ssa_phaseLen; + typename AT::t_int_2d ssa_itemLoc, ssa_itemLen; + + int ssa_gphaseCt; + typename AT::t_int_1d ssa_gphaseLen; + typename AT::t_int_2d ssa_gitemLoc, ssa_gitemLen; + + +// template +// void ssa_update_dpd(int, int); // Constant Temperature + template + void ssa_update_dpde(int, int); // Constant Energy + +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. 
You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Must use dpd/fdt pair_style with fix shardlow + +Self-explanatory. + +E: Must use pair_style dpd/fdt or dpd/fdt/energy with fix shardlow + +E: A deterministic integrator must be specified after fix shardlow in input +file (e.g. fix nve or fix nph). + +Self-explanatory. + +E: Cannot use constant temperature integration routines with DPD + +Self-explanatory. Must use deterministic integrators such as nve or nph + +E: Fix shardlow does not yet support triclinic geometries + +Self-explanatory. + +E: Shardlow algorithm requires sub-domain length > 2*(rcut+skin). Either +reduce the number of processors requested, or change the cutoff/skin + +The Shardlow splitting algorithm requires the size of the sub-domain lengths +to be larger than twice the cutoff+skin. Generally, the domain decomposition +is dependent on the number of processors requested. + +*/ From 71379487abc7062a698e960c68a97766829bffdf Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 24 Feb 2017 17:35:58 -0500 Subject: [PATCH 157/267] USER-DPD: variety of fixes for new SSA Kokkos code. Still not functional.
--- src/KOKKOS/fix_shardlow_kokkos.cpp | 1 + src/KOKKOS/nbin_ssa_kokkos.cpp | 18 ++++++++++++++++++ src/KOKKOS/npair_ssa_kokkos.cpp | 10 ++++++++-- 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index 7b2810bb4c..a01cc36c3e 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -179,6 +179,7 @@ void FixShardlowKokkos::init() template void FixShardlowKokkos::init_list(int id, NeighList *ptr) { + FixShardlow::init_list(id, ptr); k_list = static_cast*>(ptr); } diff --git a/src/KOKKOS/nbin_ssa_kokkos.cpp b/src/KOKKOS/nbin_ssa_kokkos.cpp index 32a77119de..ebd07752b0 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.cpp +++ b/src/KOKKOS/nbin_ssa_kokkos.cpp @@ -38,10 +38,28 @@ NBinSSAKokkos::NBinSSAKokkos(LAMMPS *lmp) : NBinStandard(lmp) atoms_per_bin = ghosts_per_gbin = 16; d_resize = typename AT::t_int_scalar("NBinSSAKokkos::d_resize"); + d_lbinxlo = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinxlo"); + d_lbinylo = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinylo"); + d_lbinzlo = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinzlo"); + d_lbinxhi = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinxhi"); + d_lbinyhi = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinyhi"); + d_lbinzhi = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinzhi"); #ifndef KOKKOS_USE_CUDA_UVM h_resize = Kokkos::create_mirror_view(d_resize); + h_lbinxlo = Kokkos::create_mirror_view(d_lbinxlo); + h_lbinylo = Kokkos::create_mirror_view(d_lbinylo); + h_lbinzlo = Kokkos::create_mirror_view(d_lbinzlo); + h_lbinxhi = Kokkos::create_mirror_view(d_lbinxhi); + h_lbinyhi = Kokkos::create_mirror_view(d_lbinyhi); + h_lbinzhi = Kokkos::create_mirror_view(d_lbinzhi); #else h_resize = d_resize; + h_lbinxlo = d_lbinxlo; + h_lbinylo = d_lbinylo; + h_lbinzlo = d_lbinzlo; + h_lbinxhi = d_lbinxhi; + h_lbinyhi = d_lbinyhi; + h_lbinzhi = d_lbinzhi; #endif h_resize() = 1; diff --git 
a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index c70fd0087e..f94d51197a 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -338,6 +338,12 @@ void NPairSSAKokkos::build(NeighList *list_) k_ssa_phaseLen.modify(); k_ssa_itemLoc.modify(); k_ssa_itemLen.modify(); + k_ssa_gphaseLen.modify(); + k_ssa_gitemLoc.modify(); + k_ssa_gitemLen.modify(); + + list->inum = data.neigh_list.inum; //FIXME once the above is in a parallel_for + list->gnum = data.neigh_list.gnum; // it will need a deep_copy or something list->k_ilist.template modify(); } @@ -450,7 +456,7 @@ void NPairSSAKokkosExecute::build_locals() //FIXME if (ssa_phaseCt != workPhase) error->one(FLERR,"ssa_phaseCt was wrong"); - neigh_list.inum = inum; //FIXME + neigh_list.inum = inum; } @@ -545,7 +551,7 @@ void NPairSSAKokkosExecute::build_ghosts() d_ssa_gitemLen(workPhase,workItem) = inum + gnum - d_ssa_gitemLoc(workPhase,workItem); if (d_ssa_gitemLen(workPhase,workItem) > 0) workItem++; } - neigh_list.gnum = gnum; //FIXME + neigh_list.gnum = gnum; } } From c56e0692b9141d1f4442b61f95a7e47d998a44dc Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 24 Feb 2017 17:38:46 -0500 Subject: [PATCH 158/267] USER-DPD Kokkos: enable install of SSA Kokkos code --- src/KOKKOS/Install.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index ea70ae4ca1..dda1ba011b 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -103,6 +103,8 @@ action fix_reaxc_species_kokkos.cpp fix_reaxc_species.cpp action fix_reaxc_species_kokkos.h fix_reaxc_species.h action fix_setforce_kokkos.cpp action fix_setforce_kokkos.h +action fix_shardlow_kokkos.cpp fix_shardlow.cpp +action fix_shardlow_kokkos.h fix_shardlow.h action fix_momentum_kokkos.cpp action fix_momentum_kokkos.h action fix_wall_reflect_kokkos.cpp @@ -134,8 +136,12 @@ action npair_copy_kokkos.cpp action npair_copy_kokkos.h action npair_kokkos.cpp action 
npair_kokkos.h +action npair_ssa_kokkos.cpp npair_half_bin_newton_ssa.cpp +action npair_ssa_kokkos.h npair_half_bin_newton_ssa.h action nbin_kokkos.cpp action nbin_kokkos.h +action nbin_ssa_kokkos.cpp nbin_ssa.cpp +action nbin_ssa_kokkos.h nbin_ssa.h action math_special_kokkos.cpp action math_special_kokkos.h action pair_buck_coul_cut_kokkos.cpp From 6ea290a69963a9e6619c51e81f004906194870fe Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 24 Feb 2017 17:41:57 -0500 Subject: [PATCH 159/267] DEBUG: make FixShardlowKokkos have its own rand_pool, plus debug code. ssa_update_dpde() hangs on first use of rand_gen.normal() Switching to not using a pointer to PairDPDfdtEnergyKokkos's rand_pool had no noticeable effect. --- src/KOKKOS/fix_shardlow_kokkos.cpp | 37 ++++++++++++++++++++---------- src/KOKKOS/fix_shardlow_kokkos.h | 5 ++-- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index a01cc36c3e..fe05db6d33 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -71,7 +71,7 @@ using namespace FixConst; template FixShardlowKokkos::FixShardlowKokkos(LAMMPS *lmp, int narg, char **arg) : - FixShardlow(lmp, narg, arg), k_pairDPDE(NULL), ghostmax(0), nlocal(0) , nghost(0) + FixShardlow(lmp, narg, arg), k_pairDPDE(NULL), ghostmax(0), nlocal(0) , nghost(0), rand_pool(comm->me) { kokkosable = 1; // atomKK = (AtomKokkos *) atom; @@ -85,12 +85,12 @@ FixShardlowKokkos::FixShardlowKokkos(LAMMPS *lmp, int narg, char **a // k_pairDPD = NULL; k_pairDPDE = NULL; // k_pairDPD = (PairDPDfdtKokkos *) force->pair_match("dpd/fdt",1); - k_pairDPDE = (PairDPDfdtEnergyKokkos *) force->pair_match("dpd/fdt/energy/kk",1); + k_pairDPDE = dynamic_cast *>(force->pair_match("dpd/fdt/energy",0)); // if(k_pairDPDE){ comm_forward = 3; comm_reverse = 5; - p_rand_pool = &(k_pairDPDE->rand_pool); +// p_rand_pool = &(k_pairDPDE->rand_pool); // } else { // comm_forward = 3; //
comm_reverse = 3; @@ -263,7 +263,8 @@ void FixShardlowKokkos::ssa_update_dpd( int start_ii, int count ) { - rand_type rand_gen = p_rand_pool->get_state(); + rand_type rand_gen = rand_pool.get_state(); +// rand_type rand_gen = p_rand_pool->get_state(); const double theta_ij_inv = 1.0/k_pairDPD->temperature; // independent of i,j const double boltz_inv = 1.0/force->boltz; @@ -377,7 +378,8 @@ void FixShardlowKokkos::ssa_update_dpd( v(i, 2) = vzi; } - p_rand_pool->free_state(rand_gen); +// p_rand_pool->free_state(rand_gen); + rand_pool.free_state(rand_gen); } #endif @@ -390,10 +392,13 @@ void FixShardlowKokkos::ssa_update_dpd( template template void FixShardlowKokkos::ssa_update_dpde( - int start_ii, int count + int start_ii, int count, int id ) { - rand_type rand_gen = p_rand_pool->get_state(); + rand_type rand_gen = rand_pool.get_state(); +// rand_type rand_gen = p_rand_pool->get_state(); + +//fprintf(stderr, "ssa_update_dpde(%d,%d,%d)\n", start_ii, count, id); const double boltz_inv = 1.0/force->boltz; const double ftm2v = force->ftm2v; @@ -401,6 +406,11 @@ void FixShardlowKokkos::ssa_update_dpde( int ct = count; int ii = start_ii; +// double randnum1 = rand_gen.normal(); +//fprintf(stderr, "randnum1 = %g\n", randnum1); +// double randnum2 = rand_gen.normal(); +//fprintf(stderr, "randnum2 = %g\n", randnum2); + while (ct-- > 0) { const int i = d_ilist(ii); const int jlen = d_numneigh(ii); @@ -453,6 +463,7 @@ void FixShardlowKokkos::ssa_update_dpde( double halfgamma_ij = halfsigma_ij*halfsigma_ij*boltz_inv*theta_ij_inv; double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v * rand_gen.normal(); +// double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v * randnum1;//rand_gen.normal(); const double mass_j = masses(massPerI ? 
j : jtype); double mass_ij_div_neg4_ftm2v = mass_j*mass_i_div_neg4_ftm2v; @@ -462,6 +473,7 @@ void FixShardlowKokkos::ssa_update_dpde( double kappa_ij = STACKPARAMS?m_params[itype][jtype].kappa:params(itype,jtype).kappa; double alpha_ij = STACKPARAMS?m_params[itype][jtype].alpha:params(itype,jtype).alpha; double del_uCond = alpha_ij*wr*dtsqrt * rand_gen.normal(); +// double del_uCond = alpha_ij*wr*dtsqrt * randnum2;//rand_gen.normal(); del_uCond += kappa_ij*(theta_i_inv - theta_j_inv)*wdt; uCond[j] -= del_uCond; @@ -537,7 +549,8 @@ void FixShardlowKokkos::ssa_update_dpde( ii++; } - p_rand_pool->free_state(rand_gen); + rand_pool.free_state(rand_gen); +// p_rand_pool->free_state(rand_gen); } @@ -573,13 +586,13 @@ void FixShardlowKokkos::initial_integrate(int vflag) Kokkos::parallel_for(workItemCt, KOKKOS_LAMBDA (const int workItem ) { int ct = ssa_itemLen(workPhase, workItem); int ii = ssa_itemLoc(workPhase, workItem); - ssa_update_dpde(ii, ct); + ssa_update_dpde(ii, ct, workItem); }); } else { Kokkos::parallel_for(workItemCt, KOKKOS_LAMBDA (const int workItem ) { int ct = ssa_itemLen(workPhase, workItem); int ii = ssa_itemLoc(workPhase, workItem); - ssa_update_dpde(ii, ct); + ssa_update_dpde(ii, ct, workItem); }); } } @@ -609,13 +622,13 @@ void FixShardlowKokkos::initial_integrate(int vflag) Kokkos::parallel_for(workItemCt, KOKKOS_LAMBDA (const int workItem ) { int ct = ssa_gitemLen(workPhase, workItem); int ii = ssa_gitemLoc(workPhase, workItem); - ssa_update_dpde(ii, ct); + ssa_update_dpde(ii, ct, workItem); }); } else { Kokkos::parallel_for(workItemCt, KOKKOS_LAMBDA (const int workItem ) { int ct = ssa_gitemLen(workPhase, workItem); int ii = ssa_gitemLoc(workPhase, workItem); - ssa_update_dpde(ii, ct); + ssa_update_dpde(ii, ct, workItem); }); } diff --git a/src/KOKKOS/fix_shardlow_kokkos.h b/src/KOKKOS/fix_shardlow_kokkos.h index 08d9034fdf..b4267226e6 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.h +++ b/src/KOKKOS/fix_shardlow_kokkos.h @@ -63,7 +63,8 @@ class 
FixShardlowKokkos : public FixShardlow { protected: // class PairDPDfdt *pairDPD; PairDPDfdtEnergyKokkos *k_pairDPDE; - Kokkos::Random_XorShift64_Pool *p_rand_pool; + Kokkos::Random_XorShift64_Pool rand_pool; +// Kokkos::Random_XorShift64_Pool *p_rand_pool; typedef typename Kokkos::Random_XorShift64_Pool::generator_type rand_type; Kokkos::DualView k_params; @@ -108,7 +109,7 @@ class FixShardlowKokkos : public FixShardlow { // template // void ssa_update_dpd(int, int); // Constant Temperature template - void ssa_update_dpde(int, int); // Constant Energy + void ssa_update_dpde(int, int, int); // Constant Energy }; From c2e3a76225f421bee13b7256b8be8f1730049214 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 24 Feb 2017 19:07:55 -0500 Subject: [PATCH 160/267] USER-DPD Kokkos: rand seed can't be zero, so add some salt. --- src/KOKKOS/fix_shardlow_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index fe05db6d33..65bb7033bb 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -71,7 +71,7 @@ using namespace FixConst; template FixShardlowKokkos::FixShardlowKokkos(LAMMPS *lmp, int narg, char **arg) : - FixShardlow(lmp, narg, arg), k_pairDPDE(NULL), ghostmax(0), nlocal(0) , nghost(0), rand_pool(comm->me) + FixShardlow(lmp, narg, arg), k_pairDPDE(NULL), ghostmax(0), nlocal(0) , nghost(0), rand_pool(1234567 + comm->me) { kokkosable = 1; // atomKK = (AtomKokkos *) atom; From b053c367ea1edd00138e68f673a8928dd9d42151 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 24 Feb 2017 19:09:07 -0500 Subject: [PATCH 161/267] USER-DPD Kokkos: remove extranious debugging code --- src/KOKKOS/fix_shardlow_kokkos.cpp | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index 65bb7033bb..1ec1455b23 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ 
b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -90,11 +90,9 @@ FixShardlowKokkos::FixShardlowKokkos(LAMMPS *lmp, int narg, char **a // if(k_pairDPDE){ comm_forward = 3; comm_reverse = 5; -// p_rand_pool = &(k_pairDPDE->rand_pool); // } else { // comm_forward = 3; // comm_reverse = 3; -// p_rand_pool = &(k_pairDPD->rand_pool); // } @@ -264,7 +262,6 @@ void FixShardlowKokkos::ssa_update_dpd( ) { rand_type rand_gen = rand_pool.get_state(); -// rand_type rand_gen = p_rand_pool->get_state(); const double theta_ij_inv = 1.0/k_pairDPD->temperature; // independent of i,j const double boltz_inv = 1.0/force->boltz; @@ -378,7 +375,6 @@ void FixShardlowKokkos::ssa_update_dpd( v(i, 2) = vzi; } -// p_rand_pool->free_state(rand_gen); rand_pool.free_state(rand_gen); } #endif @@ -396,9 +392,6 @@ void FixShardlowKokkos::ssa_update_dpde( ) { rand_type rand_gen = rand_pool.get_state(); -// rand_type rand_gen = p_rand_pool->get_state(); - -//fprintf(stderr, "ssa_update_dpde(%d,%d,%d)\n", start_ii, count, id); const double boltz_inv = 1.0/force->boltz; const double ftm2v = force->ftm2v; @@ -406,11 +399,6 @@ void FixShardlowKokkos::ssa_update_dpde( int ct = count; int ii = start_ii; -// double randnum1 = rand_gen.normal(); -//fprintf(stderr, "randnum1 = %g\n", randnum1); -// double randnum2 = rand_gen.normal(); -//fprintf(stderr, "randnum2 = %g\n", randnum2); - while (ct-- > 0) { const int i = d_ilist(ii); const int jlen = d_numneigh(ii); @@ -463,7 +451,6 @@ void FixShardlowKokkos::ssa_update_dpde( double halfgamma_ij = halfsigma_ij*halfsigma_ij*boltz_inv*theta_ij_inv; double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v * rand_gen.normal(); -// double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v * randnum1;//rand_gen.normal(); const double mass_j = masses(massPerI ? 
j : jtype); double mass_ij_div_neg4_ftm2v = mass_j*mass_i_div_neg4_ftm2v; @@ -473,7 +460,6 @@ void FixShardlowKokkos::ssa_update_dpde( double kappa_ij = STACKPARAMS?m_params[itype][jtype].kappa:params(itype,jtype).kappa; double alpha_ij = STACKPARAMS?m_params[itype][jtype].alpha:params(itype,jtype).alpha; double del_uCond = alpha_ij*wr*dtsqrt * rand_gen.normal(); -// double del_uCond = alpha_ij*wr*dtsqrt * randnum2;//rand_gen.normal(); del_uCond += kappa_ij*(theta_i_inv - theta_j_inv)*wdt; uCond[j] -= del_uCond; @@ -550,7 +536,6 @@ void FixShardlowKokkos::ssa_update_dpde( } rand_pool.free_state(rand_gen); -// p_rand_pool->free_state(rand_gen); } From 21619b29768553bfcf9d31347c00904973e155d4 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 24 Feb 2017 22:16:33 -0500 Subject: [PATCH 162/267] USER-DPD Kokkos: correct the setup of the ghost SSA workplan --- src/KOKKOS/npair_ssa_kokkos.cpp | 130 ++++++++++++++++---------------- 1 file changed, 67 insertions(+), 63 deletions(-) diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index f94d51197a..7eea57d492 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -472,84 +472,88 @@ void NPairSSAKokkosExecute::build_ghosts() // since these are ghosts, must check if stencil bin is out of bounds for (int workPhase = 0; workPhase < ssa_gphaseCt; workPhase++) { int airnum = workPhase + 1; - int workItem = 0; //FIXME for now, there is only 1 workItem for each ghost AIR - d_ssa_gitemLoc(workPhase, workItem) = inum + gnum; // record where workItem starts in ilist - for (int il = 0; il < c_gbincount(airnum); ++il) { - const int i = c_gbins(airnum, il); - n = 0; + //FIXME for now, there is only 1 workItem for each ghost AIR + int workItem; + for (workItem = 0; workItem < 1; ++workItem) { + d_ssa_gitemLoc(workPhase, workItem) = inum + gnum; // record where workItem starts in ilist + for (int il = 0; il < c_gbincount(airnum); ++il) { + const int i = c_gbins(airnum, il); + n 
= 0; - const AtomNeighbors neighbors_i = neigh_list.get_neighbors(inum + gnum); - const X_FLOAT xtmp = x(i, 0); - const X_FLOAT ytmp = x(i, 1); - const X_FLOAT ztmp = x(i, 2); - const int itype = type(i); + const AtomNeighbors neighbors_i = neigh_list.get_neighbors(inum + gnum); + const X_FLOAT xtmp = x(i, 0); + const X_FLOAT ytmp = x(i, 1); + const X_FLOAT ztmp = x(i, 2); + const int itype = type(i); - const typename ArrayTypes::t_int_1d_const_um stencil - = d_stencil; + const typename ArrayTypes::t_int_1d_const_um stencil + = d_stencil; - int loc[3]; - const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2), &(loc[0])); + int loc[3]; + const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2), &(loc[0])); - // loop over AIR ghost atoms in all bins in "full" stencil - // Note: the non-AIR ghost atoms have already been filtered out - for (int k = 0; k < nstencil; k++) { - int xbin2 = loc[0] + d_stencilxyz(k,0); - int ybin2 = loc[1] + d_stencilxyz(k,1); - int zbin2 = loc[2] + d_stencilxyz(k,2); - // Skip it if this bin is outside the extent of local bins - if (xbin2 < lbinxlo || xbin2 >= lbinxhi || - ybin2 < lbinylo || ybin2 >= lbinyhi || - zbin2 < lbinzlo || zbin2 >= lbinzhi) continue; - const int jbin = ibin+stencil(k); - for (int jl = 0; jl < c_bincount(jbin); ++jl) { - const int j = c_bins(jbin, jl); - const int jtype = type(j); - if(exclude && exclusion(i,j,itype,jtype)) continue; + // loop over AIR ghost atoms in all bins in "full" stencil + // Note: the non-AIR ghost atoms have already been filtered out + for (int k = 0; k < nstencil; k++) { + int xbin2 = loc[0] + d_stencilxyz(k,0); + int ybin2 = loc[1] + d_stencilxyz(k,1); + int zbin2 = loc[2] + d_stencilxyz(k,2); + // Skip it if this bin is outside the extent of local bins + if (xbin2 < lbinxlo || xbin2 >= lbinxhi || + ybin2 < lbinylo || ybin2 >= lbinyhi || + zbin2 < lbinzlo || zbin2 >= lbinzhi) continue; + const int jbin = ibin+stencil(k); + for (int jl = 0; jl < c_bincount(jbin); ++jl) { + const int j = 
c_bins(jbin, jl); + const int jtype = type(j); + if(exclude && exclusion(i,j,itype,jtype)) continue; - const X_FLOAT delx = xtmp - x(j, 0); - const X_FLOAT dely = ytmp - x(j, 1); - const X_FLOAT delz = ztmp - x(j, 2); - const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; - if(rsq <= cutneighsq(itype,jtype)) { - if (molecular) { - if (!moltemplate) - which = find_special(i,j); - /* else if (imol >= 0) */ - /* which = find_special(onemols[imol]->special[iatom], */ - /* onemols[imol]->nspecial[iatom], */ - /* tag[j]-tagprev); */ - /* else which = 0; */ - if (which == 0){ - if(n= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0){ + if(n 0) { + if(n 0) { - if(n 0) { - neigh_list.d_numneigh(inum + gnum) = n; - neigh_list.d_ilist(inum + (gnum++)) = i; - if(n > neigh_list.maxneighs) { - resize() = 1; - if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n); + if (n > 0) { + neigh_list.d_numneigh(inum + gnum) = n; + neigh_list.d_ilist(inum + (gnum++)) = i; + if(n > neigh_list.maxneighs) { + resize() = 1; + if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n); + } } } + // record where workItem ends in ilist + d_ssa_gitemLen(workPhase,workItem) = inum + gnum - d_ssa_gitemLoc(workPhase,workItem); + // if (d_ssa_gitemLen(workPhase,workItem) > 0) workItem++; } - // record where workItem ends in ilist - d_ssa_gitemLen(workPhase,workItem) = inum + gnum - d_ssa_gitemLoc(workPhase,workItem); - if (d_ssa_gitemLen(workPhase,workItem) > 0) workItem++; + d_ssa_gphaseLen(workPhase) = workItem; } neigh_list.gnum = gnum; } From fd1523c7561e98d61b02c55f74939f5dda97cfe5 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 24 Feb 2017 22:19:53 -0500 Subject: [PATCH 163/267] USER-DPD Kokkos: add missing () in STACKPARAMS check in ssa_update_* --- src/KOKKOS/fix_shardlow_kokkos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) 
diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index 1ec1455b23..79e40dee98 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -299,7 +299,7 @@ void FixShardlowKokkos::ssa_update_dpd( const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; // NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test - if ((rsq < STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype)) + if ((rsq < (STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype))) && (rsq >= EPSILON_SQUARED)) { double r = sqrt(rsq); double rinv = 1.0/r; @@ -432,7 +432,7 @@ void FixShardlowKokkos::ssa_update_dpde( const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; // NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test - if ((rsq < STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype)) + if ((rsq < (STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype))) && (rsq >= EPSILON_SQUARED)) { double r = sqrt(rsq); double rinv = 1.0/r; From e4500859a3e2388a2b3275ae1491f28589781e00 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 24 Feb 2017 22:24:29 -0500 Subject: [PATCH 164/267] USER-DPD: add "#ifdef DEBUG_PAIR_CT" debugging code to fix_shardlow* --- src/KOKKOS/fix_shardlow_kokkos.cpp | 61 ++++++++++++++++++++++++++++++ src/KOKKOS/fix_shardlow_kokkos.h | 7 ++++ src/USER-DPD/fix_shardlow.cpp | 53 ++++++++++++++++++++++++++ src/USER-DPD/fix_shardlow.h | 5 +++ 4 files changed, 126 insertions(+) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index 79e40dee98..1459819430 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -99,6 +99,17 @@ FixShardlowKokkos::FixShardlowKokkos(LAMMPS *lmp, int narg, char **a if(/* k_pairDPD == NULL &&*/ k_pairDPDE == NULL) error->all(FLERR,"Must use pair_style "/*"dpd/fdt/kk or "*/"dpd/fdt/energy/kk with fix shardlow/kk"); +#ifdef DEBUG_PAIR_CT + d_counters = typename AT::t_int_2d("FixShardlowKokkos::d_counters", 2, 
3); + d_hist = typename AT::t_int_1d("FixShardlowKokkos::d_hist", 32); +#ifndef KOKKOS_USE_CUDA_UVM + h_counters = Kokkos::create_mirror_view(d_counters); + h_hist = Kokkos::create_mirror_view(d_hist); +#else + h_counters = d_counters; + h_hist = d_hist; +#endif +#endif } /* ---------------------------------------------------------------------- */ @@ -297,10 +308,24 @@ void FixShardlowKokkos::ssa_update_dpd( const X_FLOAT dely = ytmp - x(j, 1); const X_FLOAT delz = ztmp - x(j, 2); const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; +#ifdef DEBUG_PAIR_CT + if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(0, 0))); + else Kokkos::atomic_increment(&(d_counters(0, 1))); + Kokkos::atomic_increment(&(d_counters(0, 2))); + int rsqi = rsq / 8; + if (rsqi < 0) rsqi = 0; + else if (rsqi > 31) rsqi = 31; + Kokkos::atomic_increment(&(d_hist(rsqi))); +#endif // NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test if ((rsq < (STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype))) && (rsq >= EPSILON_SQUARED)) { +#ifdef DEBUG_PAIR_CT + if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(1, 0))); + else Kokkos::atomic_increment(&(d_counters(1, 1))); + Kokkos::atomic_increment(&(d_counters(1, 2))); +#endif double r = sqrt(rsq); double rinv = 1.0/r; double delx_rinv = delx*rinv; @@ -430,10 +455,25 @@ void FixShardlowKokkos::ssa_update_dpde( const X_FLOAT dely = ytmp - x(j, 1); const X_FLOAT delz = ztmp - x(j, 2); const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; +#ifdef DEBUG_PAIR_CT + if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(0, 0))); + else Kokkos::atomic_increment(&(d_counters(0, 1))); + Kokkos::atomic_increment(&(d_counters(0, 2))); + int rsqi = rsq / 8; + if (rsqi < 0) rsqi = 0; + else if (rsqi > 31) rsqi = 31; + Kokkos::atomic_increment(&(d_hist(rsqi))); +#endif // NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test if ((rsq < 
(STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype))) && (rsq >= EPSILON_SQUARED)) { +#ifdef DEBUG_PAIR_CT + if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(1, 0))); + else Kokkos::atomic_increment(&(d_counters(1, 1))); + Kokkos::atomic_increment(&(d_counters(1, 2))); +#endif + double r = sqrt(rsq); double rinv = 1.0/r; double delx_rinv = delx*rinv; @@ -563,6 +603,15 @@ void FixShardlowKokkos::initial_integrate(int vflag) ssa_gitemLoc = np_ssa->ssa_gitemLoc; ssa_gitemLen = np_ssa->ssa_gitemLen; +#ifdef DEBUG_PAIR_CT + for (int i = 0; i < 2; ++i) + for (int j = 0; j < 3; ++j) + h_counters(i,j) = 0; + for (int i = 0; i < 32; ++i) h_hist[i] = 0; + deep_copy(d_counters, h_counters); + deep_copy(d_hist, h_hist); +#endif + // process neighbors in the local AIR for (int workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) { int workItemCt = ssa_phaseLen[workPhase]; @@ -622,6 +671,18 @@ void FixShardlowKokkos::initial_integrate(int vflag) } //End Loop over all directions For airnum = Top, Top-Right, Right, Bottom-Right, Back +#ifdef DEBUG_PAIR_CT +deep_copy(h_counters, d_counters); +deep_copy(h_hist, d_hist); +for (int i = 0; i < 32; ++i) fprintf(stdout, "%8d", h_hist[i]); +fprintf(stdout, "\n%6d %6d,%6d %6d: " + ,h_counters(0, 2) + ,h_counters(1, 2) + ,h_counters(0, 1) + ,h_counters(1, 1) +); +#endif + } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_shardlow_kokkos.h b/src/KOKKOS/fix_shardlow_kokkos.h index b4267226e6..ddd4f5b1ba 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.h +++ b/src/KOKKOS/fix_shardlow_kokkos.h @@ -60,6 +60,13 @@ class FixShardlowKokkos : public FixShardlow { F_FLOAT cutinv,halfsigma,kappa,alpha; }; +#ifdef DEBUG_PAIR_CT + typename AT::t_int_2d d_counters; + typename HAT::t_int_2d h_counters; + typename AT::t_int_1d d_hist; + typename HAT::t_int_1d h_hist; +#endif + protected: // class PairDPDfdt *pairDPD; PairDPDfdtEnergyKokkos *k_pairDPDE; diff --git 
a/src/USER-DPD/fix_shardlow.cpp b/src/USER-DPD/fix_shardlow.cpp index 4a7fff66cf..5132d937ea 100644 --- a/src/USER-DPD/fix_shardlow.cpp +++ b/src/USER-DPD/fix_shardlow.cpp @@ -211,6 +211,10 @@ void FixShardlow::ssa_update_dpd( const double mass_i = (rmass) ? rmass[i] : mass[itype]; const double massinv_i = 1.0 / mass_i; +#ifdef DEBUG_PAIR_CT + const int nlocal = atom->nlocal; +#endif + // Loop over Directional Neighbors only for (int jj = 0; jj < jlen; jj++) { int j = jlist[jj] & NEIGHMASK; @@ -220,9 +224,23 @@ void FixShardlow::ssa_update_dpd( double dely = ytmp - x[j][1]; double delz = ztmp - x[j][2]; double rsq = delx*delx + dely*dely + delz*delz; +#ifdef DEBUG_PAIR_CT + if ((i < nlocal) && (j < nlocal)) ++(counters[0][0]); + else ++(counters[0][1]); + ++(counters[0][2]); + int rsqi = rsq / 8; + if (rsqi < 0) rsqi = 0; + else if (rsqi > 31) rsqi = 31; + ++(hist[rsqi]); +#endif // NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test if ((rsq < cut2_i[jtype]) && (rsq >= EPSILON_SQUARED)) { +#ifdef DEBUG_PAIR_CT + if ((i < nlocal) && (j < nlocal)) ++(counters[1][0]); + else ++(counters[1][1]); + ++(counters[1][2]); +#endif double r = sqrt(rsq); double rinv = 1.0/r; double delx_rinv = delx*rinv; @@ -350,6 +368,10 @@ void FixShardlow::ssa_update_dpde( const double massinv_i = 1.0 / mass_i; const double mass_i_div_neg4_ftm2v = mass_i*(-0.25)/ftm2v; +#ifdef DEBUG_PAIR_CT + const int nlocal = atom->nlocal; +#endif + // Loop over Directional Neighbors only for (int jj = 0; jj < jlen; jj++) { int j = jlist[jj] & NEIGHMASK; @@ -359,9 +381,23 @@ void FixShardlow::ssa_update_dpde( double dely = ytmp - x[j][1]; double delz = ztmp - x[j][2]; double rsq = delx*delx + dely*dely + delz*delz; +#ifdef DEBUG_PAIR_CT + if ((i < nlocal) && (j < nlocal)) ++(counters[0][0]); + else ++(counters[0][1]); + ++(counters[0][2]); + int rsqi = rsq / 8; + if (rsqi < 0) rsqi = 0; + else if (rsqi > 31) rsqi = 31; + ++(hist[rsqi]); +#endif // NOTE: r can be 0.0 in DPD systems, so do 
EPSILON_SQUARED test if ((rsq < cut2_i[jtype]) && (rsq >= EPSILON_SQUARED)) { +#ifdef DEBUG_PAIR_CT + if ((i < nlocal) && (j < nlocal)) ++(counters[1][0]); + else ++(counters[1][1]); + ++(counters[1][2]); +#endif double r = sqrt(rsq); double rinv = 1.0/r; double delx_rinv = delx*rinv; @@ -493,6 +529,13 @@ void FixShardlow::initial_integrate(int vflag) error->one(FLERR, msg); } +#ifdef DEBUG_PAIR_CT + for (int i = 0; i < 2; ++i) + for (int j = 0; j < 3; ++j) + counters[i][j] = 0; + for (int i = 0; i < 32; ++i) hist[i] = 0; +#endif + // Allocate memory for v_t0 to hold the initial velocities for the ghosts v_t0 = (double (*)[3]) memory->smalloc(sizeof(double)*3*nghost, "FixShardlow:v_t0"); @@ -554,6 +597,16 @@ void FixShardlow::initial_integrate(int vflag) } //End Loop over all directions For airnum = Top, Top-Right, Right, Bottom-Right, Back +#ifdef DEBUG_PAIR_CT +for (int i = 0; i < 32; ++i) fprintf(stdout, "%8d", hist[i]); +fprintf(stdout, "\n%6d %6d,%6d %6d: " + ,counters[0][2] + ,counters[1][2] + ,counters[0][1] + ,counters[1][1] +); +#endif + memory->sfree(v_t0); v_t0 = NULL; } diff --git a/src/USER-DPD/fix_shardlow.h b/src/USER-DPD/fix_shardlow.h index 6fd438b8f0..e87ae3c9cf 100644 --- a/src/USER-DPD/fix_shardlow.h +++ b/src/USER-DPD/fix_shardlow.h @@ -38,6 +38,11 @@ class FixShardlow : public Fix { double memory_usage(); +#ifdef DEBUG_PAIR_CT + int counters[2][3]; + int hist[32]; +#endif + protected: int pack_reverse_comm(int, int, double *); void unpack_reverse_comm(int, int *, double *); From 35ee24cfad501b694ceba49b7104100101d0a5cb Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Sun, 26 Feb 2017 14:50:58 -0500 Subject: [PATCH 165/267] use RandWrap in pair_dpd_fdt_energy_kokkos and fix_shardlow_kokkos --- src/KOKKOS/fix_shardlow_kokkos.cpp | 25 ++++++++++++++++++----- src/KOKKOS/fix_shardlow_kokkos.h | 8 ++++++-- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 6 +++--- src/KOKKOS/pair_dpd_fdt_energy_kokkos.h | 8 ++++---- 4 files changed, 33 insertions(+), 14 
deletions(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index 1459819430..e82991bcba 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -71,7 +71,7 @@ using namespace FixConst; template FixShardlowKokkos::FixShardlowKokkos(LAMMPS *lmp, int narg, char **arg) : - FixShardlow(lmp, narg, arg), k_pairDPDE(NULL), ghostmax(0), nlocal(0) , nghost(0), rand_pool(1234567 + comm->me) + FixShardlow(lmp, narg, arg), k_pairDPDE(NULL), ghostmax(0), nlocal(0) , nghost(0) { kokkosable = 1; // atomKK = (AtomKokkos *) atom; @@ -90,6 +90,7 @@ FixShardlowKokkos::FixShardlowKokkos(LAMMPS *lmp, int narg, char **a // if(k_pairDPDE){ comm_forward = 3; comm_reverse = 5; + p_rand_pool = &(k_pairDPDE->rand_pool); // } else { // comm_forward = 3; // comm_reverse = 3; @@ -272,7 +273,7 @@ void FixShardlowKokkos::ssa_update_dpd( int start_ii, int count ) { - rand_type rand_gen = rand_pool.get_state(); + rand_type rand_gen = p_rand_pool->get_state(); const double theta_ij_inv = 1.0/k_pairDPD->temperature; // independent of i,j const double boltz_inv = 1.0/force->boltz; @@ -400,7 +401,7 @@ void FixShardlowKokkos::ssa_update_dpd( v(i, 2) = vzi; } - rand_pool.free_state(rand_gen); + p_rand_pool->free_state(rand_gen); } #endif @@ -416,7 +417,11 @@ void FixShardlowKokkos::ssa_update_dpde( int start_ii, int count, int id ) { - rand_type rand_gen = rand_pool.get_state(); +#ifdef USE_RAND_MARS + class RanMars *pRNG = k_pairDPDE->random; +#else + rand_type rand_gen = p_rand_pool->get_state(); +#endif const double boltz_inv = 1.0/force->boltz; const double ftm2v = force->ftm2v; @@ -490,7 +495,11 @@ void FixShardlowKokkos::ssa_update_dpde( double halfsigma_ij = STACKPARAMS?m_params[itype][jtype].halfsigma:params(itype,jtype).halfsigma; double halfgamma_ij = halfsigma_ij*halfsigma_ij*boltz_inv*theta_ij_inv; +#ifdef USE_RAND_MARS + double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v * pRNG->gaussian(); +#else double sigmaRand = 
halfsigma_ij*wr*dtsqrt*ftm2v * rand_gen.normal(); +#endif const double mass_j = masses(massPerI ? j : jtype); double mass_ij_div_neg4_ftm2v = mass_j*mass_i_div_neg4_ftm2v; @@ -499,7 +508,11 @@ void FixShardlowKokkos::ssa_update_dpde( // Compute uCond double kappa_ij = STACKPARAMS?m_params[itype][jtype].kappa:params(itype,jtype).kappa; double alpha_ij = STACKPARAMS?m_params[itype][jtype].alpha:params(itype,jtype).alpha; +#ifdef USE_RAND_MARS + double del_uCond = alpha_ij*wr*dtsqrt * pRNG->gaussian(); +#else double del_uCond = alpha_ij*wr*dtsqrt * rand_gen.normal(); +#endif del_uCond += kappa_ij*(theta_i_inv - theta_j_inv)*wdt; uCond[j] -= del_uCond; @@ -575,7 +588,9 @@ void FixShardlowKokkos::ssa_update_dpde( ii++; } - rand_pool.free_state(rand_gen); +#ifndef USE_RAND_MARS + p_rand_pool->free_state(rand_gen); +#endif } diff --git a/src/KOKKOS/fix_shardlow_kokkos.h b/src/KOKKOS/fix_shardlow_kokkos.h index ddd4f5b1ba..f71ca1ce11 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.h +++ b/src/KOKKOS/fix_shardlow_kokkos.h @@ -70,9 +70,13 @@ class FixShardlowKokkos : public FixShardlow { protected: // class PairDPDfdt *pairDPD; PairDPDfdtEnergyKokkos *k_pairDPDE; - Kokkos::Random_XorShift64_Pool rand_pool; // Kokkos::Random_XorShift64_Pool *p_rand_pool; - typedef typename Kokkos::Random_XorShift64_Pool::generator_type rand_type; + +// Kokkos::Random_XorShift64_Pool rand_pool; +// typedef typename Kokkos::Random_XorShift64_Pool::generator_type rand_type; + + RandPoolWrap *p_rand_pool; + typedef RandWrap rand_type; Kokkos::DualView k_params; typename Kokkos::DualView PairDPDfdtEnergyKokkos::PairDPDfdtEnergyKokkos(LAMMPS *lmp) : - PairDPDfdtEnergy(lmp),rand_pool(seed + comm->me /** , lmp/**/) + PairDPDfdtEnergy(lmp),rand_pool(12345 /* not actually used, seed + comm->me */, lmp) { atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; @@ -68,7 +68,7 @@ PairDPDfdtEnergyKokkos::~PairDPDfdtEnergyKokkos() memory->destroy_kokkos(k_cutsq,cutsq); - /** 
rand_pool.destroy();/**/ + rand_pool.destroy(); } /* ---------------------------------------------------------------------- @@ -101,7 +101,7 @@ void PairDPDfdtEnergyKokkos::init_style() error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk"); } - /** rand_pool.init(random,seed);/**/ + rand_pool.init(random,seed); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h index deb264c37e..e065d71d3e 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h @@ -89,11 +89,11 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { DAT::tdual_efloat_1d k_duCond,k_duMech; - Kokkos::Random_XorShift64_Pool rand_pool; - typedef typename Kokkos::Random_XorShift64_Pool::generator_type rand_type; + // Kokkos::Random_XorShift64_Pool rand_pool; + // typedef typename Kokkos::Random_XorShift64_Pool::generator_type rand_type; - // RandPoolWrap rand_pool; - // typedef RandWrap rand_type; + RandPoolWrap rand_pool; + typedef RandWrap rand_type; typename ArrayTypes::tdual_ffloat_2d k_cutsq; typename ArrayTypes::t_ffloat_2d d_cutsq; From e4b544f934bd816a31759b654f3c26c9ecf36ebd Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Sun, 26 Feb 2017 17:53:45 -0500 Subject: [PATCH 166/267] Make pair_dpd_fdt_energy's random seed public so fix_shardlow can use it. 
--- src/USER-DPD/pair_dpd_fdt_energy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/USER-DPD/pair_dpd_fdt_energy.h b/src/USER-DPD/pair_dpd_fdt_energy.h index f8303d4854..dce39f83f0 100644 --- a/src/USER-DPD/pair_dpd_fdt_energy.h +++ b/src/USER-DPD/pair_dpd_fdt_energy.h @@ -46,11 +46,11 @@ class PairDPDfdtEnergy : public Pair { double **sigma,**kappa; double *duCond,*duMech; + int seed; class RanMars *random; protected: double cut_global; - int seed; bool splitFDT_flag; bool a0_is_zero; From 3eba3e5a1b756e4a125e0f88201a2c54399e42ce Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Sun, 26 Feb 2017 17:57:13 -0500 Subject: [PATCH 167/267] USER-DPD Kokkos: for deterministic results, serialize bin_atoms() for now. --- src/KOKKOS/nbin_ssa_kokkos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/nbin_ssa_kokkos.cpp b/src/KOKKOS/nbin_ssa_kokkos.cpp index ebd07752b0..98ec638be9 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.cpp +++ b/src/KOKKOS/nbin_ssa_kokkos.cpp @@ -140,7 +140,7 @@ void NBinSSAKokkos::bin_atoms() subhi_[1] = domain->subhi[1]; subhi_[2] = domain->subhi[2]; - Kokkos::parallel_for(Kokkos::RangePolicy(atom->nlocal,atom->nlocal+atom->nghost), KOKKOS_LAMBDA (const int i) { + Kokkos::parallel_for(Kokkos::RangePolicy(atom->nlocal,atom->nlocal+atom->nghost), KOKKOS_LAMBDA (const int i) { const int iAIR = coord2ssaAIR(x(i, 0), x(i, 1), x(i, 2)); if (iAIR > 0) { // include only ghost atoms in an AIR const int ac = Kokkos::atomic_fetch_add(&gbincount[iAIR], (int)1); @@ -188,7 +188,7 @@ void NBinSSAKokkos::bin_atoms() NPairSSAKokkosBinAtomsFunctor f(*this); - Kokkos::parallel_for(atom->nlocal, f); + Kokkos::parallel_for(Kokkos::RangePolicy(0, atom->nlocal), f); DeviceType::fence(); deep_copy(h_resize, d_resize); From a5507b291d3d78d93136053d530f8be51d57728c Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Sun, 26 Feb 2017 18:00:20 -0500 Subject: [PATCH 168/267] USER-DPD Kokkos: give each workItem index a unique 
instance of RanMars Makes fix_shardlow_kokkos deterministic across runs and thread count. --- src/KOKKOS/fix_shardlow_kokkos.cpp | 27 ++++++++++++++++++++++++++- src/KOKKOS/fix_shardlow_kokkos.h | 6 ++++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index e82991bcba..a5e02f3dd8 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -90,7 +90,12 @@ FixShardlowKokkos::FixShardlowKokkos(LAMMPS *lmp, int narg, char **a // if(k_pairDPDE){ comm_forward = 3; comm_reverse = 5; +#ifdef USE_RAND_MARS + maxRNG = 0; + pp_random = NULL; +#else p_rand_pool = &(k_pairDPDE->rand_pool); +#endif // } else { // comm_forward = 3; // comm_reverse = 3; @@ -119,6 +124,11 @@ template FixShardlowKokkos::~FixShardlowKokkos() { ghostmax = 0; + if (pp_random) { + for (int i = 1; i < maxRNG; ++i) delete pp_random[i]; + delete[] pp_random; + pp_random = NULL; + } } /* ---------------------------------------------------------------------- */ @@ -418,7 +428,7 @@ void FixShardlowKokkos::ssa_update_dpde( ) { #ifdef USE_RAND_MARS - class RanMars *pRNG = k_pairDPDE->random; + class RanMars *pRNG = pp_random[id]; #else rand_type rand_gen = p_rand_pool->get_state(); #endif @@ -618,6 +628,21 @@ void FixShardlowKokkos::initial_integrate(int vflag) ssa_gitemLoc = np_ssa->ssa_gitemLoc; ssa_gitemLen = np_ssa->ssa_gitemLen; + int maxWorkItemCt = (int) ssa_itemLoc.dimension_1(); + if (maxWorkItemCt > maxRNG) { + if (pp_random) { + for (int i = 1; i < maxRNG; ++i) delete pp_random[i]; + delete[] pp_random; + pp_random = NULL; + } + maxRNG = maxWorkItemCt; + pp_random = new RanMars*[maxRNG]; + for (int i = 1; i < maxRNG; ++i) { + pp_random[i] = new RanMars(lmp, k_pairDPDE->seed + comm->me + comm->nprocs*i); + } + pp_random[0] = k_pairDPDE->random; + } + #ifdef DEBUG_PAIR_CT for (int i = 0; i < 2; ++i) for (int j = 0; j < 3; ++j) diff --git a/src/KOKKOS/fix_shardlow_kokkos.h 
b/src/KOKKOS/fix_shardlow_kokkos.h index f71ca1ce11..95e8add64a 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.h +++ b/src/KOKKOS/fix_shardlow_kokkos.h @@ -75,8 +75,10 @@ class FixShardlowKokkos : public FixShardlow { // Kokkos::Random_XorShift64_Pool rand_pool; // typedef typename Kokkos::Random_XorShift64_Pool::generator_type rand_type; - RandPoolWrap *p_rand_pool; - typedef RandWrap rand_type; +// RandPoolWrap *p_rand_pool; +// typedef RandWrap rand_type; + int maxRNG; + class RanMars **pp_random; Kokkos::DualView k_params; typename Kokkos::DualView Date: Tue, 28 Feb 2017 12:49:11 -0500 Subject: [PATCH 169/267] USER-DPD Kokkos: Add "#ifdef DPD_USE_RAN_MARS" toggle Also, initialize the rand_pool with a seed in init_style() --- src/KOKKOS/fix_shardlow_kokkos.cpp | 14 ++++++++----- src/KOKKOS/fix_shardlow_kokkos.h | 11 +++++------ src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 13 +++++++++++- src/KOKKOS/pair_dpd_fdt_energy_kokkos.h | 24 +++++++++++++++++++---- 4 files changed, 46 insertions(+), 16 deletions(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index a5e02f3dd8..9bac6250da 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -90,7 +90,7 @@ FixShardlowKokkos::FixShardlowKokkos(LAMMPS *lmp, int narg, char **a // if(k_pairDPDE){ comm_forward = 3; comm_reverse = 5; -#ifdef USE_RAND_MARS +#ifdef DPD_USE_RAN_MARS maxRNG = 0; pp_random = NULL; #else @@ -124,11 +124,13 @@ template FixShardlowKokkos::~FixShardlowKokkos() { ghostmax = 0; +#ifdef DPD_USE_RAN_MARS if (pp_random) { for (int i = 1; i < maxRNG; ++i) delete pp_random[i]; delete[] pp_random; pp_random = NULL; } +#endif } /* ---------------------------------------------------------------------- */ @@ -427,7 +429,7 @@ void FixShardlowKokkos::ssa_update_dpde( int start_ii, int count, int id ) { -#ifdef USE_RAND_MARS +#ifdef DPD_USE_RAN_MARS class RanMars *pRNG = pp_random[id]; #else rand_type rand_gen = p_rand_pool->get_state(); @@ 
-505,7 +507,7 @@ void FixShardlowKokkos::ssa_update_dpde( double halfsigma_ij = STACKPARAMS?m_params[itype][jtype].halfsigma:params(itype,jtype).halfsigma; double halfgamma_ij = halfsigma_ij*halfsigma_ij*boltz_inv*theta_ij_inv; -#ifdef USE_RAND_MARS +#ifdef DPD_USE_RAN_MARS double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v * pRNG->gaussian(); #else double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v * rand_gen.normal(); @@ -518,7 +520,7 @@ void FixShardlowKokkos::ssa_update_dpde( // Compute uCond double kappa_ij = STACKPARAMS?m_params[itype][jtype].kappa:params(itype,jtype).kappa; double alpha_ij = STACKPARAMS?m_params[itype][jtype].alpha:params(itype,jtype).alpha; -#ifdef USE_RAND_MARS +#ifdef DPD_USE_RAN_MARS double del_uCond = alpha_ij*wr*dtsqrt * pRNG->gaussian(); #else double del_uCond = alpha_ij*wr*dtsqrt * rand_gen.normal(); @@ -598,7 +600,7 @@ void FixShardlowKokkos::ssa_update_dpde( ii++; } -#ifndef USE_RAND_MARS +#ifndef DPD_USE_RAN_MARS p_rand_pool->free_state(rand_gen); #endif } @@ -628,6 +630,7 @@ void FixShardlowKokkos::initial_integrate(int vflag) ssa_gitemLoc = np_ssa->ssa_gitemLoc; ssa_gitemLen = np_ssa->ssa_gitemLen; +#ifdef DPD_USE_RAN_MARS int maxWorkItemCt = (int) ssa_itemLoc.dimension_1(); if (maxWorkItemCt > maxRNG) { if (pp_random) { @@ -642,6 +645,7 @@ void FixShardlowKokkos::initial_integrate(int vflag) } pp_random[0] = k_pairDPDE->random; } +#endif #ifdef DEBUG_PAIR_CT for (int i = 0; i < 2; ++i) diff --git a/src/KOKKOS/fix_shardlow_kokkos.h b/src/KOKKOS/fix_shardlow_kokkos.h index 95e8add64a..011c16dc60 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.h +++ b/src/KOKKOS/fix_shardlow_kokkos.h @@ -70,15 +70,14 @@ class FixShardlowKokkos : public FixShardlow { protected: // class PairDPDfdt *pairDPD; PairDPDfdtEnergyKokkos *k_pairDPDE; -// Kokkos::Random_XorShift64_Pool *p_rand_pool; -// Kokkos::Random_XorShift64_Pool rand_pool; -// typedef typename Kokkos::Random_XorShift64_Pool::generator_type rand_type; - -// RandPoolWrap *p_rand_pool; -// typedef 
RandWrap rand_type; +#ifdef DPD_USE_RAN_MARS int maxRNG; class RanMars **pp_random; +#else + Kokkos::Random_XorShift64_Pool *p_rand_pool; + typedef typename Kokkos::Random_XorShift64_Pool::generator_type rand_type; +#endif Kokkos::DualView k_params; typename Kokkos::DualView PairDPDfdtEnergyKokkos::PairDPDfdtEnergyKokkos(LAMMPS *lmp) : - PairDPDfdtEnergy(lmp),rand_pool(12345 /* not actually used, seed + comm->me */, lmp) + PairDPDfdtEnergy(lmp), +#ifdef DPD_USE_RAN_MARS + rand_pool(0 /* unused */, lmp) +#else + rand_pool() +#endif { atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; @@ -68,7 +73,9 @@ PairDPDfdtEnergyKokkos::~PairDPDfdtEnergyKokkos() memory->destroy_kokkos(k_cutsq,cutsq); +#ifdef DPD_USE_RAN_MARS rand_pool.destroy(); +#endif } /* ---------------------------------------------------------------------- @@ -101,7 +108,11 @@ void PairDPDfdtEnergyKokkos::init_style() error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk"); } +#ifdef DPD_USE_RAN_MARS rand_pool.init(random,seed); +#else + rand_pool.init(seed + comm->me,DeviceType::max_hardware_threads()); +#endif } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h index e065d71d3e..a32539242a 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h @@ -22,11 +22,25 @@ PairStyle(dpd/fdt/energy/kk/host,PairDPDfdtEnergyKokkos) #ifndef LMP_PAIR_DPD_FDT_ENERGY_KOKKOS_H #define LMP_PAIR_DPD_FDT_ENERGY_KOKKOS_H + +#ifndef ALLOW_NON_DETERMINISTIC_DPD +#ifdef KOKKOS_HAVE_CUDA +//FIXME print some warning +#endif +#ifndef DPD_USE_RAN_MARS +#define DPD_USE_RAN_MARS +#endif +#endif + + #include "pair_dpd_fdt_energy.h" #include "pair_kokkos.h" #include "kokkos_type.h" -#include "Kokkos_Random.hpp" +#ifdef DPD_USE_RAN_MARS #include "rand_pool_wrap_kokkos.h" +#else +#include "Kokkos_Random.hpp" +#endif namespace 
LAMMPS_NS { @@ -89,11 +103,13 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { DAT::tdual_efloat_1d k_duCond,k_duMech; - // Kokkos::Random_XorShift64_Pool rand_pool; - // typedef typename Kokkos::Random_XorShift64_Pool::generator_type rand_type; - +#ifdef DPD_USE_RAN_MARS RandPoolWrap rand_pool; typedef RandWrap rand_type; +#else + Kokkos::Random_XorShift64_Pool rand_pool; + typedef typename Kokkos::Random_XorShift64_Pool::generator_type rand_type; +#endif typename ArrayTypes::tdual_ffloat_2d k_cutsq; typename ArrayTypes::t_ffloat_2d d_cutsq; From b26a434a502d953b3b7fd2772a4bbc5a5091f468 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Tue, 28 Feb 2017 12:53:56 -0500 Subject: [PATCH 170/267] USER-DPD Kokkos: Add "#ifdef ALLOW_NON_DETERMINISTIC_SSA" toggle SSA atom binning algorithm was adjusted to do as much work in parallel while preserving deterministic behavior. The final step is done serially to preserve deterministic behavior. An alternative would be to sort the contents of the bins so that they are always in the same order. --- src/KOKKOS/nbin_ssa_kokkos.cpp | 189 +++++++++++++++++++-------------- src/KOKKOS/nbin_ssa_kokkos.h | 65 ++++++++++++ 2 files changed, 172 insertions(+), 82 deletions(-) diff --git a/src/KOKKOS/nbin_ssa_kokkos.cpp b/src/KOKKOS/nbin_ssa_kokkos.cpp index 98ec638be9..53f3f2fc80 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.cpp +++ b/src/KOKKOS/nbin_ssa_kokkos.cpp @@ -115,89 +115,31 @@ void NBinSSAKokkos::bin_atoms() last_bin = update->ntimestep; int i; + int nlocal = atom->nlocal; + int nghost = atom->nghost; + int nall = nlocal + nghost; - // bin the ghost atoms - h_resize() = 1; - while(h_resize() > 0) { - h_resize() = 0; - deep_copy(d_resize, h_resize); + atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); + x = atomKK->k_x.view(); - for (int i = 0; i < 8; i++) { - k_gbincount.h_view(i) = 0; - } - k_gbincount.modify(); - k_gbincount.sync(); - DeviceType::fence(); // FIXME? 
+ sublo_[0] = domain->sublo[0]; + sublo_[1] = domain->sublo[1]; + sublo_[2] = domain->sublo[2]; + subhi_[0] = domain->subhi[0]; + subhi_[1] = domain->subhi[1]; + subhi_[2] = domain->subhi[2]; - atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); - x = atomKK->k_x.view(); + bboxlo_[0] = bboxlo[0]; bboxlo_[1] = bboxlo[1]; bboxlo_[2] = bboxlo[2]; + bboxhi_[0] = bboxhi[0]; bboxhi_[1] = bboxhi[1]; bboxhi_[2] = bboxhi[2]; - // I don't think these two lines need to be repeated here... - TIM 20170216 - sublo_[0] = domain->sublo[0]; - sublo_[1] = domain->sublo[1]; - sublo_[2] = domain->sublo[2]; - subhi_[0] = domain->subhi[0]; - subhi_[1] = domain->subhi[1]; - subhi_[2] = domain->subhi[2]; + k_binID = DAT::tdual_int_1d("NBinSSAKokkos::binID",nall); + binID = k_binID.view(); - Kokkos::parallel_for(Kokkos::RangePolicy(atom->nlocal,atom->nlocal+atom->nghost), KOKKOS_LAMBDA (const int i) { - const int iAIR = coord2ssaAIR(x(i, 0), x(i, 1), x(i, 2)); - if (iAIR > 0) { // include only ghost atoms in an AIR - const int ac = Kokkos::atomic_fetch_add(&gbincount[iAIR], (int)1); - if(ac < (int) gbins.dimension_1()) { - gbins(iAIR, ac) = i; - } else { - d_resize() = 1; - } - } - }); - DeviceType::fence(); - - deep_copy(h_resize, d_resize); - if(h_resize()) { - k_gbincount.modify(); - k_gbincount.sync(); - for (i = 1; i < 8; i++) { - if (k_gbincount.h_view(i) > ghosts_per_gbin) { - ghosts_per_gbin = k_gbincount.h_view(i); - } - } - k_gbins = DAT::tdual_int_2d("gbins", 8, ghosts_per_gbin); - gbins = k_gbins.view(); - } - } - c_gbins = gbins; // gbins won't change until the next bin_atoms - - // bin the local atoms - h_resize() = 1; - while(h_resize() > 0) { - h_resize() = 0; - deep_copy(d_resize, h_resize); - - MemsetZeroFunctor f_zero; - f_zero.ptr = (void*) k_bincount.view().ptr_on_device(); - Kokkos::parallel_for(mbins, f_zero); - DeviceType::fence(); - - atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); - x = atomKK->k_x.view(); - - // I don't think these two lines need to be 
repeated here... - TIM 20170216 - bboxlo_[0] = bboxlo[0]; bboxlo_[1] = bboxlo[1]; bboxlo_[2] = bboxlo[2]; - bboxhi_[0] = bboxhi[0]; bboxhi_[1] = bboxhi[1]; bboxhi_[2] = bboxhi[2]; - - NPairSSAKokkosBinAtomsFunctor f(*this); - - Kokkos::parallel_for(Kokkos::RangePolicy(0, atom->nlocal), f); - DeviceType::fence(); - - deep_copy(h_resize, d_resize); - if(h_resize()) { - - atoms_per_bin += 16; - k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin); - bins = k_bins.view(); - } + // find each local atom's binID + { + atoms_per_bin = 0; + NPairSSAKokkosBinIDAtomsFunctor f(*this); + Kokkos::parallel_reduce(nlocal, f, atoms_per_bin); } deep_copy(h_lbinxlo, d_lbinxlo); deep_copy(h_lbinylo, d_lbinylo); @@ -205,7 +147,72 @@ void NBinSSAKokkos::bin_atoms() deep_copy(h_lbinxhi, d_lbinxhi); deep_copy(h_lbinyhi, d_lbinyhi); deep_copy(h_lbinzhi, d_lbinzhi); + + // find each ghost's binID (AIR number) + { + for (int i = 0; i < 8; i++) k_gbincount.h_view(i) = 0; + k_gbincount.modify(); + k_gbincount.sync(); + DeviceType::fence(); // FIXME? + ghosts_per_gbin = 0; + NPairSSAKokkosBinIDGhostsFunctor f(*this); + Kokkos::parallel_reduce(Kokkos::RangePolicy(nlocal,nall), f, ghosts_per_gbin); + } + + // actually bin the ghost atoms + { + if(ghosts_per_gbin > (int) gbins.dimension_1()) { + k_gbins = DAT::tdual_int_2d("gbins", 8, ghosts_per_gbin); + gbins = k_gbins.view(); + } + for (int i = 0; i < 8; i++) k_gbincount.h_view(i) = 0; + k_gbincount.modify(); + k_gbincount.sync(); + DeviceType::fence(); // FIXME? 
+ + Kokkos::parallel_for( +#ifdef ALLOW_NON_DETERMINISTIC_SSA + Kokkos::RangePolicy(nlocal,nall) +#else + Kokkos::RangePolicy(nlocal,nall) +#endif + , KOKKOS_LAMBDA (const int i) { + const int iAIR = binID(i); + if (iAIR > 0) { // include only ghost atoms in an AIR + const int ac = Kokkos::atomic_fetch_add(&gbincount[iAIR], (int)1); + gbins(iAIR, ac) = i; + } + }); + DeviceType::fence(); + } + c_gbins = gbins; // gbins won't change until the next bin_atoms + + // actually bin the local atoms + { + if ((mbins > (int) bins.dimension_0()) || + (atoms_per_bin > (int) bins.dimension_1())) { + k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin); + bins = k_bins.view(); + } + MemsetZeroFunctor f_zero; + f_zero.ptr = (void*) k_bincount.view().ptr_on_device(); + Kokkos::parallel_for(mbins, f_zero); + DeviceType::fence(); + + NPairSSAKokkosBinAtomsFunctor f(*this); +#ifdef ALLOW_NON_DETERMINISTIC_SSA + Kokkos::parallel_for(nlocal, f); +#else + Kokkos::parallel_for(Kokkos::RangePolicy(0, nlocal), f); +#endif + DeviceType::fence(); + + } c_bins = bins; // bins won't change until the next bin_atoms + +//now dispose of the k_binID array + k_binID = DAT::tdual_int_1d("NBinSSAKokkos::binID",0); + binID = k_binID.view(); } /* ---------------------------------------------------------------------- */ @@ -213,9 +220,19 @@ void NBinSSAKokkos::bin_atoms() template KOKKOS_INLINE_FUNCTION void NBinSSAKokkos::binAtomsItem(const int &i) const +{ + const int ibin = binID(i); + const int ac = Kokkos::atomic_fetch_add(&(bincount[ibin]), (int)1); + bins(ibin, ac) = i; +} + +template +KOKKOS_INLINE_FUNCTION +void NBinSSAKokkos::binIDAtomsItem(const int &i, int &update) const { int loc[3]; const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2), &(loc[0])); + binID(i) = ibin; // Find the bounding box of the local atoms in the bins if (loc[0] < d_lbinxlo()) Kokkos::atomic_fetch_min(&d_lbinxlo(),loc[0]); @@ -226,10 +243,18 @@ void NBinSSAKokkos::binAtomsItem(const int &i) const if (loc[2] >= 
d_lbinzhi()) Kokkos::atomic_fetch_max(&d_lbinzhi(),loc[2] + 1); const int ac = Kokkos::atomic_fetch_add(&(bincount[ibin]), (int)1); - if(ac < (int) bins.dimension_1()) { - bins(ibin, ac) = i; - } else { - d_resize() = 1; + if (update <= ac) update = ac + 1; +} + +template +KOKKOS_INLINE_FUNCTION +void NBinSSAKokkos::binIDGhostsItem(const int &i, int &update) const +{ + const int iAIR = coord2ssaAIR(x(i, 0), x(i, 1), x(i, 2)); + binID(i) = iAIR; + if (iAIR > 0) { // include only ghost atoms in an AIR + const int ac = Kokkos::atomic_fetch_add(&gbincount[iAIR], (int)1); + if (update <= ac) update = ac + 1; } } diff --git a/src/KOKKOS/nbin_ssa_kokkos.h b/src/KOKKOS/nbin_ssa_kokkos.h index 488c1034f5..69f05c9304 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.h +++ b/src/KOKKOS/nbin_ssa_kokkos.h @@ -41,6 +41,11 @@ class NBinSSAKokkos : public NBinStandard { void bin_atoms_setup(int); void bin_atoms(); + // temporary array to hold the binID for each atom + DAT::tdual_int_1d k_binID; + typename AT::t_int_1d binID; + typename AT::t_int_1d_const c_binID; + int atoms_per_bin; DAT::tdual_int_1d k_bincount; DAT::tdual_int_2d k_bins; @@ -77,6 +82,12 @@ class NBinSSAKokkos : public NBinStandard { KOKKOS_INLINE_FUNCTION void binAtomsItem(const int &i) const; + KOKKOS_INLINE_FUNCTION + void binIDAtomsItem(const int &i, int &update) const; + + KOKKOS_INLINE_FUNCTION + void binIDGhostsItem(const int &i, int &update) const; + /* ---------------------------------------------------------------------- convert atom coords into the ssa active interaction region number ------------------------------------------------------------------------- */ @@ -165,6 +176,60 @@ struct NPairSSAKokkosBinAtomsFunctor { } }; +template +struct NPairSSAKokkosBinIDAtomsFunctor { + typedef DeviceType device_type; + typedef int value_type; + + const NBinSSAKokkos c; + + NPairSSAKokkosBinIDAtomsFunctor(const NBinSSAKokkos &_c): + c(_c) {}; + ~NPairSSAKokkosBinIDAtomsFunctor() {} + KOKKOS_INLINE_FUNCTION + void 
operator() (const int & i, value_type& update) const { + c.binIDAtomsItem(i, update); + } + + KOKKOS_INLINE_FUNCTION + void join (volatile value_type& dst, + const volatile value_type& src) const { + if (dst < src) dst = src; + } + + KOKKOS_INLINE_FUNCTION + void init (value_type& dst) const { + dst = INT_MIN; + } +}; + +template +struct NPairSSAKokkosBinIDGhostsFunctor { + typedef DeviceType device_type; + typedef int value_type; + + const NBinSSAKokkos c; + + NPairSSAKokkosBinIDGhostsFunctor(const NBinSSAKokkos &_c): + c(_c) {}; + ~NPairSSAKokkosBinIDGhostsFunctor() {} + KOKKOS_INLINE_FUNCTION + void operator() (const int & i, value_type& update) const { + c.binIDGhostsItem(i, update); + } + + KOKKOS_INLINE_FUNCTION + void join (volatile value_type& dst, + const volatile value_type& src) const { + if (dst < src) dst = src; + } + + KOKKOS_INLINE_FUNCTION + void init (value_type& dst) const { + dst = INT_MIN; + } +}; + } #endif From 0982331c71fbb9d9420e26c47dfad1a0392fd028 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Wed, 1 Mar 2017 09:49:24 -0500 Subject: [PATCH 171/267] USER-DPD Kokkos: replicate 7a593c2f bugfix to pair_table_rx_kokkos.cpp --- src/KOKKOS/pair_table_rx_kokkos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 58108c9308..2a1ee2c0b1 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -984,7 +984,7 @@ void PairTableRXKokkos::coeff(int narg, char **arg) nspecies = atom->nspecies_dpd; if(nspecies==0) error->all(FLERR,"There are no rx species specified."); int n; - n = strlen(arg[3]) + 1; + n = strlen(arg[4]) + 1; site1 = new char[n]; strcpy(site1,arg[4]); @@ -995,7 +995,7 @@ void PairTableRXKokkos::coeff(int narg, char **arg) if (ispecies == nspecies && strcmp(site1,"1fluid") != 0) error->all(FLERR,"Site1 name not recognized in pair coefficients"); - n = strlen(arg[4]) + 1; + n = strlen(arg[5]) + 1; site2 
= new char[n]; strcpy(site2,arg[5]); From 6e26358ec3eade9bb35f4dc1ad48b5374cec417d Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Wed, 1 Mar 2017 11:46:26 -0500 Subject: [PATCH 172/267] lib kokkos bugfix: on a CUDA host, the random state wasn't preserved. Random_XorShift*_Pool::free_state() has two purposes: 1) update the state value kept in the pool 2) unlock the state For a CUDA host thread, ONLY skip step 2, not both. --- lib/kokkos/algorithms/src/Kokkos_Random.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index d376173bf1..2fb6b553c2 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -1204,8 +1204,8 @@ Random_XorShift64 Random_XorShift64_Pool::get_state( template<> KOKKOS_INLINE_FUNCTION void Random_XorShift64_Pool::free_state(const Random_XorShift64 &state) const { -#ifdef __CUDA_ARCH__ state_(state.state_idx_) = state.state_; +#ifdef __CUDA_ARCH__ locks_(state.state_idx_) = 0; return; #endif @@ -1240,9 +1240,9 @@ Random_XorShift1024 Random_XorShift1024_Pool::get_st template<> KOKKOS_INLINE_FUNCTION void Random_XorShift1024_Pool::free_state(const Random_XorShift1024 &state) const { -#ifdef __CUDA_ARCH__ for(int i=0; i<16; i++) state_(state.state_idx_,i) = state.state_[i]; +#ifdef __CUDA_ARCH__ locks_(state.state_idx_) = 0; return; #endif From 641bf72f2030f78d1ecfbbedc60704f4215d9662 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Wed, 1 Mar 2017 11:52:33 -0500 Subject: [PATCH 173/267] lib kokkos: Enable deterministic use of Random_XorShift*_Pool. Add support for lock-free and deterministic use of Random_XorShift*_Pool by giving state_idx selection and lock responsibility up to the application. 
Done by an overload of get_state() to take state_idx as an argument that the application guarantees is concurrently unique and within the range of num_states that the application passed to init(). In other words, this allows the RNG state to be associated with some application specific index, rather than a runtime arbitrary thread ID, and thus the application can control which work is performed using which RNG in a deterministic manner, regardless of which thread performs the work. --- lib/kokkos/algorithms/src/Kokkos_Random.hpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index 2fb6b553c2..a0d666183c 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -752,6 +752,12 @@ namespace Kokkos { return Random_XorShift64(state_(i),i); } + // NOTE: state_idx MUST be unique and less than num_states + KOKKOS_INLINE_FUNCTION + Random_XorShift64 get_state(const int state_idx) const { + return Random_XorShift64(state_(state_idx),state_idx); + } + KOKKOS_INLINE_FUNCTION void free_state(const Random_XorShift64& state) const { state_(state.state_idx_) = state.state_; @@ -1006,6 +1012,12 @@ namespace Kokkos { return Random_XorShift1024(state_,p_(i),i); }; + // NOTE: state_idx MUST be unique and less than num_states + KOKKOS_INLINE_FUNCTION + Random_XorShift1024 get_state(const int state_idx) const { + return Random_XorShift1024(state_,p_(state_idx),state_idx); + } + KOKKOS_INLINE_FUNCTION void free_state(const Random_XorShift1024& state) const { for(int i = 0; i<16; i++) From 268e855a151360861f7b6059414356af1323e997 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Wed, 1 Mar 2017 14:14:29 -0500 Subject: [PATCH 174/267] USER-DPD Kokkos: bugfix for the rare case where the SSA ghost processing has more parallelism than for the locals.
--- src/KOKKOS/fix_shardlow_kokkos.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index 9bac6250da..8b6432e2dc 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -50,7 +50,6 @@ #include "neighbor.h" #include "neigh_list_kokkos.h" #include "neigh_request.h" -#include "random_mars.h" #include "memory.h" #include "domain.h" #include "modify.h" @@ -632,6 +631,9 @@ void FixShardlowKokkos::initial_integrate(int vflag) #ifdef DPD_USE_RAN_MARS int maxWorkItemCt = (int) ssa_itemLoc.dimension_1(); + if (maxWorkItemCt < (int) ssa_gitemLoc.dimension_1()) { + maxWorkItemCt = (int) ssa_gitemLoc.dimension_1(); + } if (maxWorkItemCt > maxRNG) { if (pp_random) { for (int i = 1; i < maxRNG; ++i) delete pp_random[i]; From ed089c34cfda77885d235ade1a285d30109ce157 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Wed, 1 Mar 2017 14:18:14 -0500 Subject: [PATCH 175/267] USER-DPD Kokkos: Now use the deterministic Random_XorShift64() for SSA --- src/KOKKOS/fix_shardlow_kokkos.cpp | 49 ++++++++++++++--------- src/KOKKOS/fix_shardlow_kokkos.h | 7 +++- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 1 - src/KOKKOS/pair_dpd_fdt_energy_kokkos.h | 17 ++++---- 4 files changed, 44 insertions(+), 30 deletions(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index 8b6432e2dc..996f37257d 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -89,11 +89,9 @@ FixShardlowKokkos::FixShardlowKokkos(LAMMPS *lmp, int narg, char **a // if(k_pairDPDE){ comm_forward = 3; comm_reverse = 5; -#ifdef DPD_USE_RAN_MARS maxRNG = 0; +#ifdef DPD_USE_RAN_MARS pp_random = NULL; -#else - p_rand_pool = &(k_pairDPDE->rand_pool); #endif // } else { // comm_forward = 3; @@ -281,10 +279,14 @@ void FixShardlowKokkos::setup_pre_neighbor() template template void FixShardlowKokkos::ssa_update_dpd( - int start_ii, int count + 
int start_ii, int count, int id ) { - rand_type rand_gen = p_rand_pool->get_state(); +#ifdef DPD_USE_RAN_MARS + class RanMars *pRNG = pp_random[id]; +#else + rand_type rand_gen = rand_pool.get_state(id); +#endif const double theta_ij_inv = 1.0/k_pairDPD->temperature; // independent of i,j const double boltz_inv = 1.0/force->boltz; @@ -350,7 +352,12 @@ void FixShardlowKokkos::ssa_update_dpd( double halfsigma_ij = STACKPARAMS?m_params[itype][jtype].halfsigma:params(itype,jtype).halfsigma; double halfgamma_ij = halfsigma_ij*halfsigma_ij*boltz_inv*theta_ij_inv; - double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v * pRNG->gaussian(); + double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v * +#ifdef DPD_USE_RAN_MARS + pRNG->gaussian(); +#else + rand_gen.normal(); +#endif const double mass_j = masses(massPerI ? j : jtype); double massinv_j = 1.0 / mass_j; @@ -412,7 +419,9 @@ void FixShardlowKokkos::ssa_update_dpd( v(i, 2) = vzi; } - p_rand_pool->free_state(rand_gen); +#ifndef DPD_USE_RAN_MARS + rand_pool.free_state(rand_gen); +#endif } #endif @@ -431,7 +440,7 @@ void FixShardlowKokkos::ssa_update_dpde( #ifdef DPD_USE_RAN_MARS class RanMars *pRNG = pp_random[id]; #else - rand_type rand_gen = p_rand_pool->get_state(); + rand_type rand_gen = rand_pool.get_state(id); #endif const double boltz_inv = 1.0/force->boltz; @@ -506,10 +515,11 @@ void FixShardlowKokkos::ssa_update_dpde( double halfsigma_ij = STACKPARAMS?m_params[itype][jtype].halfsigma:params(itype,jtype).halfsigma; double halfgamma_ij = halfsigma_ij*halfsigma_ij*boltz_inv*theta_ij_inv; + double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v * #ifdef DPD_USE_RAN_MARS - double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v * pRNG->gaussian(); + pRNG->gaussian(); #else - double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v * rand_gen.normal(); + rand_gen.normal(); #endif const double mass_j = masses(massPerI ? 
j : jtype); @@ -519,10 +529,11 @@ void FixShardlowKokkos::ssa_update_dpde( // Compute uCond double kappa_ij = STACKPARAMS?m_params[itype][jtype].kappa:params(itype,jtype).kappa; double alpha_ij = STACKPARAMS?m_params[itype][jtype].alpha:params(itype,jtype).alpha; + double del_uCond = alpha_ij*wr*dtsqrt * #ifdef DPD_USE_RAN_MARS - double del_uCond = alpha_ij*wr*dtsqrt * pRNG->gaussian(); + pRNG->gaussian(); #else - double del_uCond = alpha_ij*wr*dtsqrt * rand_gen.normal(); + rand_gen.normal(); #endif del_uCond += kappa_ij*(theta_i_inv - theta_j_inv)*wdt; @@ -600,7 +611,7 @@ void FixShardlowKokkos::ssa_update_dpde( } #ifndef DPD_USE_RAN_MARS - p_rand_pool->free_state(rand_gen); + rand_pool.free_state(rand_gen); #endif } @@ -629,25 +640,27 @@ void FixShardlowKokkos::initial_integrate(int vflag) ssa_gitemLoc = np_ssa->ssa_gitemLoc; ssa_gitemLen = np_ssa->ssa_gitemLen; -#ifdef DPD_USE_RAN_MARS int maxWorkItemCt = (int) ssa_itemLoc.dimension_1(); if (maxWorkItemCt < (int) ssa_gitemLoc.dimension_1()) { maxWorkItemCt = (int) ssa_gitemLoc.dimension_1(); } if (maxWorkItemCt > maxRNG) { +#ifdef DPD_USE_RAN_MARS if (pp_random) { for (int i = 1; i < maxRNG; ++i) delete pp_random[i]; delete[] pp_random; pp_random = NULL; } - maxRNG = maxWorkItemCt; - pp_random = new RanMars*[maxRNG]; - for (int i = 1; i < maxRNG; ++i) { + pp_random = new RanMars*[maxWorkItemCt]; + for (int i = 1; i < maxWorkItemCt; ++i) { pp_random[i] = new RanMars(lmp, k_pairDPDE->seed + comm->me + comm->nprocs*i); } pp_random[0] = k_pairDPDE->random; - } +#else + rand_pool.init(k_pairDPDE->seed + comm->me, maxWorkItemCt); #endif + maxRNG = maxWorkItemCt; + } #ifdef DEBUG_PAIR_CT for (int i = 0; i < 2; ++i) diff --git a/src/KOKKOS/fix_shardlow_kokkos.h b/src/KOKKOS/fix_shardlow_kokkos.h index 011c16dc60..c4711f5b8b 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.h +++ b/src/KOKKOS/fix_shardlow_kokkos.h @@ -71,11 +71,14 @@ class FixShardlowKokkos : public FixShardlow { // class PairDPDfdt *pairDPD; 
PairDPDfdtEnergyKokkos *k_pairDPDE; -#ifdef DPD_USE_RAN_MARS int maxRNG; +#ifdef DPD_USE_RAN_MARS class RanMars **pp_random; +#elif defined(DPD_USE_Random_XorShift1024) + Kokkos::Random_XorShift1024_Pool rand_pool; + typedef typename Kokkos::Random_XorShift1024_Pool::generator_type rand_type; #else - Kokkos::Random_XorShift64_Pool *p_rand_pool; + Kokkos::Random_XorShift64_Pool rand_pool; typedef typename Kokkos::Random_XorShift64_Pool::generator_type rand_type; #endif diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index e534f97391..ba61185a57 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -28,7 +28,6 @@ #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" -#include "random_mars.h" #include "memory.h" #include "modify.h" #include "pair_dpd_fdt_energy_kokkos.h" diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h index a32539242a..74fe5a63b8 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h @@ -22,16 +22,12 @@ PairStyle(dpd/fdt/energy/kk/host,PairDPDfdtEnergyKokkos) #ifndef LMP_PAIR_DPD_FDT_ENERGY_KOKKOS_H #define LMP_PAIR_DPD_FDT_ENERGY_KOKKOS_H +//#define DPD_USE_RAN_MARS +#define DPD_USE_Random_XorShift64 -#ifndef ALLOW_NON_DETERMINISTIC_DPD -#ifdef KOKKOS_HAVE_CUDA -//FIXME print some warning +#if !defined(DPD_USE_RAN_MARS) && !defined(DPD_USE_Random_XorShift64) && !defined(Random_XorShift1024) +#define DPD_USE_Random_XorShift64 #endif -#ifndef DPD_USE_RAN_MARS -#define DPD_USE_RAN_MARS -#endif -#endif - #include "pair_dpd_fdt_energy.h" #include "pair_kokkos.h" @@ -106,9 +102,12 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { #ifdef DPD_USE_RAN_MARS RandPoolWrap rand_pool; typedef RandWrap rand_type; -#else +#elif defined(DPD_USE_Random_XorShift64) Kokkos::Random_XorShift64_Pool rand_pool; typedef typename 
Kokkos::Random_XorShift64_Pool::generator_type rand_type; +#elif defined(DPD_USE_Random_XorShift1024) + Kokkos::Random_XorShift1024_Pool rand_pool; + typedef typename Kokkos::Random_XorShift1024_Pool::generator_type rand_type; #endif typename ArrayTypes::tdual_ffloat_2d k_cutsq; From 8210b25fb848c15484a2dd4dd46af2c933e28bd1 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Wed, 1 Mar 2017 15:34:24 -0500 Subject: [PATCH 176/267] USER-DPD Kokkos: replicate 9a560b90 bugfix to atom_vec_dpd_kokkos.cpp --- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 820f11c215..146ae8f7dd 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -1048,8 +1048,6 @@ int AtomVecDPDKokkos::pack_comm_hybrid(int n, int *list, double *buf) buf[m++] = h_uCond[j]; buf[m++] = h_uMech[j]; buf[m++] = h_uChem[j]; - buf[m++] = h_uCG[j]; - buf[m++] = h_uCGnew[j]; } return m; } @@ -1245,8 +1243,6 @@ int AtomVecDPDKokkos::unpack_comm_hybrid(int n, int first, double *buf) h_uCond(i) = buf[m++]; h_uMech(i) = buf[m++]; h_uChem(i) = buf[m++]; - h_uCG(i) = buf[m++]; - h_uCGnew(i) = buf[m++]; } modified(Host,DPDTHETA_MASK | UCOND_MASK | From d95fbf3a5e4afb3aaef5d9d931893a06dd609d94 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Thu, 2 Mar 2017 15:01:41 -0500 Subject: [PATCH 177/267] USER-DPD Kokkos: use Random_XorShift64() by default, but allow overrides --- src/KOKKOS/pair_dpd_fdt_energy_kokkos.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h index 74fe5a63b8..fcf4b33a7a 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h @@ -22,9 +22,6 @@ PairStyle(dpd/fdt/energy/kk/host,PairDPDfdtEnergyKokkos) #ifndef LMP_PAIR_DPD_FDT_ENERGY_KOKKOS_H #define LMP_PAIR_DPD_FDT_ENERGY_KOKKOS_H -//#define DPD_USE_RAN_MARS -#define 
DPD_USE_Random_XorShift64 - #if !defined(DPD_USE_RAN_MARS) && !defined(DPD_USE_Random_XorShift64) && !defined(Random_XorShift1024) #define DPD_USE_Random_XorShift64 #endif From 27d2e9bf56f04fbb598443b84fb1c8f18f53a9aa Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Thu, 2 Mar 2017 15:03:33 -0500 Subject: [PATCH 178/267] USER-DPD: add npair_halffull_newton_ssa to Purge.list With the new SSA neighbor list, half from full can't work, and will break compiles if the old files are in the src directory --- src/Purge.list | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Purge.list b/src/Purge.list index 554c5df824..772961bbdf 100644 --- a/src/Purge.list +++ b/src/Purge.list @@ -16,6 +16,9 @@ style_region.h style_neigh_bin.h style_neigh_pair.h style_neigh_stencil.h +# deleted on 01 Mar 2017 +npair_halffull_newton_ssa.cpp +npair_halffull_newton_ssa.h # deleted on ## XXX 2016 accelerator_intel.h neigh_bond.cpp From 3820c5881d5c8af44baa45c12771a7971fb668ca Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 2 Mar 2017 14:02:49 -0700 Subject: [PATCH 179/267] Adding fix_wall_lj93_kokkos --- src/KOKKOS/Install.sh | 2 + src/KOKKOS/fix_wall_lj93_kokkos.cpp | 104 ++++++++++++++++++++++++++++ src/KOKKOS/fix_wall_lj93_kokkos.h | 83 ++++++++++++++++++++++ src/fix_wall.cpp | 2 + src/fix_wall_lj93.h | 4 +- 5 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 src/KOKKOS/fix_wall_lj93_kokkos.cpp create mode 100644 src/KOKKOS/fix_wall_lj93_kokkos.h diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index ea70ae4ca1..10245631ab 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -105,6 +105,8 @@ action fix_setforce_kokkos.cpp action fix_setforce_kokkos.h action fix_momentum_kokkos.cpp action fix_momentum_kokkos.h +action fix_wall_lj93_kokkos.cpp +action fix_wall_lj93_kokkos.h action fix_wall_reflect_kokkos.cpp action fix_wall_reflect_kokkos.h action fix_dpd_energy_kokkos.cpp fix_dpd_energy.cpp diff --git a/src/KOKKOS/fix_wall_lj93_kokkos.cpp 
b/src/KOKKOS/fix_wall_lj93_kokkos.cpp new file mode 100644 index 0000000000..38c7347e97 --- /dev/null +++ b/src/KOKKOS/fix_wall_lj93_kokkos.cpp @@ -0,0 +1,104 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include +#include "fix_wall_lj93_kokkos.h" +#include "atom_kokkos.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +/* ---------------------------------------------------------------------- */ + +template +FixWallLJ93Kokkos::FixWallLJ93Kokkos(LAMMPS *lmp, int narg, char **arg) : + FixWallLJ93(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; +} + +/* ---------------------------------------------------------------------- + interaction of all particles in group with a wall + m = index of wall coeffs + which = xlo,xhi,ylo,yhi,zlo,zhi + error if any particle is on or behind wall +------------------------------------------------------------------------- */ + +template +void FixWallLJ93Kokkos::wall_particle(int m_in, int which, double coord_in) +{ + m = m_in; + coord = coord_in; + + atomKK->sync(execution_space, X_MASK|F_MASK|MASK_MASK); + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + mask = atomKK->k_mask.view(); + DAT::tdual_int_scalar k_oneflag = DAT::tdual_int_scalar("fix:oneflag"); + d_oneflag = 
k_oneflag.view(); + + int nlocal = atom->nlocal; + + dim = which / 2; + side = which % 2; + if (side == 0) side = -1; + + copymode = 1; + FixWallLJ93KokkosFunctor wp_functor(this); + Kokkos::parallel_reduce(nlocal,wp_functor,ewall); + DeviceType::fence(); + copymode = 0; + + atomKK->modified(execution_space, F_MASK); + + k_oneflag.template modify(); + k_oneflag.template sync(); + if (k_oneflag.h_view()) error->one(FLERR,"Particle on or inside fix wall surface"); +} + +template +KOKKOS_INLINE_FUNCTION +void FixWallLJ93Kokkos::wall_particle_item(int i, value_type ewall) const { + if (mask(i) & groupbit) { + double delta; + if (side < 0) delta = x(i,dim) - coord; + else delta = coord - x(i,dim); + if (delta >= cutoff[m]) return; + if (delta <= 0.0) { + d_oneflag() = 1; + return; + } + double rinv = 1.0/delta; + double r2inv = rinv*rinv; + double r4inv = r2inv*r2inv; + double r10inv = r4inv*r4inv*r2inv; + double fwall = side * (coeff1[m]*r10inv - coeff2[m]*r4inv); + f(i,dim) -= fwall; + ewall[0] += coeff3[m]*r4inv*r4inv*rinv - + coeff4[m]*r2inv*rinv - offset[m]; + ewall[m+1] += fwall; + } +} + +namespace LAMMPS_NS { +template class FixWallLJ93Kokkos; +#ifdef KOKKOS_HAVE_CUDA +template class FixWallLJ93Kokkos; +#endif +} diff --git a/src/KOKKOS/fix_wall_lj93_kokkos.h b/src/KOKKOS/fix_wall_lj93_kokkos.h new file mode 100644 index 0000000000..3cb0a2d44c --- /dev/null +++ b/src/KOKKOS/fix_wall_lj93_kokkos.h @@ -0,0 +1,83 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(wall/lj93/kk,FixWallLJ93Kokkos) +FixStyle(wall/lj93/kk/device,FixWallLJ93Kokkos) +FixStyle(wall/lj93/kk/host,FixWallLJ93Kokkos) + +#else + +#ifndef LMP_FIX_WALL_LJ93_KOKKOS_H +#define LMP_FIX_WALL_LJ93_KOKKOS_H + +#include "fix_wall_lj93.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +class FixWallLJ93Kokkos : public FixWallLJ93 { + public: + typedef DeviceType device_type; + typedef ArrayTypes AT; + typedef double value_type[]; + + FixWallLJ93Kokkos(class LAMMPS *, int, char **); + void wall_particle(int, int, double); + + int m; + + KOKKOS_INLINE_FUNCTION + void wall_particle_item(int, value_type) const; + + private: + int dim,side; + double coord; + + typename AT::t_x_array x; + typename AT::t_f_array f; + typename AT::t_int_1d mask; + typename AT::t_int_scalar d_oneflag; +}; + +template +struct FixWallLJ93KokkosFunctor { + typedef DeviceType device_type ; + typedef double value_type[]; + const int value_count; + + FixWallLJ93Kokkos c; + FixWallLJ93KokkosFunctor(FixWallLJ93Kokkos* c_ptr): + c(*c_ptr), + value_count(c.m) {} + KOKKOS_INLINE_FUNCTION + void operator()(const int i, value_type ewall) const { + c.wall_particle_item(i,ewall); + } +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Particle on or inside fix wall surface + +Particles must be "exterior" to the wall in order for energy/force to +be calculated. 
+ +*/ diff --git a/src/fix_wall.cpp b/src/fix_wall.cpp index 503b87f4a7..8b569cafc6 100644 --- a/src/fix_wall.cpp +++ b/src/fix_wall.cpp @@ -201,6 +201,8 @@ FixWall::FixWall(LAMMPS *lmp, int narg, char **arg) : FixWall::~FixWall() { + if (copymode) return; + for (int m = 0; m < nwall; m++) { delete [] xstr[m]; delete [] estr[m]; diff --git a/src/fix_wall_lj93.h b/src/fix_wall_lj93.h index 40337a5176..3763a02910 100644 --- a/src/fix_wall_lj93.h +++ b/src/fix_wall_lj93.h @@ -28,9 +28,9 @@ class FixWallLJ93 : public FixWall { public: FixWallLJ93(class LAMMPS *, int, char **); void precompute(int); - void wall_particle(int, int, double); + virtual void wall_particle(int, int, double); - private: + protected: double coeff1[6],coeff2[6],coeff3[6],coeff4[6],offset[6]; }; From 7e78921c96a5f822dd5c8942bc1fb204a3c747f3 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 3 Mar 2017 10:12:44 -0500 Subject: [PATCH 180/267] USER-DPD Kokkos: propagate 763a00e8 bugfix to pair_multi_lucy_rx_kokkos.cpp --- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index 30b49a8e8d..d087546619 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -538,15 +538,15 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeLoca rho_i_contrib += factor; if (NEWTON_PAIR || j < nlocal) a_rho[j] += factor; - } else if (rsq < d_cutsq(itype,jtype)) { - const double rcut = sqrt(d_cutsq(itype,jtype)); - const double tmpFactor = 1.0-sqrt(rsq)/rcut; - const double tmpFactor4 = tmpFactor*tmpFactor*tmpFactor*tmpFactor; - const double factor = (84.0/(5.0*pi*rcut*rcut*rcut))*(1.0+3.0*sqrt(rsq)/(2.0*rcut))*tmpFactor4; - rho_i_contrib += factor; - if (NEWTON_PAIR || j < nlocal) - a_rho[j] += factor; } + } else if (rsq < d_cutsq(itype,jtype)) { + const double rcut = sqrt(d_cutsq(itype,jtype)); + 
const double tmpFactor = 1.0-sqrt(rsq)/rcut; + const double tmpFactor4 = tmpFactor*tmpFactor*tmpFactor*tmpFactor; + const double factor = (84.0/(5.0*pi*rcut*rcut*rcut))*(1.0+3.0*sqrt(rsq)/(2.0*rcut))*tmpFactor4; + rho_i_contrib += factor; + if (NEWTON_PAIR || j < nlocal) + a_rho[j] += factor; } } From a7e855096215b5204b39bc7e54b080095909d6c4 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 3 Mar 2017 10:38:45 -0500 Subject: [PATCH 181/267] USER-DPD Kokkos: turn one_type optimization into a template specialization --- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 28 +++++++++++++++++------- src/KOKKOS/pair_multi_lucy_rx_kokkos.h | 7 +++--- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index d087546619..11dbfabf3a 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -454,7 +454,7 @@ void PairMultiLucyRXKokkos::computeLocalDensity() const double pi = MathConst::MY_PI; const bool newton_pair = force->newton_pair; - one_type = (atom->ntypes == 1); + const bool one_type = (atom->ntypes == 1); // Special cut-off values for when there's only one type. 
cutsq_type11 = cutsq[1][1]; @@ -471,14 +471,26 @@ void PairMultiLucyRXKokkos::computeLocalDensity() if (neighflag == HALF) { if (newton_pair) - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + if (one_type) + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + if (one_type) + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); } else if (neighflag == HALFTHREAD) { if (newton_pair) - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + if (one_type) + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + if (one_type) + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); } atomKK->modified(execution_space,DPDRHO_MASK); @@ -498,9 +510,9 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXZero, const } template -template +template KOKKOS_INLINE_FUNCTION -void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeLocalDensity, const int &ii) const { +void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeLocalDensity, const int &ii) const { // The rho array is atomic for Half/Thread neighbor style @@ -528,7 +540,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeLoca const double delz = ztmp - x(j,2); const double rsq = delx*delx + dely*dely + delz*delz; - if (one_type) { + if (ONE_TYPE) { if (rsq < cutsq_type11) { const double rcut = rcut_type11; const double r_over_rcut = sqrt(rsq) / rcut; diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h index 1e84e3efd8..8556319531 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h +++ 
b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h @@ -39,7 +39,7 @@ struct TagPairMultiLucyRXCompute{}; struct TagPairMultiLucyRXZero{}; -template +template struct TagPairMultiLucyRXComputeLocalDensity{}; template @@ -88,9 +88,9 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { KOKKOS_INLINE_FUNCTION void operator()(TagPairMultiLucyRXZero, const int&) const; - template + template KOKKOS_INLINE_FUNCTION - void operator()(TagPairMultiLucyRXComputeLocalDensity, const int&) const; + void operator()(TagPairMultiLucyRXComputeLocalDensity, const int&) const; template KOKKOS_INLINE_FUNCTION @@ -103,7 +103,6 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { int neighflag; int eflag,vflag; - bool one_type; double cutsq_type11; double rcut_type11; double factor_type11; From c468727db0927c45c25a43d0285265ed01a5a765 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 3 Mar 2017 10:49:15 -0700 Subject: [PATCH 182/267] Fixing issue in fix_wall_lj93_kokkos --- src/KOKKOS/fix_wall_lj93_kokkos.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_wall_lj93_kokkos.h b/src/KOKKOS/fix_wall_lj93_kokkos.h index 3cb0a2d44c..64f3c59a62 100644 --- a/src/KOKKOS/fix_wall_lj93_kokkos.h +++ b/src/KOKKOS/fix_wall_lj93_kokkos.h @@ -61,7 +61,7 @@ struct FixWallLJ93KokkosFunctor { FixWallLJ93Kokkos c; FixWallLJ93KokkosFunctor(FixWallLJ93Kokkos* c_ptr): c(*c_ptr), - value_count(c.m) {} + value_count(c_ptr->m+1) {} KOKKOS_INLINE_FUNCTION void operator()(const int i, value_type ewall) const { c.wall_particle_item(i,ewall); From 0651ea7f69c33ed026199928476297f9a485e00b Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 3 Mar 2017 12:50:13 -0500 Subject: [PATCH 183/267] USER-DPD Kokkos: work around CUDA not having max_hardware_threads() --- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 36 +++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index 
ba61185a57..99a364eb86 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -114,6 +114,42 @@ void PairDPDfdtEnergyKokkos::init_style() #endif } +#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__) +// CUDA specialization of init_style to properly call rand_pool.init() +template<> +void PairDPDfdtEnergyKokkos::init_style() +{ + PairDPDfdtEnergy::init_style(); + + // irequest = neigh request made by parent class + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same::value && + !Kokkos::Impl::is_same::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same::value; + + if (neighflag == FULL) { + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + } else if (neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 1; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk"); + } + +#ifdef DPD_USE_RAN_MARS + rand_pool.init(random,seed); +#else + rand_pool.init(seed + comm->me,4*32768 /*fake max_hardware_threads()*/); +#endif +} +#endif + /* ---------------------------------------------------------------------- */ template From 635c448b61bd0279c567ee4426fff6c6ee1ef88d Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 3 Mar 2017 14:57:35 -0500 Subject: [PATCH 184/267] USER-DPD: sort bins for deterministic SSA instead of using Kokkos::Serial --- src/KOKKOS/nbin_ssa_kokkos.cpp | 85 +++++++++++++++++++++++++++++----- src/KOKKOS/nbin_ssa_kokkos.h | 6 +++ 2 files changed, 79 insertions(+), 12 deletions(-) diff --git a/src/KOKKOS/nbin_ssa_kokkos.cpp b/src/KOKKOS/nbin_ssa_kokkos.cpp index 53f3f2fc80..afe016c3f7 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.cpp +++ b/src/KOKKOS/nbin_ssa_kokkos.cpp @@ -114,7 +114,6 @@ void NBinSSAKokkos::bin_atoms() { last_bin = 
update->ntimestep; - int i; int nlocal = atom->nlocal; int nghost = atom->nghost; int nall = nlocal + nghost; @@ -170,19 +169,17 @@ void NBinSSAKokkos::bin_atoms() k_gbincount.sync(); DeviceType::fence(); // FIXME? - Kokkos::parallel_for( -#ifdef ALLOW_NON_DETERMINISTIC_SSA - Kokkos::RangePolicy(nlocal,nall) -#else - Kokkos::RangePolicy(nlocal,nall) -#endif - , KOKKOS_LAMBDA (const int i) { + Kokkos::parallel_for(Kokkos::RangePolicy(nlocal,nall), + KOKKOS_LAMBDA (const int i) { const int iAIR = binID(i); if (iAIR > 0) { // include only ghost atoms in an AIR const int ac = Kokkos::atomic_fetch_add(&gbincount[iAIR], (int)1); gbins(iAIR, ac) = i; } }); +#ifndef ALLOW_NON_DETERMINISTIC_DPD + Kokkos::parallel_for(Kokkos::RangePolicy(1,8), KOKKOS_LAMBDA (const int i) { sortGhostBin(i); }); +#endif DeviceType::fence(); } c_gbins = gbins; // gbins won't change until the next bin_atoms @@ -200,13 +197,11 @@ void NBinSSAKokkos::bin_atoms() DeviceType::fence(); NPairSSAKokkosBinAtomsFunctor f(*this); -#ifdef ALLOW_NON_DETERMINISTIC_SSA Kokkos::parallel_for(nlocal, f); -#else - Kokkos::parallel_for(Kokkos::RangePolicy(0, nlocal), f); +#ifndef ALLOW_NON_DETERMINISTIC_DPD + Kokkos::parallel_for(mbins, KOKKOS_LAMBDA (const int i) { sortAtomBin(i); }); #endif DeviceType::fence(); - } c_bins = bins; // bins won't change until the next bin_atoms @@ -258,6 +253,72 @@ void NBinSSAKokkos::binIDGhostsItem(const int &i, int &update) const } } +// An implementation of heapsort without recursion +template +KOKKOS_INLINE_FUNCTION +void NBinSSAKokkos::sortAtomBin(const int &ibin) const +{ + int n = bincount(ibin); + int i = n/2; + int t; + + do { /* Loops until bin is sorted */ + if (i > 0) { /* First stage - Sorting the heap */ + i--; /* Save its index to i */ + t = bins(ibin, i); /* Save parent value to t */ + } else { /* Second stage - Extracting elements in-place */ + if ((--n) <= 0) return; /* When the heap is empty, we are done */ + t = bins(ibin, n); /* Save last value (it will be 
overwritten) */ + bins(ibin, n) = bins(ibin, 0); /* Save largest value at the end of the bin */ + } + int parent = i; /* We will start pushing down t from parent */ + int child = i*2 + 1; /* parent's left child */ + /* Sift operation - pushing the value of t down the heap */ + while (child < n) { + /* Choose the largest child */ + if ((child + 1 < n) && (bins(ibin, child + 1) > bins(ibin, child))) ++child; + if (bins(ibin, child) <= t) break; /* t's place is found */ + bins(ibin, parent) = bins(ibin, child); /* Move the largest child up */ + parent = child; /* Move parent pointer to this child */ + child = parent*2+1; /* Find the next child */ + } + bins(ibin, parent) = t; /* We save t in the heap */ + } while(1); +} + +// An implementation of heapsort without recursion +template +KOKKOS_INLINE_FUNCTION +void NBinSSAKokkos::sortGhostBin(const int &ibin) const +{ + int n = gbincount(ibin); + int i = n/2; + int t; + + do { /* Loops until bin is sorted */ + if (i > 0) { /* First stage - Sorting the heap */ + i--; /* Save its index to i */ + t = gbins(ibin, i); /* Save parent value to t */ + } else { /* Second stage - Extracting elements in-place */ + if (--n <= 0) return; /* When the heap is empty, we are done */ + t = gbins(ibin, n); /* Save last value (it will be overwritten) */ + gbins(ibin, n) = gbins(ibin, 0); /* Save largest value at the end of the bin */ + } + int parent = i; /* We will start pushing down t from parent */ + int child = i*2 + 1; /* parent's left child */ + /* Sift operation - pushing the value of t down the heap */ + while (child < n) { + /* Choose the largest child */ + if ((child + 1 < n) && (gbins(ibin, child + 1) > gbins(ibin, child))) ++child; + if (gbins(ibin, child) <= t) break; /* t's place is found */ + gbins(ibin, parent) = gbins(ibin, child); /* Move the largest child up */ + parent = child; /* Move parent pointer to this child */ + child = parent*2+1; /* Find the next child */ + } + gbins(ibin, parent) = t; /* We save t in the heap 
*/ + } while(1); +} + namespace LAMMPS_NS { template class NBinSSAKokkos; #ifdef KOKKOS_HAVE_CUDA diff --git a/src/KOKKOS/nbin_ssa_kokkos.h b/src/KOKKOS/nbin_ssa_kokkos.h index 69f05c9304..add400c573 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.h +++ b/src/KOKKOS/nbin_ssa_kokkos.h @@ -88,6 +88,12 @@ class NBinSSAKokkos : public NBinStandard { KOKKOS_INLINE_FUNCTION void binIDGhostsItem(const int &i, int &update) const; + KOKKOS_INLINE_FUNCTION + void sortAtomBin(const int &ibin) const; + + KOKKOS_INLINE_FUNCTION + void sortGhostBin(const int &ibin) const; + /* ---------------------------------------------------------------------- convert atom coords into the ssa active interaction region number ------------------------------------------------------------------------- */ From b35895ca128b375595d3da401a88f842b8ad63bf Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 3 Mar 2017 15:21:09 -0500 Subject: [PATCH 185/267] USER-DPD Kokkos: Remove the SSA's ALLOW_NON_DETERMINISTIC_DPD option. There was no measurable performance benefit to turning it on. 
--- src/KOKKOS/nbin_ssa_kokkos.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/KOKKOS/nbin_ssa_kokkos.cpp b/src/KOKKOS/nbin_ssa_kokkos.cpp index afe016c3f7..0f4a3b8d4f 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.cpp +++ b/src/KOKKOS/nbin_ssa_kokkos.cpp @@ -177,9 +177,7 @@ void NBinSSAKokkos::bin_atoms() gbins(iAIR, ac) = i; } }); -#ifndef ALLOW_NON_DETERMINISTIC_DPD Kokkos::parallel_for(Kokkos::RangePolicy(1,8), KOKKOS_LAMBDA (const int i) { sortGhostBin(i); }); -#endif DeviceType::fence(); } c_gbins = gbins; // gbins won't change until the next bin_atoms @@ -198,9 +196,7 @@ void NBinSSAKokkos::bin_atoms() NPairSSAKokkosBinAtomsFunctor f(*this); Kokkos::parallel_for(nlocal, f); -#ifndef ALLOW_NON_DETERMINISTIC_DPD Kokkos::parallel_for(mbins, KOKKOS_LAMBDA (const int i) { sortAtomBin(i); }); -#endif DeviceType::fence(); } c_bins = bins; // bins won't change until the next bin_atoms From c2c22fc2ede651fe5b9da1f7c45a4c05ab543951 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Mon, 6 Mar 2017 10:57:19 -0700 Subject: [PATCH 186/267] add missing KOKKOS_INLINE_FUNCTION to fix_shardlow --- src/KOKKOS/Install.sh | 4 ++-- src/KOKKOS/fix_shardlow_kokkos.cpp | 1 + src/KOKKOS/fix_shardlow_kokkos.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 68bd8d2ea8..5707a4e53c 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -28,8 +28,8 @@ action () { # force rebuild of files with LMP_KOKKOS switch -touch ../accelerator_kokkos.h -touch ../memory.h +#touch ../accelerator_kokkos.h +#touch ../memory.h # list of files with optional dependcies diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index 996f37257d..0dfbce5033 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -433,6 +433,7 @@ void FixShardlowKokkos::ssa_update_dpd( ------------------------------------------------------------------------- */ template template 
+KOKKOS_INLINE_FUNCTION void FixShardlowKokkos::ssa_update_dpde( int start_ii, int count, int id ) diff --git a/src/KOKKOS/fix_shardlow_kokkos.h b/src/KOKKOS/fix_shardlow_kokkos.h index c4711f5b8b..4dc47709e1 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.h +++ b/src/KOKKOS/fix_shardlow_kokkos.h @@ -124,6 +124,7 @@ class FixShardlowKokkos : public FixShardlow { // template // void ssa_update_dpd(int, int); // Constant Temperature template + KOKKOS_INLINE_FUNCTION void ssa_update_dpde(int, int, int); // Constant Energy }; From 3e8cfb8247fdf7bb19ac21edfd971b3188f4881c Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Mon, 6 Mar 2017 11:04:47 -0700 Subject: [PATCH 187/267] The wonders of git commit -a --- src/KOKKOS/Install.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 5707a4e53c..68bd8d2ea8 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -28,8 +28,8 @@ action () { # force rebuild of files with LMP_KOKKOS switch -#touch ../accelerator_kokkos.h -#touch ../memory.h +touch ../accelerator_kokkos.h +touch ../memory.h # list of files with optional dependcies From 4a6f27935d26567203c595fdf8214c89f4e94643 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Mon, 6 Mar 2017 14:58:40 -0700 Subject: [PATCH 188/267] fix lambda syntax for CUDA KOKKOS_LAMBDA doesn't quite work on CUDA, you have to use LAMMPS_LAMBDA. Also, if you do use LAMMPS_LAMBDA, you need to run on the default device type, i.e. no using lambdas to run on OpenMP when LAMMPS has been compiled for CUDA. 
--- src/KOKKOS/fix_shardlow_kokkos.cpp | 10 +++++----- src/KOKKOS/nbin_ssa_kokkos.cpp | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index 0dfbce5033..bf026552fa 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -677,13 +677,13 @@ void FixShardlowKokkos::initial_integrate(int vflag) int workItemCt = ssa_phaseLen[workPhase]; if(atom->ntypes > MAX_TYPES_STACKPARAMS) { - Kokkos::parallel_for(workItemCt, KOKKOS_LAMBDA (const int workItem ) { + Kokkos::parallel_for(workItemCt, LAMMPS_LAMBDA (const int workItem ) { int ct = ssa_itemLen(workPhase, workItem); int ii = ssa_itemLoc(workPhase, workItem); ssa_update_dpde(ii, ct, workItem); }); } else { - Kokkos::parallel_for(workItemCt, KOKKOS_LAMBDA (const int workItem ) { + Kokkos::parallel_for(workItemCt, LAMMPS_LAMBDA (const int workItem ) { int ct = ssa_itemLen(workPhase, workItem); int ii = ssa_itemLoc(workPhase, workItem); ssa_update_dpde(ii, ct, workItem); @@ -704,7 +704,7 @@ void FixShardlowKokkos::initial_integrate(int vflag) // memset(&(atom->uCond[nlocal]), 0, sizeof(double)*nghost); // memset(&(atom->uMech[nlocal]), 0, sizeof(double)*nghost); - Kokkos::parallel_for(Kokkos::RangePolicy(nlocal,nlocal+nghost), KOKKOS_LAMBDA (const int i) { + Kokkos::parallel_for(Kokkos::RangePolicy(nlocal,nlocal+nghost), LAMMPS_LAMBDA (const int i) { uCond(i) = 0.0; uMech(i) = 0.0; }); @@ -713,13 +713,13 @@ void FixShardlowKokkos::initial_integrate(int vflag) // process neighbors in this AIR if(atom->ntypes > MAX_TYPES_STACKPARAMS) { - Kokkos::parallel_for(workItemCt, KOKKOS_LAMBDA (const int workItem ) { + Kokkos::parallel_for(workItemCt, LAMMPS_LAMBDA (const int workItem ) { int ct = ssa_gitemLen(workPhase, workItem); int ii = ssa_gitemLoc(workPhase, workItem); ssa_update_dpde(ii, ct, workItem); }); } else { - Kokkos::parallel_for(workItemCt, KOKKOS_LAMBDA (const int workItem ) { + 
Kokkos::parallel_for(workItemCt, LAMMPS_LAMBDA (const int workItem ) { int ct = ssa_gitemLen(workPhase, workItem); int ii = ssa_gitemLoc(workPhase, workItem); ssa_update_dpde(ii, ct, workItem); diff --git a/src/KOKKOS/nbin_ssa_kokkos.cpp b/src/KOKKOS/nbin_ssa_kokkos.cpp index 0f4a3b8d4f..b0e2d5be88 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.cpp +++ b/src/KOKKOS/nbin_ssa_kokkos.cpp @@ -155,7 +155,7 @@ void NBinSSAKokkos::bin_atoms() DeviceType::fence(); // FIXME? ghosts_per_gbin = 0; NPairSSAKokkosBinIDGhostsFunctor f(*this); - Kokkos::parallel_reduce(Kokkos::RangePolicy(nlocal,nall), f, ghosts_per_gbin); + Kokkos::parallel_reduce(Kokkos::RangePolicy(nlocal,nall), f, ghosts_per_gbin); } // actually bin the ghost atoms @@ -169,15 +169,15 @@ void NBinSSAKokkos::bin_atoms() k_gbincount.sync(); DeviceType::fence(); // FIXME? - Kokkos::parallel_for(Kokkos::RangePolicy(nlocal,nall), - KOKKOS_LAMBDA (const int i) { + Kokkos::parallel_for(Kokkos::RangePolicy(nlocal,nall), + LAMMPS_LAMBDA (const int i) { const int iAIR = binID(i); if (iAIR > 0) { // include only ghost atoms in an AIR const int ac = Kokkos::atomic_fetch_add(&gbincount[iAIR], (int)1); gbins(iAIR, ac) = i; } }); - Kokkos::parallel_for(Kokkos::RangePolicy(1,8), KOKKOS_LAMBDA (const int i) { sortGhostBin(i); }); + Kokkos::parallel_for(Kokkos::RangePolicy(1,8), LAMMPS_LAMBDA (const int i) { sortGhostBin(i); }); DeviceType::fence(); } c_gbins = gbins; // gbins won't change until the next bin_atoms @@ -196,7 +196,7 @@ void NBinSSAKokkos::bin_atoms() NPairSSAKokkosBinAtomsFunctor f(*this); Kokkos::parallel_for(nlocal, f); - Kokkos::parallel_for(mbins, KOKKOS_LAMBDA (const int i) { sortAtomBin(i); }); + Kokkos::parallel_for(mbins, LAMMPS_LAMBDA (const int i) { sortAtomBin(i); }); DeviceType::fence(); } c_bins = bins; // bins won't change until the next bin_atoms From a7d1b571be0ae8969911cba9cf4b4d5a9b879948 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Mon, 6 Mar 2017 15:07:07 -0700 Subject: [PATCH 189/267] don't 
capture "this" in lambdas CUDA lambdas can't capture the calling object very well. make local shallow copies of variables needed. --- src/KOKKOS/nbin_ssa_kokkos.cpp | 21 +++++++++++++++------ src/KOKKOS/nbin_ssa_kokkos.h | 7 +++++-- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/KOKKOS/nbin_ssa_kokkos.cpp b/src/KOKKOS/nbin_ssa_kokkos.cpp index b0e2d5be88..8c991cc0c2 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.cpp +++ b/src/KOKKOS/nbin_ssa_kokkos.cpp @@ -169,16 +169,22 @@ void NBinSSAKokkos::bin_atoms() k_gbincount.sync(); DeviceType::fence(); // FIXME? + auto binID_ = binID; + auto gbincount_ = gbincount; + auto gbins_ = gbins; + Kokkos::parallel_for(Kokkos::RangePolicy(nlocal,nall), LAMMPS_LAMBDA (const int i) { - const int iAIR = binID(i); + const int iAIR = binID_(i); if (iAIR > 0) { // include only ghost atoms in an AIR - const int ac = Kokkos::atomic_fetch_add(&gbincount[iAIR], (int)1); - gbins(iAIR, ac) = i; + const int ac = Kokkos::atomic_fetch_add(&gbincount_[iAIR], (int)1); + gbins_(iAIR, ac) = i; } }); - Kokkos::parallel_for(Kokkos::RangePolicy(1,8), LAMMPS_LAMBDA (const int i) { sortGhostBin(i); }); - DeviceType::fence(); + Kokkos::parallel_for(Kokkos::RangePolicy(1,8), + LAMMPS_LAMBDA (const int i) { + sortGhostBin(gbincount_, gbins_, i); + }); } c_gbins = gbins; // gbins won't change until the next bin_atoms @@ -285,7 +291,10 @@ void NBinSSAKokkos::sortAtomBin(const int &ibin) const // An implementation of heapsort without recursion template KOKKOS_INLINE_FUNCTION -void NBinSSAKokkos::sortGhostBin(const int &ibin) const +void NBinSSAKokkos::sortGhostBin( + typename AT::t_int_1d gbincount, + typename AT::t_int_2d gbins, + const int &ibin) { int n = gbincount(ibin); int i = n/2; diff --git a/src/KOKKOS/nbin_ssa_kokkos.h b/src/KOKKOS/nbin_ssa_kokkos.h index add400c573..ca1f81953f 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.h +++ b/src/KOKKOS/nbin_ssa_kokkos.h @@ -91,8 +91,11 @@ class NBinSSAKokkos : public NBinStandard { KOKKOS_INLINE_FUNCTION 
void sortAtomBin(const int &ibin) const; - KOKKOS_INLINE_FUNCTION - void sortGhostBin(const int &ibin) const; + static KOKKOS_INLINE_FUNCTION + void sortGhostBin( + typename AT::t_int_1d gbincount, + typename AT::t_int_2d gbins, + const int &ibin); /* ---------------------------------------------------------------------- convert atom coords into the ssa active interaction region number From 3e3a24da48d41227f5bfc12f3337a0a39ce7e948 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Mon, 6 Mar 2017 15:28:25 -0700 Subject: [PATCH 190/267] consolidate sorting functions two sort functions with different names but identical functionality. making them the same function until we decide to use a different algorithm for atoms and ghosts --- src/KOKKOS/nbin_ssa_kokkos.cpp | 48 ++++++++-------------------------- src/KOKKOS/nbin_ssa_kokkos.h | 5 +--- 2 files changed, 12 insertions(+), 41 deletions(-) diff --git a/src/KOKKOS/nbin_ssa_kokkos.cpp b/src/KOKKOS/nbin_ssa_kokkos.cpp index 8c991cc0c2..1fcbbed601 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.cpp +++ b/src/KOKKOS/nbin_ssa_kokkos.cpp @@ -183,7 +183,7 @@ void NBinSSAKokkos::bin_atoms() }); Kokkos::parallel_for(Kokkos::RangePolicy(1,8), LAMMPS_LAMBDA (const int i) { - sortGhostBin(gbincount_, gbins_, i); + sortBin(gbincount_, gbins_, i); }); } c_gbins = gbins; // gbins won't change until the next bin_atoms @@ -200,9 +200,16 @@ void NBinSSAKokkos::bin_atoms() Kokkos::parallel_for(mbins, f_zero); DeviceType::fence(); + auto bincount_ = bincount; + auto bins_ = bins; + NPairSSAKokkosBinAtomsFunctor f(*this); Kokkos::parallel_for(nlocal, f); - Kokkos::parallel_for(mbins, LAMMPS_LAMBDA (const int i) { sortAtomBin(i); }); + + Kokkos::parallel_for(mbins, + LAMMPS_LAMBDA (const int i) { + sortBin(bincount_, bins_, i); + }); DeviceType::fence(); } c_bins = bins; // bins won't change until the next bin_atoms @@ -258,40 +265,7 @@ void NBinSSAKokkos::binIDGhostsItem(const int &i, int &update) const // An implementation of heapsort without
recursion template KOKKOS_INLINE_FUNCTION -void NBinSSAKokkos::sortAtomBin(const int &ibin) const -{ - int n = bincount(ibin); - int i = n/2; - int t; - - do { /* Loops until bin is sorted */ - if (i > 0) { /* First stage - Sorting the heap */ - i--; /* Save its index to i */ - t = bins(ibin, i); /* Save parent value to t */ - } else { /* Second stage - Extracting elements in-place */ - if ((--n) <= 0) return; /* When the heap is empty, we are done */ - t = bins(ibin, n); /* Save last value (it will be overwritten) */ - bins(ibin, n) = bins(ibin, 0); /* Save largest value at the end of the bin */ - } - int parent = i; /* We will start pushing down t from parent */ - int child = i*2 + 1; /* parent's left child */ - /* Sift operation - pushing the value of t down the heap */ - while (child < n) { - /* Choose the largest child */ - if ((child + 1 < n) && (bins(ibin, child + 1) > bins(ibin, child))) ++child; - if (bins(ibin, child) <= t) break; /* t's place is found */ - bins(ibin, parent) = bins(ibin, child); /* Move the largest child up */ - parent = child; /* Move parent pointer to this child */ - child = parent*2+1; /* Find the next child */ - } - bins(ibin, parent) = t; /* We save t in the heap */ - } while(1); -} - -// An implementation of heapsort without recursion -template -KOKKOS_INLINE_FUNCTION -void NBinSSAKokkos::sortGhostBin( +void NBinSSAKokkos::sortBin( typename AT::t_int_1d gbincount, typename AT::t_int_2d gbins, const int &ibin) @@ -305,7 +279,7 @@ void NBinSSAKokkos::sortGhostBin( i--; /* Save its index to i */ t = gbins(ibin, i); /* Save parent value to t */ } else { /* Second stage - Extracting elements in-place */ - if (--n <= 0) return; /* When the heap is empty, we are done */ + if ((--n) <= 0) return; /* When the heap is empty, we are done */ t = gbins(ibin, n); /* Save last value (it will be overwritten) */ gbins(ibin, n) = gbins(ibin, 0); /* Save largest value at the end of the bin */ } diff --git a/src/KOKKOS/nbin_ssa_kokkos.h 
b/src/KOKKOS/nbin_ssa_kokkos.h index ca1f81953f..cc98859913 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.h +++ b/src/KOKKOS/nbin_ssa_kokkos.h @@ -88,11 +88,8 @@ class NBinSSAKokkos : public NBinStandard { KOKKOS_INLINE_FUNCTION void binIDGhostsItem(const int &i, int &update) const; - KOKKOS_INLINE_FUNCTION - void sortAtomBin(const int &ibin) const; - static KOKKOS_INLINE_FUNCTION - void sortGhostBin( + void sortBin( typename AT::t_int_1d gbincount, typename AT::t_int_2d gbins, const int &ibin); From 527a573026d14fc068f0e44e8b676d98cd1816d6 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Mon, 6 Mar 2017 15:42:26 -0700 Subject: [PATCH 191/267] don't use device views to measure dimensions --- src/KOKKOS/nbin_ssa_kokkos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/nbin_ssa_kokkos.cpp b/src/KOKKOS/nbin_ssa_kokkos.cpp index 1fcbbed601..6c9e3a3446 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.cpp +++ b/src/KOKKOS/nbin_ssa_kokkos.cpp @@ -72,7 +72,7 @@ NBinSSAKokkos::NBinSSAKokkos(LAMMPS *lmp) : NBinStandard(lmp) template void NBinSSAKokkos::bin_atoms_setup(int nall) { - if (mbins > (int) k_bins.d_view.dimension_0()) { + if (mbins > (int) k_bins.h_view.dimension_0()) { k_bins = DAT::tdual_int_2d("NBinSSAKokkos::bins",mbins,atoms_per_bin); bins = k_bins.view(); @@ -82,7 +82,7 @@ void NBinSSAKokkos::bin_atoms_setup(int nall) ghosts_per_gbin = atom->nghost / 7; // estimate needed size - if (ghosts_per_gbin > (int) k_gbins.d_view.dimension_1()) { + if (ghosts_per_gbin > (int) k_gbins.h_view.dimension_1()) { k_gbins = DAT::tdual_int_2d("NBinSSAKokkos::gbins",8,ghosts_per_gbin); gbins = k_gbins.view(); } From b8c72c7bdb547c75c3fc4077353b8f15e4b7b240 Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Mon, 6 Mar 2017 15:51:09 -0700 Subject: [PATCH 192/267] don't query device variables from the host --- src/KOKKOS/npair_ssa_kokkos.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp 
b/src/KOKKOS/npair_ssa_kokkos.cpp index 7eea57d492..a9b59bfc96 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -83,12 +83,12 @@ void NPairSSAKokkos::copy_bin_info() k_gbincount = nbKK->k_gbincount; k_gbins = nbKK->k_gbins; - lbinxlo = nbKK->d_lbinxlo(); - lbinxhi = nbKK->d_lbinxhi(); - lbinylo = nbKK->d_lbinylo(); - lbinyhi = nbKK->d_lbinyhi(); - lbinzlo = nbKK->d_lbinzlo(); - lbinzhi = nbKK->d_lbinzhi(); + lbinxlo = nbKK->h_lbinxlo(); + lbinxhi = nbKK->h_lbinxhi(); + lbinylo = nbKK->h_lbinylo(); + lbinyhi = nbKK->h_lbinyhi(); + lbinzlo = nbKK->h_lbinzlo(); + lbinzhi = nbKK->h_lbinzhi(); } /* ---------------------------------------------------------------------- From d01f09dce237dabe50e22d78881380676db3451a Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Tue, 7 Mar 2017 15:23:17 -0500 Subject: [PATCH 193/267] Turn off use of OpenMP in MPIIO/dump_custom_mpiio.cpp if Kokkos is in use. The convert_string_omp() method breaks when Kokkos is also using OpenMP. 
--- src/MPIIO/dump_custom_mpiio.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/MPIIO/dump_custom_mpiio.cpp b/src/MPIIO/dump_custom_mpiio.cpp index 6e48bfa146..0b282b77ef 100644 --- a/src/MPIIO/dump_custom_mpiio.cpp +++ b/src/MPIIO/dump_custom_mpiio.cpp @@ -542,8 +542,8 @@ void DumpCustomMPIIO::write_string(int n, double *mybuf) #if defined(_OPENMP) int nthreads = omp_get_max_threads(); - if (nthreads > 1) - nsme = convert_string_omp(n,mybuf); + if ((nthreads > 1) && !(lmp->kokkos)) + nsme = convert_string_omp(n,mybuf); // not (yet) compatible with Kokkos else nsme = convert_string(n,mybuf); #else From fc23f9cfe897f383db0fa48ce28ea7a2dceb34e8 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 8 Mar 2017 13:07:52 -0700 Subject: [PATCH 194/267] Disable allocation of per-atom arrays in ev_setup for USER-DPD Kokkos styles --- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 2 +- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 2 +- src/KOKKOS/pair_hybrid_kokkos.cpp | 2 +- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 2 +- src/KOKKOS/pair_table_rx_kokkos.cpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index 99a364eb86..bd0f08efa6 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -161,7 +161,7 @@ void PairDPDfdtEnergyKokkos::compute(int eflag_in, int vflag_in) vflag = vflag_in; if (neighflag == FULL) no_virial_fdotr_compute = 1; - if (eflag || vflag) ev_setup(eflag,vflag); + if (eflag || vflag) ev_setup(eflag,vflag,0); else evflag = vflag_fdotr = 0; // reallocate per-atom arrays if necessary diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 962dcfd031..4b0748721c 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -127,7 +127,7 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) vflag = vflag_in; if 
(neighflag == FULL) no_virial_fdotr_compute = 1; - if (eflag || vflag) ev_setup(eflag,vflag); + if (eflag || vflag) ev_setup(eflag,vflag,0); else evflag = vflag_fdotr = 0; // reallocate per-atom arrays if necessary diff --git a/src/KOKKOS/pair_hybrid_kokkos.cpp b/src/KOKKOS/pair_hybrid_kokkos.cpp index 337b56c6ce..629eee156a 100644 --- a/src/KOKKOS/pair_hybrid_kokkos.cpp +++ b/src/KOKKOS/pair_hybrid_kokkos.cpp @@ -77,7 +77,7 @@ void PairHybridKokkos::compute(int eflag, int vflag) if (no_virial_fdotr_compute && vflag % 4 == 2) vflag = 1 + vflag/4 * 4; - if (eflag || vflag) ev_setup(eflag,vflag); + if (eflag || vflag) ev_setup(eflag,vflag,0); else evflag = vflag_fdotr = eflag_global = vflag_global = eflag_atom = vflag_atom = 0; diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index 11dbfabf3a..4379cc4001 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -147,7 +147,7 @@ void PairMultiLucyRXKokkos::compute_style(int eflag_in, int vflag_in vflag = vflag_in; if (neighflag == FULL) no_virial_fdotr_compute = 1; - if (eflag || vflag) ev_setup(eflag,vflag); + if (eflag || vflag) ev_setup(eflag,vflag,0); else evflag = vflag_fdotr = 0; // reallocate per-atom arrays if necessary diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 2a1ee2c0b1..cbb1096712 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -627,7 +627,7 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) if (neighflag == FULL) no_virial_fdotr_compute = 1; - if (eflag || vflag) ev_setup(eflag,vflag); + if (eflag || vflag) ev_setup(eflag,vflag,0); else evflag = vflag_fdotr = 0; if (eflag_atom) { From 35e1cf1d6e006b4c4508c9eb34caa8563a0418e3 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 8 Mar 2017 20:02:02 -0700 Subject: [PATCH 195/267] Fixing issue with ev_setup in pair_hybrid_kokkos --- 
src/KOKKOS/pair_hybrid_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/pair_hybrid_kokkos.cpp b/src/KOKKOS/pair_hybrid_kokkos.cpp index 629eee156a..337b56c6ce 100644 --- a/src/KOKKOS/pair_hybrid_kokkos.cpp +++ b/src/KOKKOS/pair_hybrid_kokkos.cpp @@ -77,7 +77,7 @@ void PairHybridKokkos::compute(int eflag, int vflag) if (no_virial_fdotr_compute && vflag % 4 == 2) vflag = 1 + vflag/4 * 4; - if (eflag || vflag) ev_setup(eflag,vflag,0); + if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = eflag_global = vflag_global = eflag_atom = vflag_atom = 0; From 6f71275db30fcea912d9fb37fb13ab0608cc9d1b Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 9 Mar 2017 15:35:07 -0700 Subject: [PATCH 196/267] Add Kokkos version of atom_vec_hybrid_kokkos, without CUDA support --- src/KOKKOS/Install.sh | 2 + src/KOKKOS/atom_kokkos.cpp | 1 + src/KOKKOS/atom_kokkos.h | 1 + src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 1191 +++++++++++++++++++++++++ src/KOKKOS/atom_vec_hybrid_kokkos.h | 161 ++++ 5 files changed, 1356 insertions(+) create mode 100644 src/KOKKOS/atom_vec_hybrid_kokkos.cpp create mode 100644 src/KOKKOS/atom_vec_hybrid_kokkos.h diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 68bd8d2ea8..9c11e9321b 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -53,6 +53,8 @@ action atom_vec_dpd_kokkos.cpp atom_vec_dpd.cpp action atom_vec_dpd_kokkos.h atom_vec_dpd.h action atom_vec_full_kokkos.cpp atom_vec_full.cpp action atom_vec_full_kokkos.h atom_vec_full.h +action atom_vec_hybrid_kokkos.cpp +action atom_vec_hybrid_kokkos.h action atom_vec_kokkos.cpp action atom_vec_kokkos.h action atom_vec_molecular_kokkos.cpp atom_vec_molecular.cpp diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index 97b76ba67c..31b33dbdc9 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -49,6 +49,7 @@ AtomKokkos::~AtomKokkos() memory->destroy_kokkos(k_radius, radius); 
memory->destroy_kokkos(k_rmass, rmass); memory->destroy_kokkos(k_omega, omega); + memory->destroy_kokkos(k_angmom, angmom); memory->destroy_kokkos(k_torque, torque); memory->destroy_kokkos(k_nspecial, nspecial); diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index cf454bcd0c..2245023189 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -34,6 +34,7 @@ class AtomKokkos : public Atom { DAT::tdual_float_1d k_radius; DAT::tdual_float_1d k_rmass; DAT::tdual_v_array k_omega; + DAT::tdual_v_array k_angmom; DAT::tdual_f_array k_torque; DAT::tdual_tagint_1d k_molecule; DAT::tdual_int_2d k_nspecial; diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp new file mode 100644 index 0000000000..0c9d261be5 --- /dev/null +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -0,0 +1,1191 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include +#include +#include "atom_vec_kokkos.h" +#include "atom_vec_hybrid_kokkos.h" +#include "atom_kokkos.h" +#include "domain.h" +#include "modify.h" +#include "fix.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +AtomVecHybridKokkos::AtomVecHybridKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) {} + +/* ---------------------------------------------------------------------- */ + +AtomVecHybridKokkos::~AtomVecHybridKokkos() +{ + for (int k = 0; k < nstyles; k++) delete styles[k]; + delete [] styles; + for (int k = 0; k < nstyles; k++) delete [] keywords[k]; + delete [] keywords; +} + +/* ---------------------------------------------------------------------- + process sub-style args +------------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::process_args(int narg, char **arg) +{ + // build list of all known atom styles + + build_styles(); + + // allocate list of sub-styles as big as possibly needed if no extra args + + styles = new AtomVec*[narg]; + keywords = new char*[narg]; + + // allocate each sub-style + // call process_args() with set of args that are not atom style names + // use known_style() to determine which args these are + + int i,jarg,dummy; + + int iarg = 0; + nstyles = 0; + while (iarg < narg) { + if (strcmp(arg[iarg],"hybrid") == 0) + error->all(FLERR,"Atom style hybrid cannot have hybrid as an argument"); + for (i = 0; i < nstyles; i++) + if (strcmp(arg[iarg],keywords[i]) == 0) + error->all(FLERR,"Atom style hybrid cannot use same atom style twice"); + styles[nstyles] = atom->new_avec(arg[iarg],1,dummy); + keywords[nstyles] = new char[strlen(arg[iarg])+1]; + strcpy(keywords[nstyles],arg[iarg]); + jarg = iarg + 1; + while (jarg < narg && !known_style(arg[jarg])) jarg++; + 
styles[nstyles]->process_args(jarg-iarg-1,&arg[iarg+1]); + iarg = jarg; + nstyles++; + } + + // free allstyles created by build_styles() + + for (int i = 0; i < nallstyles; i++) delete [] allstyles[i]; + delete [] allstyles; + + // hybrid settings are MAX or MIN of sub-style settings + // hybrid sizes are minimal values plus extra values for each sub-style + + molecular = 0; + comm_x_only = comm_f_only = 1; + + size_forward = 3; + size_reverse = 3; + size_border = 6; + size_data_atom = 5; + size_data_vel = 4; + xcol_data = 3; + + for (int k = 0; k < nstyles; k++) { + if ((styles[k]->molecular == 1 && molecular == 2) || + (styles[k]->molecular == 2 && molecular == 1)) + error->all(FLERR,"Cannot mix molecular and molecule template " + "atom styles"); + molecular = MAX(molecular,styles[k]->molecular); + + bonds_allow = MAX(bonds_allow,styles[k]->bonds_allow); + angles_allow = MAX(angles_allow,styles[k]->angles_allow); + dihedrals_allow = MAX(dihedrals_allow,styles[k]->dihedrals_allow); + impropers_allow = MAX(impropers_allow,styles[k]->impropers_allow); + mass_type = MAX(mass_type,styles[k]->mass_type); + dipole_type = MAX(dipole_type,styles[k]->dipole_type); + forceclearflag = MAX(forceclearflag,styles[k]->forceclearflag); + + if (styles[k]->molecular == 2) onemols = styles[k]->onemols; + + comm_x_only = MIN(comm_x_only,styles[k]->comm_x_only); + comm_f_only = MIN(comm_f_only,styles[k]->comm_f_only); + size_forward += styles[k]->size_forward - 3; + size_reverse += styles[k]->size_reverse - 3; + size_border += styles[k]->size_border - 6; + size_data_atom += styles[k]->size_data_atom - 5; + size_data_vel += styles[k]->size_data_vel - 4; + } + + size_velocity = 3; + if (atom->omega_flag) size_velocity += 3; + if (atom->angmom_flag) size_velocity += 3; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::init() +{ + AtomVec::init(); + for (int k = 0; k < nstyles; k++) styles[k]->init(); + +#ifdef KOKKOS_HAVE_CUDA 
+ error->all(FLERR,"AtomVecHybridKokkos doesn't yet support CUDA"); +#endif +} + +/* ---------------------------------------------------------------------- + grow atom arrays + n = 0 grows arrays by a chunk + n > 0 allocates arrays to size n +------------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::grow(int n) +{ + if (n == 0) grow_nmax(); + else nmax = n; + atom->nmax = nmax; + if (nmax < 0 || nmax > MAXSMALLINT) + error->one(FLERR,"Per-processor system is too big"); + + // sub-styles perform all reallocation + // turn off nextra_grow so hybrid can do that once below + + int tmp = atom->nextra_grow; + atom->nextra_grow = 0; + for (int k = 0; k < nstyles; k++) styles[k]->grow(nmax); + atom->nextra_grow = tmp; + + // insure hybrid local ptrs and sub-style ptrs are up to date + // for sub-styles, do this in case + // multiple sub-style reallocs of same array occurred + + grow_reset(); + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); +} + +/* ---------------------------------------------------------------------- + reset local array ptrs +------------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::grow_reset() +{ + tag = atomKK->tag; + d_tag = atomKK->k_tag.d_view; + h_tag = atomKK->k_tag.h_view; + + type = atomKK->type; + d_type = atomKK->k_type.d_view; + h_type = atomKK->k_type.h_view; + + mask = atomKK->mask; + d_mask = atomKK->k_mask.d_view; + h_mask = atomKK->k_mask.h_view; + + image = atomKK->image; + d_image = atomKK->k_image.d_view; + h_image = atomKK->k_image.h_view; + + x = atomKK->x; + d_x = atomKK->k_x.d_view; + h_x = atomKK->k_x.h_view; + + v = atomKK->v; + d_v = atomKK->k_v.d_view; + h_v = atomKK->k_v.h_view; + + f = atomKK->f; + d_f = atomKK->k_f.d_view; + h_f = atomKK->k_f.h_view; + + v = atomKK->v; + d_v = atomKK->k_v.d_view; + h_v = atomKK->k_v.h_view; + + 
omega = atomKK->omega; + d_omega = atomKK->k_omega.d_view; + h_omega = atomKK->k_omega.h_view; + + angmom = atomKK->angmom; + d_angmom = atomKK->k_angmom.d_view; + h_angmom = atomKK->k_angmom.h_view; + + for (int k = 0; k < nstyles; k++) styles[k]->grow_reset(); +} + +/* ---------------------------------------------------------------------- + copy atom I info to atom J for all sub-styles +------------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::copy(int i, int j, int delflag) +{ + int tmp = atom->nextra_grow; + atom->nextra_grow = 0; + for (int k = 0; k < nstyles; k++) styles[k]->copy(i,j,delflag); + atom->nextra_grow = tmp; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag); +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::clear_bonus() +{ + for (int k = 0; k < nstyles; k++) styles[k]->clear_bonus(); +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::force_clear(int n, size_t nbytes) +{ + for (int k = 0; k < nstyles; k++) + if (styles[k]->forceclearflag) styles[k]->force_clear(n,nbytes); +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, const int pbc[]) +{ + error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); +} +void AtomVecHybridKokkos::unpack_comm_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf) +{ + error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); +} +int AtomVecHybridKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, + const int & iswap, const int nfirst, + const int &pbc_flag, const int pbc[]) 
+{ + error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); +} +int AtomVecHybridKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, + DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space) +{ + error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); +} +void AtomVecHybridKokkos::unpack_border_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf, + ExecutionSpace space) +{ + error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); +} +int AtomVecHybridKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space, int dim, + X_FLOAT lo, X_FLOAT hi) +{ + error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); +} +int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, + int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, + ExecutionSpace space) +{ + error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm"); +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::pack_comm(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,k,m; + double dx,dy,dz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + } + } + + // pack sub-style contributions as contiguous chunks + + for (k = 0; k < nstyles; k++) + m += 
styles[k]->pack_comm_hybrid(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::pack_comm_vel(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,k,m; + double dx,dy,dz,dvx,dvy,dvz; + int omega_flag = atom->omega_flag; + int angmom_flag = atom->angmom_flag; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + if (omega_flag) { + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + if (angmom_flag) { + buf[m++] = h_angmom(j,0); + buf[m++] = h_angmom(j,1); + buf[m++] = h_angmom(j,2); + } + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; + dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; + dz = pbc[2]*domain->zprd; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + if (omega_flag) { + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + if (angmom_flag) { + buf[m++] = h_angmom(j,0); + buf[m++] = h_angmom(j,1); + buf[m++] = h_angmom(j,2); + } + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + if (h_mask[i] & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); 
+ } + if (omega_flag) { + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + if (angmom_flag) { + buf[m++] = h_angmom(j,0); + buf[m++] = h_angmom(j,1); + buf[m++] = h_angmom(j,2); + } + } + } + } + + // pack sub-style contributions as contiguous chunks + + for (k = 0; k < nstyles; k++) + m += styles[k]->pack_comm_hybrid(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::unpack_comm(int n, int first, double *buf) +{ + int i,k,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + } + + // unpack sub-style contributions as contiguous chunks + + for (k = 0; k < nstyles; k++) + m += styles[k]->unpack_comm_hybrid(n,first,&buf[m]); +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::unpack_comm_vel(int n, int first, double *buf) +{ + int i,k,m,last; + int omega_flag = atom->omega_flag; + int angmom_flag = atom->angmom_flag; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + if (omega_flag) { + h_omega(i,0) = buf[m++]; + h_omega(i,1) = buf[m++]; + h_omega(i,2) = buf[m++]; + } + if (angmom_flag) { + h_angmom(i,0) = buf[m++]; + h_angmom(i,1) = buf[m++]; + h_angmom(i,2) = buf[m++]; + } + } + + // unpack sub-style contributions as contiguous chunks + + for (k = 0; k < nstyles; k++) + m += styles[k]->unpack_comm_hybrid(n,first,&buf[m]); +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::pack_reverse(int n, int first, double *buf) +{ + int i,k,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + buf[m++] = h_f(i,0); + buf[m++] = h_f(i,1); + buf[m++] = h_f(i,2); + } + + // 
pack sub-style contributions as contiguous chunks + + for (k = 0; k < nstyles; k++) + m += styles[k]->pack_reverse_hybrid(n,first,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::unpack_reverse(int n, int *list, double *buf) +{ + int i,j,k,m; + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + h_f(j,0) += buf[m++]; + h_f(j,1) += buf[m++]; + h_f(j,2) += buf[m++]; + } + + // unpack sub-style contributions as contiguous chunks + + for (k = 0; k < nstyles; k++) + m += styles[k]->unpack_reverse_hybrid(n,list,&buf[m]); +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::pack_border(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j,k,m; + double dx,dy,dz; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag[j]).d; + buf[m++] = ubuf(h_type[j]).d; + buf[m++] = ubuf(h_mask[j]).d; + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag[j]).d; + buf[m++] = ubuf(h_type[j]).d; + buf[m++] = ubuf(h_mask[j]).d; + } + } + + // pack sub-style contributions as contiguous chunks + + for (k = 0; k < nstyles; k++) + m += styles[k]->pack_border_hybrid(n,list,&buf[m]); + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::pack_border_vel(int n, int *list, double *buf, + int pbc_flag, int 
*pbc) +{ + int i,j,k,m; + double dx,dy,dz,dvx,dvy,dvz; + int omega_flag = atom->omega_flag; + int angmom_flag = atom->angmom_flag; + + m = 0; + if (pbc_flag == 0) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0); + buf[m++] = h_x(j,1); + buf[m++] = h_x(j,2); + buf[m++] = ubuf(h_tag[j]).d; + buf[m++] = ubuf(h_type[j]).d; + buf[m++] = ubuf(h_mask[j]).d; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + if (omega_flag) { + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + if (angmom_flag) { + buf[m++] = h_angmom(j,0); + buf[m++] = h_angmom(j,1); + buf[m++] = h_angmom(j,2); + } + } + } else { + if (domain->triclinic == 0) { + dx = pbc[0]*domain->xprd; + dy = pbc[1]*domain->yprd; + dz = pbc[2]*domain->zprd; + } else { + dx = pbc[0]; + dy = pbc[1]; + dz = pbc[2]; + } + if (!deform_vremap) { + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag[j]).d; + buf[m++] = ubuf(h_type[j]).d; + buf[m++] = ubuf(h_mask[j]).d; + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + if (omega_flag) { + buf[m++] = h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + if (angmom_flag) { + buf[m++] = h_angmom(j,0); + buf[m++] = h_angmom(j,1); + buf[m++] = h_angmom(j,2); + } + } + } else { + dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4]; + dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3]; + dvz = pbc[2]*h_rate[2]; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = h_x(j,0) + dx; + buf[m++] = h_x(j,1) + dy; + buf[m++] = h_x(j,2) + dz; + buf[m++] = ubuf(h_tag[j]).d; + buf[m++] = ubuf(h_type[j]).d; + buf[m++] = ubuf(h_mask[j]).d; + if (h_mask[i] & deform_groupbit) { + buf[m++] = h_v(j,0) + dvx; + buf[m++] = h_v(j,1) + dvy; + buf[m++] = h_v(j,2) + dvz; + } else { + buf[m++] = h_v(j,0); + buf[m++] = h_v(j,1); + buf[m++] = h_v(j,2); + } + if (omega_flag) { + buf[m++] = 
h_omega(j,0); + buf[m++] = h_omega(j,1); + buf[m++] = h_omega(j,2); + } + if (angmom_flag) { + buf[m++] = h_angmom(j,0); + buf[m++] = h_angmom(j,1); + buf[m++] = h_angmom(j,2); + } + } + } + } + + // pack sub-style contributions as contiguous chunks + + for (k = 0; k < nstyles; k++) + m += styles[k]->pack_border_hybrid(n,list,&buf[m]); + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]->pack_border(n,list,&buf[m]); + + return m; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::unpack_border(int n, int first, double *buf) +{ + int i,k,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + if (i == nmax) grow(0); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag[i] = (tagint) ubuf(buf[m++]).i; + h_type[i] = (int) ubuf(buf[m++]).i; + h_mask[i] = (int) ubuf(buf[m++]).i; + } + + // unpack sub-style contributions as contiguous chunks + + for (k = 0; k < nstyles; k++) + m += styles[k]->unpack_border_hybrid(n,first,&buf[m]); + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::unpack_border_vel(int n, int first, double *buf) +{ + int i,k,m,last; + int omega_flag = atom->omega_flag; + int angmom_flag = atom->angmom_flag; + + m = 0; + last = first + n; + for (i = first; i < last; i++) { + if (i == nmax) grow(0); + h_x(i,0) = buf[m++]; + h_x(i,1) = buf[m++]; + h_x(i,2) = buf[m++]; + h_tag[i] = (tagint) ubuf(buf[m++]).i; + h_type[i] = (int) ubuf(buf[m++]).i; + h_mask[i] = (int) ubuf(buf[m++]).i; + h_v(i,0) = buf[m++]; + h_v(i,1) = buf[m++]; + h_v(i,2) = buf[m++]; + if (omega_flag) { + h_omega(i,0) = buf[m++]; + h_omega(i,1) = buf[m++]; + h_omega(i,2) = 
buf[m++]; + } + if (angmom_flag) { + h_angmom(i,0) = buf[m++]; + h_angmom(i,1) = buf[m++]; + h_angmom(i,2) = buf[m++]; + } + } + + // unpack sub-style contributions as contiguous chunks + + for (k = 0; k < nstyles; k++) + m += styles[k]->unpack_border_hybrid(n,first,&buf[m]); + + if (atom->nextra_border) + for (int iextra = 0; iextra < atom->nextra_border; iextra++) + m += modify->fix[atom->extra_border[iextra]]-> + unpack_border(n,first,&buf[m]); +} + +/* ---------------------------------------------------------------------- + pack data for atom I for sending to another proc + pack each sub-style one after the other +------------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::pack_exchange(int i, double *buf) +{ + int k,m; + + int tmp = atom->nextra_grow; + atom->nextra_grow = 0; + + m = 0; + for (k = 0; k < nstyles; k++) + m += styles[k]->pack_exchange(i,&buf[m]); + + atom->nextra_grow = tmp; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]->pack_exchange(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- + unpack data for single atom received from another proc + unpack each sub-style one after the other + grow() occurs here so arrays for all sub-styles are grown +------------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::unpack_exchange(double *buf) +{ + int k,m; + + int nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + + int tmp = atom->nextra_grow; + atom->nextra_grow = 0; + + m = 0; + for (k = 0; k < nstyles; k++) { + m += styles[k]->unpack_exchange(&buf[m]); + atom->nlocal--; + } + + atom->nextra_grow = tmp; + + if (atom->nextra_grow) + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) + m += modify->fix[atom->extra_grow[iextra]]-> + unpack_exchange(nlocal,&buf[m]); + + atom->nlocal++; + return m; +} + 
+/* ---------------------------------------------------------------------- + size of restart data for all atoms owned by this proc + include extra data stored by fixes +------------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::size_restart() +{ + int tmp = atom->nextra_restart; + atom->nextra_restart = 0; + + int n = 0; + for (int k = 0; k < nstyles; k++) + n += styles[k]->size_restart(); + + atom->nextra_restart = tmp; + + int nlocal = atom->nlocal; + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + for (int i = 0; i < nlocal; i++) + n += modify->fix[atom->extra_restart[iextra]]->size_restart(i); + + return n; +} + +/* ---------------------------------------------------------------------- + pack atom I's data for restart file including extra quantities + xyz must be 1st 3 values, so that read_restart can test on them + pack each sub-style one after the other +------------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::pack_restart(int i, double *buf) +{ + int tmp = atom->nextra_restart; + atom->nextra_restart = 0; + + int m = 0; + for (int k = 0; k < nstyles; k++) + m += styles[k]->pack_restart(i,&buf[m]); + + atom->nextra_restart = tmp; + + if (atom->nextra_restart) + for (int iextra = 0; iextra < atom->nextra_restart; iextra++) + m += modify->fix[atom->extra_restart[iextra]]->pack_restart(i,&buf[m]); + + buf[0] = m; + return m; +} + +/* ---------------------------------------------------------------------- + unpack data for one atom from restart file including extra quantities + unpack each sub-style one after the other + grow() occurs here so arrays for all sub-styles are grown +------------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::unpack_restart(double *buf) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) { + grow(0); + if (atom->nextra_store) + 
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); + } + + int tmp = atom->nextra_store; + atom->nextra_store = 0; + + int m = 0; + for (int k = 0; k < nstyles; k++) { + m += styles[k]->unpack_restart(&buf[m]); + atom->nlocal--; + } + atom->nextra_store = tmp; + + double **extra = atom->extra; + if (atom->nextra_store) { + int size = static_cast (buf[0]) - m; + for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++]; + } + + atom->nlocal++; + return m; +} + +/* ---------------------------------------------------------------------- + create one atom of itype at coord + create each sub-style one after the other + grow() occurs here so arrays for all sub-styles are grown +------------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::create_atom(int itype, double *coord) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + + for (int k = 0; k < nstyles; k++) { + styles[k]->create_atom(itype,coord); + atom->nlocal--; + } + atom->nlocal++; +} + +/* ---------------------------------------------------------------------- + unpack one line from Atoms section of data file + grow() occurs here so arrays for all sub-styles are grown +------------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **values) +{ + int nlocal = atom->nlocal; + if (nlocal == nmax) grow(0); + + h_tag[nlocal] = ATOTAGINT(values[0]); + h_type[nlocal] = atoi(values[1]); + if (h_type[nlocal] <= 0 || h_type[nlocal] > atom->ntypes) + error->one(FLERR,"Invalid atom h_type in Atoms section of data file"); + + h_x(nlocal,0) = coord[0]; + h_x(nlocal,1) = coord[1]; + h_x(nlocal,2) = coord[2]; + + h_image[nlocal] = imagetmp; + h_mask[nlocal] = 1; + + h_v(nlocal,0) = 0.0; + h_v(nlocal,1) = 0.0; + h_v(nlocal,2) = 0.0; + if (atom->omega_flag) { + h_omega(nlocal,0) = 0.0; + h_omega(nlocal,1) = 0.0; + h_omega(nlocal,2) = 0.0; + } + if (atom->angmom_flag) { 
+ h_angmom(nlocal,0) = 0.0; + h_angmom(nlocal,1) = 0.0; + h_angmom(nlocal,2) = 0.0; + } + + // each sub-style parses sub-style specific values + + int m = 5; + for (int k = 0; k < nstyles; k++) + m += styles[k]->data_atom_hybrid(nlocal,&values[m]); + + atom->nlocal++; +} + +/* ---------------------------------------------------------------------- + unpack one line from Velocities section of data file +------------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::data_vel(int m, char **values) +{ + h_v(m,0) = atof(values[0]); + h_v(m,1) = atof(values[1]); + h_v(m,2) = atof(values[2]); + + // each sub-style parses sub-style specific values + + int n = 3; + for (int k = 0; k < nstyles; k++) + n += styles[k]->data_vel_hybrid(m,&values[n]); +} + +/* ---------------------------------------------------------------------- + pack atom info for data file including 3 image flags +------------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::pack_data(double **buf) +{ + int k,m; + + int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) { + buf[i][0] = ubuf(h_tag[i]).d; + buf[i][1] = ubuf(h_type[i]).d; + buf[i][2] = h_x(i,0); + buf[i][3] = h_x(i,1); + buf[i][4] = h_x(i,2); + + m = 5; + for (k = 0; k < nstyles; k++) + m += styles[k]->pack_data_hybrid(i,&buf[i][m]); + + buf[i][m] = ubuf((h_image[i] & IMGMASK) - IMGMAX).d; + buf[i][m+1] = ubuf((h_image[i] >> IMGBITS & IMGMASK) - IMGMAX).d; + buf[i][m+2] = ubuf((h_image[i] >> IMG2BITS) - IMGMAX).d; + } +} + +/* ---------------------------------------------------------------------- + write atom info to data file including 3 image flags +------------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::write_data(FILE *fp, int n, double **buf) +{ + int k,m; + + for (int i = 0; i < n; i++) { + fprintf(fp,TAGINT_FORMAT " %d %-1.16e %-1.16e %-1.16e", + (tagint) ubuf(buf[i][0]).i,(int) 
ubuf(buf[i][1]).i, + buf[i][2],buf[i][3],buf[i][4]); + + m = 5; + for (k = 0; k < nstyles; k++) + m += styles[k]->write_data_hybrid(fp,&buf[i][m]); + + fprintf(fp," %d %d %d\n", + (int) ubuf(buf[i][m]).i,(int) ubuf(buf[i][m+1]).i, + (int) ubuf(buf[i][m+2]).i); + } +} + +/* ---------------------------------------------------------------------- + pack velocity info for data file +------------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::pack_vel(double **buf) +{ + int k,m; + + int nlocal = atom->nlocal; + for (int i = 0; i < nlocal; i++) { + buf[i][0] = ubuf(h_tag[i]).d; + buf[i][1] = h_v(i,0); + buf[i][2] = h_v(i,1); + buf[i][3] = h_v(i,2); + + m = 4; + for (k = 0; k < nstyles; k++) + m += styles[k]->pack_vel_hybrid(i,&buf[i][m]); + } +} + +/* ---------------------------------------------------------------------- + write velocity info to data file +------------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::write_vel(FILE *fp, int n, double **buf) +{ + int k,m; + + for (int i = 0; i < n; i++) { + fprintf(fp,TAGINT_FORMAT " %g %g %g", + (tagint) ubuf(buf[i][0]).i,buf[i][1],buf[i][2],buf[i][3]); + + m = 4; + for (k = 0; k < nstyles; k++) + m += styles[k]->write_vel_hybrid(fp,&buf[i][m]); + + fprintf(fp,"\n"); + } +} + +/* ---------------------------------------------------------------------- + assign an index to named atom property and return index + returned value encodes which sub-style and index returned by sub-style + return -1 if name is unknown to any sub-styles +------------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::property_atom(char *name) +{ + for (int k = 0; k < nstyles; k++) { + int index = styles[k]->property_atom(name); + if (index >= 0) return index*nstyles + k; + } + return -1; +} + +/* ---------------------------------------------------------------------- + pack per-atom data into buf for ComputePropertyAtom + 
index maps to data specific to this atom style +------------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::pack_property_atom(int multiindex, double *buf, + int nvalues, int groupbit) +{ + int k = multiindex % nstyles; + int index = multiindex/nstyles; + styles[k]->pack_property_atom(index,buf,nvalues,groupbit); +} + +/* ---------------------------------------------------------------------- + allstyles = list of all atom styles in this LAMMPS executable +------------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::build_styles() +{ + nallstyles = 0; +#define ATOM_CLASS +#define AtomStyle(key,Class) nallstyles++; +#include "style_atom.h" +#undef AtomStyle +#undef ATOM_CLASS + + allstyles = new char*[nallstyles]; + + int n; + nallstyles = 0; +#define ATOM_CLASS +#define AtomStyle(key,Class) \ + n = strlen(#key) + 1; \ + allstyles[nallstyles] = new char[n]; \ + strcpy(allstyles[nallstyles],#key); \ + nallstyles++; +#include "style_atom.h" +#undef AtomStyle +#undef ATOM_CLASS +} + +/* ---------------------------------------------------------------------- + allstyles = list of all known atom styles +------------------------------------------------------------------------- */ + +int AtomVecHybridKokkos::known_style(char *str) +{ + for (int i = 0; i < nallstyles; i++) + if (strcmp(str,allstyles[i]) == 0) return 1; + return 0; +} + +/* ---------------------------------------------------------------------- + return # of bytes of allocated memory +------------------------------------------------------------------------- */ + +bigint AtomVecHybridKokkos::memory_usage() +{ + bigint bytes = 0; + for (int k = 0; k < nstyles; k++) bytes += styles[k]->memory_usage(); + return bytes; +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::sync(ExecutionSpace space, unsigned int h_mask) +{ + for (int k = 0; k < nstyles; k++) 
((AtomVecKokkos*) styles[k])->sync(space,h_mask); +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int h_mask) +{ + for (int k = 0; k < nstyles; k++) ((AtomVecKokkos*) styles[k])->sync_overlapping_device(space,h_mask); +} + +/* ---------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::modified(ExecutionSpace space, unsigned int h_mask) +{ + for (int k = 0; k < nstyles; k++) ((AtomVecKokkos*) styles[k])->modified(space,h_mask); +} diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.h b/src/KOKKOS/atom_vec_hybrid_kokkos.h new file mode 100644 index 0000000000..802314bfa6 --- /dev/null +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.h @@ -0,0 +1,161 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef ATOM_CLASS + +AtomStyle(hybrid/kk,AtomVecHybridKokkos) + +#else + +#ifndef LMP_ATOM_VEC_HYBRID_KOKKOS_H +#define LMP_ATOM_VEC_HYBRID_KOKKOS_H + +#include +#include "atom_vec.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +class AtomVecHybridKokkos : public AtomVecKokkos { + public: + int nstyles; + class AtomVec **styles; + char **keywords; + + AtomVecHybridKokkos(class LAMMPS *); + ~AtomVecHybridKokkos(); + void process_args(int, char **); + void init(); + void grow(int); + void grow_reset(); + void copy(int, int, int); + void clear_bonus(); + void force_clear(int, size_t); + int pack_comm(int, int *, double *, int, int *); + int pack_comm_vel(int, int *, double *, int, int *); + void unpack_comm(int, int, double *); + void unpack_comm_vel(int, int, double *); + int pack_reverse(int, int, double *); + void unpack_reverse(int, int *, double *); + int pack_border(int, int *, double *, int, int *); + int pack_border_vel(int, int *, double *, int, int *); + void unpack_border(int, int, double *); + void unpack_border_vel(int, int, double *); + int pack_exchange(int, double *); + int unpack_exchange(double *); + int size_restart(); + int pack_restart(int, double *); + int unpack_restart(double *); + void create_atom(int, double *); + void data_atom(double *, imageint, char **); + int data_atom_hybrid(int, char **) {return 0;} + void data_vel(int, char **); + void pack_data(double **); + void write_data(FILE *, int, double **); + void pack_vel(double **); + void write_vel(FILE *, int, double **); + int property_atom(char *); + void pack_property_atom(int, double *, int, int); + bigint memory_usage(); + + int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, + const int & iswap, + const DAT::tdual_xfloat_2d &buf, + const int &pbc_flag, const int pbc[]); + void unpack_comm_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf); + int 
pack_comm_self(const int &n, const DAT::tdual_int_2d &list, + const int & iswap, const int nfirst, + const int &pbc_flag, const int pbc[]); + int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, + DAT::tdual_xfloat_2d buf,int iswap, + int pbc_flag, int *pbc, ExecutionSpace space); + void unpack_border_kokkos(const int &n, const int &nfirst, + const DAT::tdual_xfloat_2d &buf, + ExecutionSpace space); + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space, int dim, + X_FLOAT lo, X_FLOAT hi); + int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, + int nlocal, int dim, X_FLOAT lo, X_FLOAT hi, + ExecutionSpace space); + + void sync(ExecutionSpace space, unsigned int mask); + void modified(ExecutionSpace space, unsigned int mask); + void sync_overlapping_device(ExecutionSpace space, unsigned int mask); + + private: + tagint *tag; + int *type,*mask; + imageint *image; + double **x,**v,**f; + double **omega,**angmom; + + DAT::t_tagint_1d d_tag; + DAT::t_int_1d d_type, d_mask; + HAT::t_tagint_1d h_tag; + HAT::t_int_1d h_type, h_mask; + + DAT::t_imageint_1d d_image; + HAT::t_imageint_1d h_image; + + DAT::t_x_array d_x; + DAT::t_v_array d_v; + DAT::t_f_array d_f; + HAT::t_x_array h_x; + HAT::t_v_array h_v; + HAT::t_f_array h_f; + + DAT::t_v_array d_omega, d_angmom; + HAT::t_v_array h_omega, h_angmom; + + DAT::tdual_int_1d k_count; + + int nallstyles; + char **allstyles; + + void build_styles(); + int known_style(char *); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Atom style hybrid cannot have hybrid as an argument + +Self-explanatory. + +E: Atom style hybrid cannot use same atom style twice + +Self-explanatory. + +E: Cannot mix molecular and molecule template atom styles + +Self-explanatory. + +E: Per-processor system is too big + +The number of owned atoms plus ghost atoms on a single +processor must fit in 32-bit integer. 
+ +E: Invalid atom type in Atoms section of data file + +Atom types must range from 1 to specified # of types. + +*/ From d6f6c6faf1ecd6b25b1297b2f546632f3864fa45 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Sun, 12 Mar 2017 16:05:28 -0400 Subject: [PATCH 197/267] USER-DPD: Make newton-off warning in pair_dpd_fdt* be more selective. If using fix_shardlow, the pair_dpd_fdt* styles are okay with newton off, because the stochastic forces are thus only done in fix_shardlow. --- src/USER-DPD/pair_dpd_fdt.cpp | 11 +++++------ src/USER-DPD/pair_dpd_fdt_energy.cpp | 11 +++++------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/USER-DPD/pair_dpd_fdt.cpp b/src/USER-DPD/pair_dpd_fdt.cpp index 90aa4f1eaf..987755db8a 100644 --- a/src/USER-DPD/pair_dpd_fdt.cpp +++ b/src/USER-DPD/pair_dpd_fdt.cpp @@ -316,18 +316,17 @@ void PairDPDfdt::init_style() if (comm->ghost_velocity == 0) error->all(FLERR,"Pair dpd/fdt requires ghost atoms store velocity"); - // if newton off, forces between atoms ij will be double computed - // using different random numbers - - if (force->newton_pair == 0 && comm->me == 0) error->warning(FLERR, - "Pair dpd/fdt requires newton pair on"); - splitFDT_flag = false; int irequest = neighbor->request(this,instance_me); for (int i = 0; i < modify->nfix; i++) if (strncmp(modify->fix[i]->style,"shardlow", 8) == 0){ splitFDT_flag = true; } + + // if newton off, forces between atoms ij will be double computed + // using different random numbers if splitFDT_flag is false + if (!splitFDT_flag && (force->newton_pair == 0) && (comm->me == 0)) error->warning(FLERR, + "Pair dpd/fdt requires newton pair on if not also using fix shardlow"); } /* ---------------------------------------------------------------------- diff --git a/src/USER-DPD/pair_dpd_fdt_energy.cpp b/src/USER-DPD/pair_dpd_fdt_energy.cpp index ad6310a283..bf86f95b5f 100644 --- a/src/USER-DPD/pair_dpd_fdt_energy.cpp +++ b/src/USER-DPD/pair_dpd_fdt_energy.cpp @@ -405,12 +405,6 @@ void 
PairDPDfdtEnergy::init_style() if (comm->ghost_velocity == 0) error->all(FLERR,"Pair dpd/fdt/energy requires ghost atoms store velocity"); - // if newton off, forces between atoms ij will be double computed - // using different random numbers - - if (force->newton_pair == 0 && comm->me == 0) error->warning(FLERR, - "Pair dpd/fdt/energy requires newton pair on"); - splitFDT_flag = false; int irequest = neighbor->request(this,instance_me); for (int i = 0; i < modify->nfix; i++) @@ -418,6 +412,11 @@ void PairDPDfdtEnergy::init_style() splitFDT_flag = true; } + // if newton off, forces between atoms ij will be double computed + // using different random numbers if splitFDT_flag is false + if (!splitFDT_flag && (force->newton_pair == 0) && (comm->me == 0)) error->warning(FLERR, + "Pair dpd/fdt/energy requires newton pair on if not also using fix shardlow"); + bool eos_flag = false; for (int i = 0; i < modify->nfix; i++) if (strncmp(modify->fix[i]->style,"eos",3) == 0) eos_flag = true; From e908b8dbea0284460ea070cd98862dc4abd5d4c1 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Sun, 12 Mar 2017 16:20:09 -0400 Subject: [PATCH 198/267] USER-DPD Kokkos: correct some error messages --- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 4 ++-- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index bd0f08efa6..1c63b9af95 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -104,7 +104,7 @@ void PairDPDfdtEnergyKokkos::init_style() neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; } else { - error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk"); + error->all(FLERR,"Cannot use chosen neighbor list style with dpd/fdt/energy/kk"); } #ifdef DPD_USE_RAN_MARS @@ -139,7 +139,7 @@ void PairDPDfdtEnergyKokkos::init_style() 
neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; } else { - error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk"); + error->all(FLERR,"Cannot use chosen neighbor list style with dpd/fdt/energy/kk"); } #ifdef DPD_USE_RAN_MARS diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 4b0748721c..e22a4bff22 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -112,7 +112,7 @@ void PairExp6rxKokkos::init_style() neighbor->requests[irequest]->full = 0; neighbor->requests[irequest]->half = 1; } else { - error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk"); + error->all(FLERR,"Cannot use chosen neighbor list style with exp6/rx/kk"); } } @@ -1242,4 +1242,4 @@ template class PairExp6rxKokkos; #ifdef KOKKOS_HAVE_CUDA template class PairExp6rxKokkos; #endif -} \ No newline at end of file +} From b1b377cb594738d35b31ba33ea2d125e78483ee3 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Sun, 12 Mar 2017 17:48:51 -0400 Subject: [PATCH 199/267] USER-DPD: fix_shardlow's neighbor request needs "newton on" override. Even if other stuff is doing newton off, SSA must have it turned on. 
--- src/USER-DPD/fix_shardlow.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/USER-DPD/fix_shardlow.cpp b/src/USER-DPD/fix_shardlow.cpp index 5132d937ea..2b7ef9314b 100644 --- a/src/USER-DPD/fix_shardlow.cpp +++ b/src/USER-DPD/fix_shardlow.cpp @@ -132,10 +132,11 @@ int FixShardlow::setmask() void FixShardlow::init() { int irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->pair = 0; - neighbor->requests[irequest]->fix = 1; - neighbor->requests[irequest]->ghost= 1; - neighbor->requests[irequest]->ssa = 1; + neighbor->requests[irequest]->pair = 0; + neighbor->requests[irequest]->fix = 1; + neighbor->requests[irequest]->ghost = 1; + neighbor->requests[irequest]->ssa = 1; + neighbor->requests[irequest]->newton = 1; // SSA requires newton on } /* ---------------------------------------------------------------------- */ From d5eceebf3283cd460a4230672c582b952bef36f0 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Mon, 13 Mar 2017 01:56:00 -0400 Subject: [PATCH 200/267] USER-DPD Kokkos: add support for full neighbor lists. Note: "newton on" still required if using non-kokkos pair styles or fixes. Non-kokkos pairs/fixes don't expect their half lists with newton off, which happens if newton is turned off globally by kokkos via commandline. Note2: Regardless, fix_shardlow* will still use half lists and newton on. 
--- src/KOKKOS/fix_rx_kokkos.cpp | 9 +++-- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 44 +++++++++++++++++++---- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 16 ++++++--- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 27 +++++++++++--- src/KOKKOS/pair_table_rx_kokkos.cpp | 16 +++++++++ 5 files changed, 96 insertions(+), 16 deletions(-) diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index 08a20ac9a7..ac81e5c2a7 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -1450,6 +1450,11 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF computeLocalTemperature<_wtflag, _localTempFlag, true , HALFTHREAD> (); \ else \ computeLocalTemperature<_wtflag, _localTempFlag, false, HALFTHREAD> (); \ + else if (neighflag == FULL) \ + if (newton_pair) \ + computeLocalTemperature<_wtflag, _localTempFlag, true , FULL> (); \ + else \ + computeLocalTemperature<_wtflag, _localTempFlag, false, FULL> (); \ } // Are there is no other options than wtFlag = (0)LUCY and localTempFlag = NONE : HARMONIC? 
@@ -1934,12 +1939,12 @@ void FixRxKokkos::operator()(Tag_FixRxKokkos_firstPairOperator::compute(int eflag_in, int vflag_in) if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); } + } else if (neighflag == FULL) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } } } else { if (neighflag == HALF) { @@ -251,6 +259,14 @@ void PairDPDfdtEnergyKokkos::compute(int eflag_in, int vflag_in) if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); } + } else if (neighflag == FULL) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } } } } @@ -291,6 +307,14 @@ void PairDPDfdtEnergyKokkos::compute(int eflag_in, int vflag_in) if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); } + } else if (neighflag == FULL) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } } } else { if (neighflag == HALF) { @@ -309,6 +333,14 @@ void PairDPDfdtEnergyKokkos::compute(int eflag_in, int vflag_in) if (evflag) 
Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); } + } else if (neighflag == FULL) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } } } @@ -405,7 +437,7 @@ void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeSp fx_i += delx*fpair; fy_i += dely*fpair; fz_i += delz*fpair; - if (NEWTON_PAIR || j < nlocal) { + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { a_f(j,0) -= delx*fpair; a_f(j,1) -= dely*fpair; a_f(j,2) -= delz*fpair; @@ -418,7 +450,7 @@ void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeSp evdwl = 0.5*a0_ij*cut_ij * wd; evdwl *= factor_dpd; if (EVFLAG) - ev.evdwl += ((NEWTON_PAIR||(jtemplate ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); @@ -522,7 +554,7 @@ void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeNo fx_i += delx*fpair; fy_i += dely*fpair; fz_i += delz*fpair; - if (NEWTON_PAIR || j < nlocal) { + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { a_f(j,0) -= delx*fpair; a_f(j,1) -= dely*fpair; a_f(j,2) -= delz*fpair; @@ -548,7 +580,7 @@ void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeNo uTmp *= 0.5; a_duMech[i] += uTmp; - if (NEWTON_PAIR || j < nlocal) { + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { a_duMech[j] += uTmp; } @@ -562,7 +594,7 @@ void PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeNo uTmp += randPair; a_duCond[i] += uTmp; - if (NEWTON_PAIR || j < nlocal) { + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { a_duCond[j] -= uTmp; } @@ -573,7 +605,7 @@ void 
PairDPDfdtEnergyKokkos::operator()(TagPairDPDfdtEnergyComputeNo evdwl = 0.5*a0_ij*cut_ij * wd; evdwl *= factor_dpd; if (EVFLAG) - ev.evdwl += ((NEWTON_PAIR||(jtemplate ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index e22a4bff22..abc158d72c 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -221,6 +221,14 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); } + } else if (neighflag == FULL) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } } k_error_flag.template modify(); @@ -509,7 +517,7 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxCompute::operator()(TagPairExp6rxCompute::operator()(TagPairExp6rxComputetemplate ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); } diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index 4379cc4001..ef30fdc6f6 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -216,6 +216,14 @@ void PairMultiLucyRXKokkos::compute_style(int eflag_in, int vflag_in if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); } + } else if (neighflag == FULL) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else 
Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } } if (evflag) atomKK->modified(execution_space,F_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK); @@ -378,7 +386,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute::operator()(TagPairMultiLucyRXCompute @@ -491,6 +499,17 @@ void PairMultiLucyRXKokkos::computeLocalDensity() Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else if (neighflag == FULL) { + if (newton_pair) + if (one_type) + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else + if (one_type) + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); } atomKK->modified(execution_space,DPDRHO_MASK); @@ -548,7 +567,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeLoca const double tmpFactor4 = tmpFactor*tmpFactor*tmpFactor*tmpFactor; const double factor = factor_type11*(1.0 + 1.5*r_over_rcut)*tmpFactor4; rho_i_contrib += factor; - if (NEWTON_PAIR || j < nlocal) + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) a_rho[j] += factor; } } else if (rsq < d_cutsq(itype,jtype)) { @@ -557,7 +576,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeLoca const double tmpFactor4 = tmpFactor*tmpFactor*tmpFactor*tmpFactor; const double factor = (84.0/(5.0*pi*rcut*rcut*rcut))*(1.0+3.0*sqrt(rsq)/(2.0*rcut))*tmpFactor4; rho_i_contrib += factor; - if (NEWTON_PAIR || j < nlocal) + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) a_rho[j] += factor; } } diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index cbb1096712..e3d416f293 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -693,6 +693,14 @@ void 
PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, d_table_const, eflag, eflag_atom, vflag, vflag_global, vflag_atom, d_vatom, d_eatom); + } else if (neighflag == FULL) { + compute_all_items( + newton_pair, ev, nlocal, + l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, + special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + d_table_const, eflag, eflag_atom, + vflag, vflag_global, vflag_atom, d_vatom, d_eatom); } } else { if (neighflag == HALFTHREAD) { @@ -711,6 +719,14 @@ void PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, d_table_const, eflag, eflag_atom, vflag, vflag_global, vflag_atom, d_vatom, d_eatom); + } else if (neighflag == FULL) { + compute_all_items( + newton_pair, ev, nlocal, + l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, + special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + d_table_const, eflag, eflag_atom, + vflag, vflag_global, vflag_atom, d_vatom, d_eatom); } } From 4b4bc7dc3bd11d52e6ad49e99749f80405d7dbbb Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Mon, 13 Mar 2017 03:03:27 -0400 Subject: [PATCH 201/267] USER-DPD: specialize PairTableRXKokkos's compute_all_items() on NEWTON_PAIR No noticeable performance change, but it does eliminate a deep conditional. 
--- src/KOKKOS/pair_table_rx_kokkos.cpp | 110 +++++++++++++++++----------- 1 file changed, 69 insertions(+), 41 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index e3d416f293..e93ea53fa4 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -514,9 +514,8 @@ compute_item( return ev; } -template +template static void compute_all_items( - int newton_pair, EV_FLOAT& ev, int nlocal, int inum, @@ -560,42 +559,23 @@ static void compute_all_items( if (eflag || vflag) { Kokkos::parallel_reduce(inum, LAMMPS_LAMBDA(int i, EV_FLOAT& energy_virial) { - if (newton_pair) { energy_virial += - compute_item( + compute_item( i, nlocal, d_ilist, d_neighbors, d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, d_table_const, eflag, eflag_atom, vflag, vflag_global, vflag_atom, v_vatom, v_eatom); - } else { - energy_virial += - compute_item( - i, nlocal, d_ilist, d_neighbors, d_numneigh, x, type, - mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, - special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, - d_table_const, eflag, eflag_atom, - vflag, vflag_global, vflag_atom, v_vatom, v_eatom); - } }, ev); } else { Kokkos::parallel_for(inum, LAMMPS_LAMBDA(int i) { - if (newton_pair) { - compute_item( + compute_item( i, nlocal, d_ilist, d_neighbors, d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, d_table_const, eflag, eflag_atom, vflag, vflag_global, vflag_atom, v_vatom, v_eatom); - } else { - compute_item( - i, nlocal, d_ilist, d_neighbors, d_numneigh, x, type, - mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, - special_lj, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, - d_table_const, eflag, eflag_atom, - vflag, vflag_global, vflag_atom, v_vatom, v_eatom); - } }); } } @@ -678,55 +658,103 @@ void 
PairTableRXKokkos::compute_style(int eflag_in, int vflag_in) EV_FLOAT ev; if(atom->ntypes > MAX_TYPES_STACKPARAMS) { if (neighflag == HALFTHREAD) { - compute_all_items( - newton_pair, ev, nlocal, - l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + if (newton_pair) { + compute_all_items( + ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, d_table_const, eflag, eflag_atom, vflag, vflag_global, vflag_atom, d_vatom, d_eatom); + } else { + compute_all_items( + ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, + special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + d_table_const, eflag, eflag_atom, + vflag, vflag_global, vflag_atom, d_vatom, d_eatom); + } } else if (neighflag == HALF) { - compute_all_items( - newton_pair, ev, nlocal, - l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + if (newton_pair) { + compute_all_items( + ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, d_table_const, eflag, eflag_atom, vflag, vflag_global, vflag_atom, d_vatom, d_eatom); + } else { + compute_all_items( + ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, + special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + d_table_const, eflag, eflag_atom, + vflag, vflag_global, vflag_atom, d_vatom, d_eatom); + } } else if (neighflag == FULL) { - compute_all_items( - newton_pair, ev, nlocal, - l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + if (newton_pair) { + compute_all_items( + ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, 
special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, d_table_const, eflag, eflag_atom, vflag, vflag_global, vflag_atom, d_vatom, d_eatom); + } else { + compute_all_items( + ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, + special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + d_table_const, eflag, eflag_atom, + vflag, vflag_global, vflag_atom, d_vatom, d_eatom); + } } } else { if (neighflag == HALFTHREAD) { - compute_all_items( - newton_pair, ev, nlocal, - l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + if (newton_pair) { + compute_all_items( + ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, d_table_const, eflag, eflag_atom, vflag, vflag_global, vflag_atom, d_vatom, d_eatom); + } else { + compute_all_items( + ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, + special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + d_table_const, eflag, eflag_atom, + vflag, vflag_global, vflag_atom, d_vatom, d_eatom); + } } else if (neighflag == HALF) { - compute_all_items( - newton_pair, ev, nlocal, - l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + if (newton_pair) { + compute_all_items( + ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, d_table_const, eflag, eflag_atom, vflag, vflag_global, vflag_atom, d_vatom, d_eatom); + } else { + compute_all_items( + ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, + special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + d_table_const, eflag, 
eflag_atom, + vflag, vflag_global, vflag_atom, d_vatom, d_eatom); + } } else if (neighflag == FULL) { - compute_all_items( - newton_pair, ev, nlocal, - l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + if (newton_pair) { + compute_all_items( + ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, d_table_const, eflag, eflag_atom, vflag, vflag_global, vflag_atom, d_vatom, d_eatom); + } else { + compute_all_items( + ev, nlocal, l->inum, l->d_ilist, l->d_neighbors, l->d_numneigh, + x, type, mixWtSite1old, mixWtSite2old, mixWtSite1, mixWtSite2, + special_lj_local, m_cutsq, d_cutsq, f, uCG, uCGnew, isite1, isite2, + d_table_const, eflag, eflag_atom, + vflag, vflag_global, vflag_atom, d_vatom, d_eatom); + } } } From d2cbfef13bac634b99459f3f4f78465ed03e712d Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 13 Mar 2017 09:01:35 -0600 Subject: [PATCH 202/267] Add CUDA support to atom_vec_hybrid_kokkos --- src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 37 +++++++++++++++++++++++---- src/KOKKOS/atom_vec_hybrid_kokkos.h | 2 +- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index 0c9d261be5..e5e361e70a 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -13,7 +13,6 @@ #include #include -#include "atom_vec_kokkos.h" #include "atom_vec_hybrid_kokkos.h" #include "atom_kokkos.h" #include "domain.h" @@ -21,6 +20,7 @@ #include "fix.h" #include "memory.h" #include "error.h" +#include "atom_masks.h" using namespace LAMMPS_NS; @@ -132,10 +132,6 @@ void AtomVecHybridKokkos::init() { AtomVec::init(); for (int k = 0; k < nstyles; k++) styles[k]->init(); - -#ifdef KOKKOS_HAVE_CUDA - error->all(FLERR,"AtomVecHybridKokkos doesn't yet support CUDA"); -#endif } /* 
---------------------------------------------------------------------- @@ -303,6 +299,8 @@ int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int int AtomVecHybridKokkos::pack_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) { + sync(Host,X_MASK); + int i,j,k,m; double dx,dy,dz; @@ -345,6 +343,8 @@ int AtomVecHybridKokkos::pack_comm(int n, int *list, double *buf, int AtomVecHybridKokkos::pack_comm_vel(int n, int *list, double *buf, int pbc_flag, int *pbc) { + sync(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); + int i,j,k,m; double dx,dy,dz,dvx,dvy,dvz; int omega_flag = atom->omega_flag; @@ -455,6 +455,8 @@ void AtomVecHybridKokkos::unpack_comm(int n, int first, double *buf) h_x(i,2) = buf[m++]; } + modified(Host,X_MASK); + // unpack sub-style contributions as contiguous chunks for (k = 0; k < nstyles; k++) @@ -490,6 +492,8 @@ void AtomVecHybridKokkos::unpack_comm_vel(int n, int first, double *buf) } } + modified(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); + // unpack sub-style contributions as contiguous chunks for (k = 0; k < nstyles; k++) @@ -500,6 +504,8 @@ void AtomVecHybridKokkos::unpack_comm_vel(int n, int first, double *buf) int AtomVecHybridKokkos::pack_reverse(int n, int first, double *buf) { + sync(Host,F_MASK); + int i,k,m,last; m = 0; @@ -532,6 +538,8 @@ void AtomVecHybridKokkos::unpack_reverse(int n, int *list, double *buf) h_f(j,2) += buf[m++]; } + modified(Host,F_MASK); + // unpack sub-style contributions as contiguous chunks for (k = 0; k < nstyles; k++) @@ -543,6 +551,8 @@ void AtomVecHybridKokkos::unpack_reverse(int n, int *list, double *buf) int AtomVecHybridKokkos::pack_border(int n, int *list, double *buf, int pbc_flag, int *pbc) { + sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + int i,j,k,m; double dx,dy,dz; @@ -595,6 +605,7 @@ int AtomVecHybridKokkos::pack_border(int n, int *list, double *buf, int AtomVecHybridKokkos::pack_border_vel(int n, int *list, double *buf, int pbc_flag, int *pbc) { + 
sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); int i,j,k,m; double dx,dy,dz,dvx,dvy,dvz; int omega_flag = atom->omega_flag; @@ -722,6 +733,8 @@ void AtomVecHybridKokkos::unpack_border(int n, int first, double *buf) h_mask[i] = (int) ubuf(buf[m++]).i; } + modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + // unpack sub-style contributions as contiguous chunks for (k = 0; k < nstyles; k++) @@ -766,6 +779,8 @@ void AtomVecHybridKokkos::unpack_border_vel(int n, int first, double *buf) } } + modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); + // unpack sub-style contributions as contiguous chunks for (k = 0; k < nstyles; k++) @@ -946,6 +961,8 @@ void AtomVecHybridKokkos::create_atom(int itype, double *coord) void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **values) { + sync(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); + int nlocal = atom->nlocal; if (nlocal == nmax) grow(0); @@ -975,6 +992,8 @@ void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **val h_angmom(nlocal,2) = 0.0; } + modified(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); + // each sub-style parses sub-style specific values int m = 5; @@ -990,10 +1009,14 @@ void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **val void AtomVecHybridKokkos::data_vel(int m, char **values) { + sync(Host,V_MASK); + h_v(m,0) = atof(values[0]); h_v(m,1) = atof(values[1]); h_v(m,2) = atof(values[2]); + modified(Host,V_MASK); + // each sub-style parses sub-style specific values int n = 3; @@ -1007,6 +1030,8 @@ void AtomVecHybridKokkos::data_vel(int m, char **values) void AtomVecHybridKokkos::pack_data(double **buf) { + sync(Host,TAG_MASK|TYPE_MASK|X_MASK); + int k,m; int nlocal = atom->nlocal; @@ -1056,6 +1081,8 @@ void AtomVecHybridKokkos::write_data(FILE *fp, int n, double **buf) void 
AtomVecHybridKokkos::pack_vel(double **buf) { + sync(Host,V_MASK); + int k,m; int nlocal = atom->nlocal; diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.h b/src/KOKKOS/atom_vec_hybrid_kokkos.h index 802314bfa6..fcf48f6c74 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.h +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.h @@ -21,7 +21,7 @@ AtomStyle(hybrid/kk,AtomVecHybridKokkos) #define LMP_ATOM_VEC_HYBRID_KOKKOS_H #include -#include "atom_vec.h" +#include "atom_vec_kokkos.h" #include "kokkos_type.h" namespace LAMMPS_NS { From 5925460a275fe4cf588e86eab45242351f5e86cf Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Tue, 14 Mar 2017 14:27:23 -0500 Subject: [PATCH 203/267] Improve the performance of read_data of gzip'ed files using taskset. Normally, the gzip process would be pinned to the same core as the MPI rank 0 process, which makes the pipe stay in one core's cache, but forces the two processes to fight for that core, slowing things down. --- src/read_data.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/read_data.cpp b/src/read_data.cpp index d6a33d6e9d..3e180b7aeb 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -50,7 +50,7 @@ using namespace LAMMPS_NS; #define MAXLINE 256 #define LB_FACTOR 1.1 -#define CHUNK 1024 +#define CHUNK 4096 #define DELTA 4 // must be 2 or larger #define MAXBODY 32 // max # of lines in one body @@ -1856,8 +1856,12 @@ void ReadData::open(char *file) if (!compressed) fp = fopen(file,"r"); else { #ifdef LAMMPS_GZIP - char gunzip[128]; - sprintf(gunzip,"gzip -c -d %s",file); + char gunzip[2048]; + // Use taskset to force the gzip process to NOT run on the 0th "CPU", which should + // keep it from thrashing with the MPI rank zero process (the one reading the pipe). + // This is Linux specific, and the 1023 upper range might also be system specific. + // Use of something like hwloc would be more portable... but more complicated. 
+ sprintf(gunzip,"taskset -c 1-1023 gzip -c -d %s",file); #ifdef _WIN32 fp = _popen(gunzip,"rb"); From f4a08ba4fcaed73f5bc9660266a75ddc868c9c3d Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Wed, 15 Mar 2017 09:25:16 -0600 Subject: [PATCH 204/267] pass Views by reference for pair_table_rx_kokkos this greatly speeds up pair_table_rx_kokkos, and should put it on par with pair_table_rx in the Serial case --- src/KOKKOS/neigh_list_kokkos.h | 4 +-- src/KOKKOS/pair_table_rx_kokkos.cpp | 44 ++++++++++++++--------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/KOKKOS/neigh_list_kokkos.h b/src/KOKKOS/neigh_list_kokkos.h index b43e1106f2..cece97197d 100644 --- a/src/KOKKOS/neigh_list_kokkos.h +++ b/src/KOKKOS/neigh_list_kokkos.h @@ -89,8 +89,8 @@ public: KOKKOS_INLINE_FUNCTION static AtomNeighborsConst static_neighbors_const(int i, - typename ArrayTypes::t_neighbors_2d_const d_neighbors, - typename ArrayTypes::t_int_1d_const d_numneigh) { + typename ArrayTypes::t_neighbors_2d_const const& d_neighbors, + typename ArrayTypes::t_int_1d_const const& d_numneigh) { return AtomNeighborsConst(&d_neighbors(i,0),d_numneigh(i), &d_neighbors(i,1)-&d_neighbors(i,0)); } diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index e93ea53fa4..044f303bf5 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -193,7 +193,7 @@ KOKKOS_INLINE_FUNCTION static F_FLOAT compute_fpair(F_FLOAT rsq, int itype, int jtype, - typename PairTableRXKokkos::TableDeviceConst d_table_const + typename PairTableRXKokkos::TableDeviceConst const& d_table_const ) { Pair::union_int_float_t rsq_lookup; double fpair; @@ -228,7 +228,7 @@ static F_FLOAT compute_evdwl( F_FLOAT rsq, int itype, int jtype, - typename PairTableRXKokkos::TableDeviceConst d_table_const + typename PairTableRXKokkos::TableDeviceConst const& d_table_const ) { double evdwl; Pair::union_int_float_t rsq_lookup; @@ -274,11 +274,11 @@ ev_tally( 
Kokkos::View::t_virial_array::array_layout, DeviceType, - Kokkos::MemoryTraits::value> > v_vatom, + Kokkos::MemoryTraits::value> > const& v_vatom, Kokkos::View::t_efloat_1d::array_layout, DeviceType, - Kokkos::MemoryTraits::value> > v_eatom) + Kokkos::MemoryTraits::value> > const& v_eatom) { if (eflag) { if (eflag_atom) { @@ -374,32 +374,32 @@ static EV_FLOAT compute_item( int ii, int nlocal, - typename ArrayTypes::t_int_1d_const d_ilist, - typename ArrayTypes::t_neighbors_2d_const d_neighbors, - typename ArrayTypes::t_int_1d_const d_numneigh, - typename ArrayTypes::t_x_array_randomread x, - typename ArrayTypes::t_int_1d_randomread type, - Kokkos::View mixWtSite1old, - Kokkos::View mixWtSite2old, - Kokkos::View mixWtSite1, - Kokkos::View mixWtSite2, - Few special_lj, - Few, MAX_TYPES_STACKPARAMS+1> m_cutsq, - typename ArrayTypes::t_ffloat_2d d_cutsq, + typename ArrayTypes::t_int_1d_const const& d_ilist, + typename ArrayTypes::t_neighbors_2d_const const& d_neighbors, + typename ArrayTypes::t_int_1d_const const& d_numneigh, + typename ArrayTypes::t_x_array_randomread const& x, + typename ArrayTypes::t_int_1d_randomread const& type, + Kokkos::View const& mixWtSite1old, + Kokkos::View const& mixWtSite2old, + Kokkos::View const& mixWtSite1, + Kokkos::View const& mixWtSite2, + Few const& special_lj, + Few, MAX_TYPES_STACKPARAMS+1> const& m_cutsq, + typename ArrayTypes::t_ffloat_2d const& d_cutsq, Kokkos::View::t_f_array::array_layout, DeviceType, - Kokkos::MemoryTraits::value> > f, + Kokkos::MemoryTraits::value> > const& f, Kokkos::View::t_efloat_1d::array_layout, DeviceType, - Kokkos::MemoryTraits::value> > uCG, + Kokkos::MemoryTraits::value> > const& uCG, Kokkos::View::t_efloat_1d::array_layout, DeviceType, - Kokkos::MemoryTraits::value> > uCGnew, + Kokkos::MemoryTraits::value> > const& uCGnew, int isite1, int isite2, - typename PairTableRXKokkos::TableDeviceConst d_table_const, + typename PairTableRXKokkos::TableDeviceConst const& d_table_const, int eflag, int 
eflag_atom, int vflag, @@ -408,11 +408,11 @@ compute_item( Kokkos::View::t_virial_array::array_layout, DeviceType, - Kokkos::MemoryTraits::value> > v_vatom, + Kokkos::MemoryTraits::value> > const& v_vatom, Kokkos::View::t_efloat_1d::array_layout, DeviceType, - Kokkos::MemoryTraits::value> > v_eatom) { + Kokkos::MemoryTraits::value> > const& v_eatom) { EV_FLOAT ev; auto i = d_ilist(ii); auto xtmp = x(i,0); From 7ebed717de983c351208b8e2080b37fbf761d522 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 15 Mar 2017 16:05:51 -0600 Subject: [PATCH 205/267] Adding gb_test --- src/KOKKOS/kokkos.cpp | 10 +- src/KOKKOS/kokkos.h | 1 + src/KOKKOS/pair_exp6_rx_kokkos.cpp | 425 +++++++++++++++++++++++++++++ src/KOKKOS/pair_exp6_rx_kokkos.h | 27 ++ 4 files changed, 462 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index b8be74ac1e..a000ad5550 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -34,6 +34,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) lmp->kokkos = this; auto_sync = 1; + gb_test = 0; int me = 0; MPI_Comm_rank(world,&me); @@ -156,6 +157,7 @@ void KokkosLMP::accelerator(int narg, char **arg) neighflag = FULL; neighflag_qeq = FULL; neighflag_qeq_set = 0; + gb_test = 0; int newtonflag = 0; double binsize = 0.0; exchange_comm_classic = forward_comm_classic = 0; @@ -197,6 +199,12 @@ void KokkosLMP::accelerator(int narg, char **arg) else if (strcmp(arg[iarg+1],"on") == 0) newtonflag = 1; else error->all(FLERR,"Illegal package kokkos command"); iarg += 2; + } else if (strcmp(arg[iarg],"gb/test") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); + if (strcmp(arg[iarg+1],"off") == 0) gb_test = 0; + else if (strcmp(arg[iarg+1],"on") == 0) gb_test = 1; + else error->all(FLERR,"Illegal package kokkos command"); + iarg += 2; } else if (strcmp(arg[iarg],"comm") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); if 
(strcmp(arg[iarg+1],"no") == 0) { @@ -293,4 +301,4 @@ void KokkosLMP::my_signal_handler(int sig) if (sig == SIGSEGV) { kill(getpid(),SIGABRT); } -} \ No newline at end of file +} diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index 8e28b38cbf..3784d806bf 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -32,6 +32,7 @@ class KokkosLMP : protected Pointers { int num_threads,ngpu; int numa; int auto_sync; + int gb_test; KokkosLMP(class LAMMPS *, int, char **); ~KokkosLMP(); diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index abc158d72c..8cf235964c 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -205,6 +205,8 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) EV_FLOAT ev; + if (!lmp->kokkos->gb_test) { + if (neighflag == HALF) { if (newton_pair) { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); @@ -231,6 +233,48 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) } } + } else { // No atomics + + num_threads = lmp->kokkos->num_threads; + int nmax = f.dimension_1(); + if (nmax > t_f.dimension_1()) { + t_f = t_f_array_thread("pair_exp6_rx:t_f",num_threads,nmax); + t_uCG = t_efloat_1d_thread("pair_exp6_rx:t_uCG",num_threads,nmax); + t_uCGnew = t_efloat_1d_thread("pair_exp6_rx:t_UCGnew",num_threads,nmax); + } + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nmax),*this); + + if (neighflag == HALF) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } 
else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } else if (neighflag == FULL) { + if (newton_pair) { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nmax),*this); + + } + k_error_flag.template modify(); k_error_flag.template sync(); if (k_error_flag.h_view()) @@ -636,6 +680,387 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxComputetemplate operator()(TagPairExp6rxCompute(), ii, ev); } +// Experimental thread-safety using duplicated data instead of atomics + +template +template +KOKKOS_INLINE_FUNCTION +void PairExp6rxKokkos::operator()(TagPairExp6rxComputeNoAtomics, const int &ii, EV_FLOAT& ev) const { + + int tid = 0; +#ifndef KOKKOS_HAVE_CUDA + tid = DeviceType::hardware_thread_id(); +#endif + + int i,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair; + double rsq,r2inv,r6inv,forceExp6,factor_lj; + double rCut,rCutInv,rCut2inv,rCut6inv,rCutExp,urc,durc; + double rm2ij,rm6ij; + double r,rexp; + + double alphaOld12_ij, rmOld12_ij, epsilonOld12_ij; + double alphaOld21_ij, rmOld21_ij, epsilonOld21_ij; + double alpha12_ij, rm12_ij, epsilon12_ij; + double alpha21_ij, rm21_ij, epsilon21_ij; + double rminv, buck1, buck2; + double epsilonOld1_i,alphaOld1_i,rmOld1_i; + double epsilonOld1_j,alphaOld1_j,rmOld1_j; + double epsilonOld2_i,alphaOld2_i,rmOld2_i; + double epsilonOld2_j,alphaOld2_j,rmOld2_j; + double epsilon1_i,alpha1_i,rm1_i; + double epsilon1_j,alpha1_j,rm1_j; + double epsilon2_i,alpha2_i,rm2_i; + double epsilon2_j,alpha2_j,rm2_j; + double evdwlOldEXP6_12, evdwlOldEXP6_21, fpairOldEXP6_12, fpairOldEXP6_21; + double 
evdwlEXP6_12, evdwlEXP6_21; + double mixWtSite1old_i, mixWtSite1old_j; + double mixWtSite2old_i, mixWtSite2old_j; + double mixWtSite1_i, mixWtSite1_j; + double mixWtSite2_i, mixWtSite2_j; + + const int nRep = 12; + const double shift = 1.05; + double rin1, aRep, uin1, win1, uin1rep, rin1exp, rin6, rin6inv; + + evdwlOld = 0.0; + evdwl = 0.0; + + i = d_ilist[ii]; + xtmp = x(i,0); + ytmp = x(i,1); + ztmp = x(i,2); + itype = type[i]; + jnum = d_numneigh[i]; + + double fx_i = 0.0; + double fy_i = 0.0; + double fz_i = 0.0; + double uCG_i = 0.0; + double uCGnew_i = 0.0; + + { + epsilon1_i = PairExp6ParamData.epsilon1[i]; + alpha1_i = PairExp6ParamData.alpha1[i]; + rm1_i = PairExp6ParamData.rm1[i]; + mixWtSite1_i = PairExp6ParamData.mixWtSite1[i]; + epsilon2_i = PairExp6ParamData.epsilon2[i]; + alpha2_i = PairExp6ParamData.alpha2[i]; + rm2_i = PairExp6ParamData.rm2[i]; + mixWtSite2_i = PairExp6ParamData.mixWtSite2[i]; + epsilonOld1_i = PairExp6ParamData.epsilonOld1[i]; + alphaOld1_i = PairExp6ParamData.alphaOld1[i]; + rmOld1_i = PairExp6ParamData.rmOld1[i]; + mixWtSite1old_i = PairExp6ParamData.mixWtSite1old[i]; + epsilonOld2_i = PairExp6ParamData.epsilonOld2[i]; + alphaOld2_i = PairExp6ParamData.alphaOld2[i]; + rmOld2_i = PairExp6ParamData.rmOld2[i]; + mixWtSite2old_i = PairExp6ParamData.mixWtSite2old[i]; + } + + for (jj = 0; jj < jnum; jj++) { + int j = d_neighbors(i,jj); + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x(j,0); + dely = ytmp - x(j,1); + delz = ztmp - x(j,2); + + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < d_cutsq(itype,jtype)) { // optimize + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + + r = sqrt(rsq); + rCut2inv = 1.0/d_cutsq(itype,jtype); + rCut6inv = rCut2inv*rCut2inv*rCut2inv; + rCut = sqrt(d_cutsq(itype,jtype)); + rCutInv = 1.0/rCut; + + // + // A. Compute the exp-6 potential + // + + // A1. 
Get alpha, epsilon and rm for particle j + + { + epsilon1_j = PairExp6ParamData.epsilon1[j]; + alpha1_j = PairExp6ParamData.alpha1[j]; + rm1_j = PairExp6ParamData.rm1[j]; + mixWtSite1_j = PairExp6ParamData.mixWtSite1[j]; + epsilon2_j = PairExp6ParamData.epsilon2[j]; + alpha2_j = PairExp6ParamData.alpha2[j]; + rm2_j = PairExp6ParamData.rm2[j]; + mixWtSite2_j = PairExp6ParamData.mixWtSite2[j]; + epsilonOld1_j = PairExp6ParamData.epsilonOld1[j]; + alphaOld1_j = PairExp6ParamData.alphaOld1[j]; + rmOld1_j = PairExp6ParamData.rmOld1[j]; + mixWtSite1old_j = PairExp6ParamData.mixWtSite1old[j]; + epsilonOld2_j = PairExp6ParamData.epsilonOld2[j]; + alphaOld2_j = PairExp6ParamData.alphaOld2[j]; + rmOld2_j = PairExp6ParamData.rmOld2[j]; + mixWtSite2old_j = PairExp6ParamData.mixWtSite2old[j]; + } + + // A2. Apply Lorentz-Berthelot mixing rules for the i-j pair + alphaOld12_ij = sqrt(alphaOld1_i*alphaOld2_j); + rmOld12_ij = 0.5*(rmOld1_i + rmOld2_j); + epsilonOld12_ij = sqrt(epsilonOld1_i*epsilonOld2_j); + alphaOld21_ij = sqrt(alphaOld2_i*alphaOld1_j); + rmOld21_ij = 0.5*(rmOld2_i + rmOld1_j); + epsilonOld21_ij = sqrt(epsilonOld2_i*epsilonOld1_j); + + alpha12_ij = sqrt(alpha1_i*alpha2_j); + rm12_ij = 0.5*(rm1_i + rm2_j); + epsilon12_ij = sqrt(epsilon1_i*epsilon2_j); + alpha21_ij = sqrt(alpha2_i*alpha1_j); + rm21_ij = 0.5*(rm2_i + rm1_j); + epsilon21_ij = sqrt(epsilon2_i*epsilon1_j); + + evdwlOldEXP6_12 = 0.0; + evdwlOldEXP6_21 = 0.0; + evdwlEXP6_12 = 0.0; + evdwlEXP6_21 = 0.0; + fpairOldEXP6_12 = 0.0; + fpairOldEXP6_21 = 0.0; + + if(rmOld12_ij!=0.0 && rmOld21_ij!=0.0){ + if(alphaOld21_ij == 6.0 || alphaOld12_ij == 6.0) + k_error_flag.d_view() = 1; + + // A3. 
Compute some convenient quantities for evaluating the force + rminv = 1.0/rmOld12_ij; + buck1 = epsilonOld12_ij / (alphaOld12_ij - 6.0); + rexp = expValue(alphaOld12_ij*(1.0-r*rminv)); + rm2ij = rmOld12_ij*rmOld12_ij; + rm6ij = rm2ij*rm2ij*rm2ij; + + // Compute the shifted potential + rCutExp = expValue(alphaOld12_ij*(1.0-rCut*rminv)); + buck2 = 6.0*alphaOld12_ij; + urc = buck1*(6.0*rCutExp - alphaOld12_ij*rm6ij*rCut6inv); + durc = -buck1*buck2*(rCutExp* rminv - rCutInv*rm6ij*rCut6inv); + rin1 = shift*rmOld12_ij*func_rin(alphaOld12_ij); + if(r < rin1){ + rin6 = rin1*rin1*rin1*rin1*rin1*rin1; + rin6inv = 1.0/rin6; + + rin1exp = expValue(alphaOld12_ij*(1.0-rin1*rminv)); + + uin1 = buck1*(6.0*rin1exp - alphaOld12_ij*rm6ij*rin6inv) - urc - durc*(rin1-rCut); + + win1 = buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) + rin1*durc; + + aRep = win1*powint(rin1,nRep)/nRep; + + uin1rep = aRep/powint(rin1,nRep); + + forceExp6 = double(nRep)*aRep/powint(r,nRep); + fpairOldEXP6_12 = factor_lj*forceExp6*r2inv; + + evdwlOldEXP6_12 = uin1 - uin1rep + aRep/powint(r,nRep); + } else { + forceExp6 = buck1*buck2*(r*rexp*rminv - rm6ij*r6inv) + r*durc; + fpairOldEXP6_12 = factor_lj*forceExp6*r2inv; + + evdwlOldEXP6_12 = buck1*(6.0*rexp - alphaOld12_ij*rm6ij*r6inv) - urc - durc*(r-rCut); + } + + // A3. 
Compute some convenient quantities for evaluating the force + rminv = 1.0/rmOld21_ij; + buck1 = epsilonOld21_ij / (alphaOld21_ij - 6.0); + buck2 = 6.0*alphaOld21_ij; + rexp = expValue(alphaOld21_ij*(1.0-r*rminv)); + rm2ij = rmOld21_ij*rmOld21_ij; + rm6ij = rm2ij*rm2ij*rm2ij; + + // Compute the shifted potential + rCutExp = expValue(alphaOld21_ij*(1.0-rCut*rminv)); + buck2 = 6.0*alphaOld21_ij; + urc = buck1*(6.0*rCutExp - alphaOld21_ij*rm6ij*rCut6inv); + durc = -buck1*buck2*(rCutExp* rminv - rCutInv*rm6ij*rCut6inv); + rin1 = shift*rmOld21_ij*func_rin(alphaOld21_ij); + + if(r < rin1){ + rin6 = rin1*rin1*rin1*rin1*rin1*rin1; + rin6inv = 1.0/rin6; + + rin1exp = expValue(alphaOld21_ij*(1.0-rin1*rminv)); + + uin1 = buck1*(6.0*rin1exp - alphaOld21_ij*rm6ij*rin6inv) - urc - durc*(rin1-rCut); + + win1 = buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) + rin1*durc; + + aRep = win1*powint(rin1,nRep)/nRep; + + uin1rep = aRep/powint(rin1,nRep); + + forceExp6 = double(nRep)*aRep/powint(r,nRep); + fpairOldEXP6_21 = factor_lj*forceExp6*r2inv; + + evdwlOldEXP6_21 = uin1 - uin1rep + aRep/powint(r,nRep); + } else { + forceExp6 = buck1*buck2*(r*rexp*rminv - rm6ij*r6inv) + r*durc; + fpairOldEXP6_21 = factor_lj*forceExp6*r2inv; + + evdwlOldEXP6_21 = buck1*(6.0*rexp - alphaOld21_ij*rm6ij*r6inv) - urc - durc*(r-rCut); + } + + if (isite1 == isite2) + evdwlOld = sqrt(mixWtSite1old_i*mixWtSite2old_j)*evdwlOldEXP6_12; + else + evdwlOld = sqrt(mixWtSite1old_i*mixWtSite2old_j)*evdwlOldEXP6_12 + sqrt(mixWtSite2old_i*mixWtSite1old_j)*evdwlOldEXP6_21; + + evdwlOld *= factor_lj; + + uCG_i += 0.5*evdwlOld; + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) + t_uCG(tid,j) += 0.5*evdwlOld; + } + + if(rm12_ij!=0.0 && rm21_ij!=0.0){ + if(alpha21_ij == 6.0 || alpha12_ij == 6.0) + k_error_flag.d_view() = 1; + + // A3. 
Compute some convenient quantities for evaluating the force + rminv = 1.0/rm12_ij; + buck1 = epsilon12_ij / (alpha12_ij - 6.0); + buck2 = 6.0*alpha12_ij; + rexp = expValue(alpha12_ij*(1.0-r*rminv)); + rm2ij = rm12_ij*rm12_ij; + rm6ij = rm2ij*rm2ij*rm2ij; + + // Compute the shifted potential + rCutExp = expValue(alpha12_ij*(1.0-rCut*rminv)); + urc = buck1*(6.0*rCutExp - alpha12_ij*rm6ij*rCut6inv); + durc = -buck1*buck2*(rCutExp*rminv - rCutInv*rm6ij*rCut6inv); + rin1 = shift*rm12_ij*func_rin(alpha12_ij); + + if(r < rin1){ + rin6 = rin1*rin1*rin1*rin1*rin1*rin1; + rin6inv = 1.0/rin6; + + rin1exp = expValue(alpha12_ij*(1.0-rin1*rminv)); + + uin1 = buck1*(6.0*rin1exp - alpha12_ij*rm6ij*rin6inv) - urc - durc*(rin1-rCut); + + win1 = buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) + rin1*durc; + + aRep = win1*powint(rin1,nRep)/nRep; + + uin1rep = aRep/powint(rin1,nRep); + + evdwlEXP6_12 = uin1 - uin1rep + aRep/powint(r,nRep); + } else { + evdwlEXP6_12 = buck1*(6.0*rexp - alpha12_ij*rm6ij*r6inv) - urc - durc*(r-rCut); + } + + rminv = 1.0/rm21_ij; + buck1 = epsilon21_ij / (alpha21_ij - 6.0); + buck2 = 6.0*alpha21_ij; + rexp = expValue(alpha21_ij*(1.0-r*rminv)); + rm2ij = rm21_ij*rm21_ij; + rm6ij = rm2ij*rm2ij*rm2ij; + + // Compute the shifted potential + rCutExp = expValue(alpha21_ij*(1.0-rCut*rminv)); + urc = buck1*(6.0*rCutExp - alpha21_ij*rm6ij*rCut6inv); + durc = -buck1*buck2*(rCutExp*rminv - rCutInv*rm6ij*rCut6inv); + rin1 = shift*rm21_ij*func_rin(alpha21_ij); + + if(r < rin1){ + rin6 = rin1*rin1*rin1*rin1*rin1*rin1; + rin6inv = 1.0/rin6; + + rin1exp = expValue(alpha21_ij*(1.0-rin1*rminv)); + + uin1 = buck1*(6.0*rin1exp - alpha21_ij*rm6ij*rin6inv) - urc - durc*(rin1-rCut); + + win1 = buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) + rin1*durc; + + aRep = win1*powint(rin1,nRep)/nRep; + + uin1rep = aRep/powint(rin1,nRep); + + evdwlEXP6_21 = uin1 - uin1rep + aRep/powint(r,nRep); + } else { + evdwlEXP6_21 = buck1*(6.0*rexp - alpha21_ij*rm6ij*r6inv) - urc - durc*(r-rCut); 
+ } + } + + // + // Apply Mixing Rule to get the overall force for the CG pair + // + if (isite1 == isite2) fpair = sqrt(mixWtSite1old_i*mixWtSite2old_j)*fpairOldEXP6_12; + else fpair = sqrt(mixWtSite1old_i*mixWtSite2old_j)*fpairOldEXP6_12 + sqrt(mixWtSite2old_i*mixWtSite1old_j)*fpairOldEXP6_21; + + fx_i += delx*fpair; + fy_i += dely*fpair; + fz_i += delz*fpair; + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { + t_f(tid,j,0) -= delx*fpair; + t_f(tid,j,1) -= dely*fpair; + t_f(tid,j,2) -= delz*fpair; + } + + if (isite1 == isite2) evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwlEXP6_12; + else evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwlEXP6_12 + sqrt(mixWtSite2_i*mixWtSite1_j)*evdwlEXP6_21; + evdwl *= factor_lj; + + uCGnew_i += 0.5*evdwl; + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) + t_uCGnew(tid,j) += 0.5*evdwl; + evdwl = evdwlOld; + if (EVFLAG) + ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR||(jtemplate ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); + } + } + + t_f(tid,i,0) += fx_i; + t_f(tid,i,1) += fy_i; + t_f(tid,i,2) += fz_i; + t_uCG(tid,i) += uCG_i; + t_uCGnew(tid,i) += uCGnew_i; +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairExp6rxKokkos::operator()(TagPairExp6rxComputeNoAtomics, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairExp6rxComputeNoAtomics(), ii, ev); +} + +template +KOKKOS_INLINE_FUNCTION +void PairExp6rxKokkos::operator()(TagPairExp6rxCollapseDupViews, const int &i) const { + for (int n = 0; n < num_threads; n++) { + f(i,0) += t_f(n,i,0); + f(i,1) += t_f(n,i,1); + f(i,2) += t_f(n,i,2); + uCG(i) += t_uCG(n,i); + uCGnew(i) += t_uCGnew(n,i); + } +} + +template +KOKKOS_INLINE_FUNCTION +void PairExp6rxKokkos::operator()(TagPairExp6rxZeroDupViews, const int &i) const { + for (int n = 0; n < num_threads; n++) { + t_f(n,i,0) = 0.0; + t_f(n,i,1) = 0.0; + t_f(n,i,2) = 0.0; + t_uCG(n,i) = 0.0; + t_uCGnew(n,i) = 0.0; + } +} + + /* 
---------------------------------------------------------------------- allocate all arrays ------------------------------------------------------------------------- */ diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.h b/src/KOKKOS/pair_exp6_rx_kokkos.h index 488c9d0039..8754a73c96 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.h +++ b/src/KOKKOS/pair_exp6_rx_kokkos.h @@ -57,6 +57,12 @@ struct TagPairExp6rxgetMixingWeights{}; template struct TagPairExp6rxCompute{}; +template +struct TagPairExp6rxComputeNoAtomics{}; + +struct TagPairExp6rxCollapseDupViews{}; +struct TagPairExp6rxZeroDupViews{}; + template class PairExp6rxKokkos : public PairExp6rx { public: @@ -81,6 +87,20 @@ class PairExp6rxKokkos : public PairExp6rx { KOKKOS_INLINE_FUNCTION void operator()(TagPairExp6rxCompute, const int&) const; + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairExp6rxComputeNoAtomics, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairExp6rxComputeNoAtomics, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagPairExp6rxCollapseDupViews, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagPairExp6rxZeroDupViews, const int&) const; + template KOKKOS_INLINE_FUNCTION void ev_tally(EV_FLOAT &ev, const int &i, const int &j, @@ -94,6 +114,7 @@ class PairExp6rxKokkos : public PairExp6rx { int eflag,vflag; int nlocal,newton_pair,neighflag; double special_lj[4]; + int num_threads; typename AT::t_x_array_randomread x; typename AT::t_f_array f; @@ -101,6 +122,12 @@ class PairExp6rxKokkos : public PairExp6rx { typename AT::t_efloat_1d uCG, uCGnew; typename AT::t_float_2d dvector; + typedef Kokkos::View t_f_array_thread; + typedef Kokkos::View t_efloat_1d_thread; + + t_f_array_thread t_f; + t_efloat_1d_thread t_uCG, t_uCGnew; + DAT::tdual_efloat_1d k_eatom; DAT::tdual_virial_array k_vatom; DAT::t_efloat_1d d_eatom; From acdb932c4ec56b2ce56a71280dab5c17b39f2c03 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 16 Mar 
2017 09:28:27 -0600 Subject: [PATCH 206/267] Fixing index issue in pair_exp6_rx_kokkos --- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 8cf235964c..577d5261a3 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -236,7 +236,7 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) } else { // No atomics num_threads = lmp->kokkos->num_threads; - int nmax = f.dimension_1(); + int nmax = f.dimension_0(); if (nmax > t_f.dimension_1()) { t_f = t_f_array_thread("pair_exp6_rx:t_f",num_threads,nmax); t_uCG = t_efloat_1d_thread("pair_exp6_rx:t_uCG",num_threads,nmax); From f5b7361ef6b6dbeedb2ad2181a44db64943001bb Mon Sep 17 00:00:00 2001 From: "Christopher P. Stone" Date: Thu, 16 Mar 2017 21:31:30 -0400 Subject: [PATCH 207/267] Non-kokkos candidate of PairExp6rxKokkos::getMixingWeights to improve vectorization on the KNL. - Moved the particle loop inside a replica of getMixingWeights, getMixingWeightsVect, and refactored to improve vectorization. - Added OMP SIMD and OMP threading directly inside that function but will replace with kokkos parallel_for and parallel_reduce methods later. 
--- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 473 ++++++++++++++++++++++++++++- src/KOKKOS/pair_exp6_rx_kokkos.h | 3 + 2 files changed, 475 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index abc158d72c..df663c9df9 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -53,6 +53,22 @@ using namespace MathSpecialKokkos; #define exp6PotentialType (1) #define isExp6PotentialType(_type) ( (_type) == exp6PotentialType ) +namespace /* anonymous */ +{ + +//typedef double TimerType; +//TimerType getTimeStamp(void) { return MPI_Wtime(); } +//double getElapsedTime( const TimerType &t0, const TimerType &t1) { return t1-t0; } + +typedef struct timespec TimerType; +TimerType getTimeStamp(void) { TimerType tick; clock_gettime( CLOCK_MONOTONIC, &tick); return tick; } +double getElapsedTime( const TimerType &t0, const TimerType &t1) +{ + return (t1.tv_sec - t0.tv_sec) + 1e-9*(t1.tv_nsec - t0.tv_nsec); +} + +} // end namespace + /* ---------------------------------------------------------------------- */ template @@ -121,6 +137,8 @@ void PairExp6rxKokkos::init_style() template void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) { + TimerType t_start = getTimeStamp(); + copymode = 1; eflag = eflag_in; @@ -165,6 +183,7 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) // and ghost atoms. Make the parameter data persistent // and exchange like any other atom property later. 
+ TimerType t_mix_start = getTimeStamp(); { const int np_total = nlocal + atom->nghost; @@ -185,8 +204,77 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) PairExp6ParamData.rmOld2 = typename AT::t_float_1d("PairExp6ParamData.rmOld2" ,np_total); PairExp6ParamData.mixWtSite2old = typename AT::t_float_1d("PairExp6ParamData.mixWtSite2old",np_total); - Kokkos::parallel_for(Kokkos::RangePolicy(0,np_total),*this); + //Kokkos::parallel_for(Kokkos::RangePolicy(0,np_total),*this); + + //typename AT::t_float_1d epsilon1 ("epsilon1" ,np_total); + //typename AT::t_float_1d alpha1 ("alpha1" ,np_total); + //typename AT::t_float_1d rm1 ("rm1" ,np_total); + //typename AT::t_float_1d mixWtSite1 ("mixWtSite1" ,np_total); + //typename AT::t_float_1d epsilon2 ("epsilon2" ,np_total); + //typename AT::t_float_1d alpha2 ("alpha2" ,np_total); + //typename AT::t_float_1d rm2 ("rm2" ,np_total); + //typename AT::t_float_1d mixWtSite2 ("mixWtSite2" ,np_total); + //typename AT::t_float_1d epsilonOld1 ("epsilonOld1" ,np_total); + //typename AT::t_float_1d alphaOld1 ("alphaOld1" ,np_total); + //typename AT::t_float_1d rmOld1 ("rmOld1" ,np_total); + //typename AT::t_float_1d mixWtSite1old("mixWtSite1old",np_total); + //typename AT::t_float_1d epsilonOld2 ("epsilonOld2" ,np_total); + //typename AT::t_float_1d alphaOld2 ("alphaOld2" ,np_total); + //typename AT::t_float_1d rmOld2 ("rmOld2" ,np_total); + //typename AT::t_float_1d mixWtSite2old("mixWtSite2old",np_total); + + int errorFlag = 0; + getMixingWeightsVect (np_total, errorFlag, PairExp6ParamData.epsilon1, + PairExp6ParamData.alpha1, + PairExp6ParamData.rm1, + PairExp6ParamData.mixWtSite1, + PairExp6ParamData.epsilon2, + PairExp6ParamData.alpha2, + PairExp6ParamData.rm2, + PairExp6ParamData.mixWtSite2, + PairExp6ParamData.epsilonOld1, + PairExp6ParamData.alphaOld1, + PairExp6ParamData.rmOld1, + PairExp6ParamData.mixWtSite1old, + PairExp6ParamData.epsilonOld2, + PairExp6ParamData.alphaOld2, + PairExp6ParamData.rmOld2, + 
PairExp6ParamData.mixWtSite2old); + if (errorFlag == 1) + error->all(FLERR,"The number of molecules in CG particle is less than 10*DBL_EPSILON."); + else if (errorFlag == 2) + error->all(FLERR,"Computed fraction less than -10*DBL_EPSILON"); + + //#define _test_var(var) { \ + // double ref2 = 0, err2 = 0; \ + // for (int id = 0; id < np_total; ++id) \ + // { \ + // double ref = PairExp6ParamData. var [id]; \ + // double diff = ref - var[id]; \ + // ref2 += ref*ref; \ + // err2 += diff*diff; \ + // } \ + // if (ref2 < 1e-20) ref2 = 1.0; \ + // if (sqrt(err2)/sqrt(ref2) > 1e-12) \ + // printf("%s: %e %e %e\n", # var, sqrt(ref2), sqrt(err2), sqrt(err2)/sqrt(ref2)); \ + //} + //_test_var( epsilon1); + //_test_var( alpha1); + //_test_var( rm1); + //_test_var( epsilon2); + //_test_var( alpha2); + //_test_var( rm2); + //_test_var( mixWtSite2); + //_test_var( epsilonOld1); + //_test_var( alphaOld1); + //_test_var( rmOld1); + //_test_var( mixWtSite1old); + //_test_var( epsilonOld2); + //_test_var( alphaOld2); + //_test_var( rmOld2); + //_test_var( mixWtSite2old); } + TimerType t_mix_stop = getTimeStamp(); k_error_flag.template modify(); k_error_flag.template sync(); @@ -259,6 +347,9 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) } copymode = 0; + + TimerType t_stop = getTimeStamp(); + printf("PairExp6rxKokkos::compute %f %f\n", getElapsedTime(t_start, t_stop), getElapsedTime(t_mix_start, t_mix_stop)); } template @@ -917,6 +1008,7 @@ void PairExp6rxKokkos::getMixingWeights(int id,double &epsilon1,doub nMoleculesOld2 = dvector(ispecies+nspecies,id); nMolecules2 = dvector(ispecies,id); fractionOld2 = dvector(ispecies+nspecies,id)/nTotalold; + fraction2 = nMolecules2/nTotal; } // If Site1 or Site2 matches is a fluid, then compute the paramters @@ -1072,6 +1164,385 @@ void PairExp6rxKokkos::getMixingWeights(int id,double &epsilon1,doub } } +#ifdef _OPENMP +void partition_range( const int begin, const int end, int &thread_begin, int &thread_end, const int chunkSize 
= 1) +{ + int threadId = omp_get_thread_num(); + int nThreads = omp_get_num_threads(); + + const int len = end - begin; + const int nBlocks = (len + (chunkSize - 1)) / chunkSize; + const int nBlocksPerThread = nBlocks / nThreads; + const int nRemaining = nBlocks - nBlocksPerThread * nThreads; + int block_lo, block_hi; + if (threadId < nRemaining) + { + block_lo = threadId * nBlocksPerThread + threadId; + block_hi = block_lo + nBlocksPerThread + 1; + } + else + { + block_lo = threadId * nBlocksPerThread + nRemaining; + block_hi = block_lo + nBlocksPerThread; + } + + thread_begin = std::min(begin + block_lo * chunkSize, end); + thread_end = std::min(begin + block_hi * chunkSize, end); + //printf("tid: %d %d %d %d %d\n", threadId, block_lo, block_hi, thread_begin, thread_end); +} +#endif + +/* ---------------------------------------------------------------------- */ + +template + template +void PairExp6rxKokkos::getMixingWeightsVect(const int np_total, int errorFlag, + ArrayT &epsilon1, ArrayT &alpha1, ArrayT &rm1, ArrayT &mixWtSite1, ArrayT &epsilon2, ArrayT &alpha2, ArrayT &rm2, ArrayT &mixWtSite2, ArrayT &epsilon1_old, ArrayT &alpha1_old, ArrayT &rm1_old, ArrayT &mixWtSite1old, ArrayT &epsilon2_old, ArrayT &alpha2_old, ArrayT &rm2_old, ArrayT &mixWtSite2old) const +{ + ArrayT epsilon("PairExp6ParamData.epsilon", np_total); + ArrayT rm3("PairExp6ParamData.rm3", np_total); + ArrayT alpha("PairExp6ParamData.alpha", np_total); + ArrayT xMolei("PairExp6ParamData.xMolei", np_total); + + ArrayT epsilon_old("PairExp6ParamData.epsilon_old", np_total); + ArrayT rm3_old("PairExp6ParamData.rm3_old", np_total); + ArrayT alpha_old("PairExp6ParamData.alpha_old", np_total); + ArrayT xMolei_old("PairExp6ParamData.xMolei_old", np_total); + + ArrayT fractionOFA("PairExp6ParamData.fractionOFA", np_total); + ArrayT fraction1("PairExp6ParamData.fraction1", np_total); + ArrayT fraction2("PairExp6ParamData.fraction2", np_total); + ArrayT nMoleculesOFA("PairExp6ParamData.nMoleculesOFA", 
np_total); + ArrayT nMolecules1("PairExp6ParamData.nMolecules1", np_total); + ArrayT nMolecules2("PairExp6ParamData.nMolecules2", np_total); + ArrayT nTotal("PairExp6ParamData.nTotal", np_total); + + ArrayT fractionOFAold("PairExp6ParamData.fractionOFAold", np_total); + ArrayT fractionOld1("PairExp6ParamData.fractionOld1", np_total); + ArrayT fractionOld2("PairExp6ParamData.fractionOld2", np_total); + ArrayT nMoleculesOFAold("PairExp6ParamData.nMoleculesOFAold", np_total); + ArrayT nMoleculesOld1("PairExp6ParamData.nMoleculesOld1", np_total); + ArrayT nMoleculesOld2("PairExp6ParamData.nMoleculesOld2", np_total); + ArrayT nTotalold("PairExp6ParamData.nTotalold", np_total); + + int errorFlag1 = 0, errorFlag2 = 0; + +#ifdef _OPENMP + #pragma omp parallel reduction(+: errorFlag1, errorFlag2) +#endif + { + int idx_begin = 0, idx_end = np_total; +#ifdef _OPENMP + partition_range( 0, np_total, idx_begin, idx_end, 16 ); +#endif + + // Zero out all of the terms first. + #pragma ivdep + for (int id = idx_begin; id < idx_end; ++id) + { + rm3[id] = 0.0; + epsilon[id] = 0.0; + alpha[id] = 0.0; + epsilon_old[id] = 0.0; + rm3_old[id] = 0.0; + alpha_old[id] = 0.0; + fractionOFA[id] = 0.0; + fractionOFAold[id] = 0.0; + nMoleculesOFA[id] = 0.0; + nMoleculesOFAold[id] = 0.0; + nTotal[id] = 0.0; + nTotalold[id] = 0.0; + } + + // Compute the total number of molecules in the old and new CG particle as well as the total number of molecules in the fluid portion of the old and new CG particle + for (int ispecies = 0; ispecies < nspecies; ispecies++) + { + #pragma ivdep + for (int id = idx_begin; id < idx_end; ++id) + { + nTotal[id] += dvector(ispecies,id); + nTotalold[id] += dvector(ispecies+nspecies,id); + } + + const int iparam = d_mol2param[ispecies]; + + if (iparam < 0 || d_params[iparam].potentialType != exp6PotentialType ) continue; + if (isOneFluidApprox(isite1) || isOneFluidApprox(isite2)) { + if (isite1 == d_params[iparam].ispecies || isite2 == d_params[iparam].ispecies) continue; 
+ + #pragma ivdep + for (int id = idx_begin; id < idx_end; ++id) + { + nMoleculesOFAold[id] += dvector(ispecies+nspecies,id); + nMoleculesOFA[id] += dvector(ispecies,id); + } + } + } + + // Make a reduction. + #pragma omp simd reduction(+:errorFlag1) + for (int id = idx_begin; id < idx_end; ++id) + { + if ( nTotal[id] < MY_EPSILON || nTotalold[id] < MY_EPSILON ) + errorFlag1 = 1; + + // Compute the mole fraction of molecules within the fluid portion of the particle (One Fluid Approximation) + fractionOFAold[id] = nMoleculesOFAold[id] / nTotalold[id]; + fractionOFA[id] = nMoleculesOFA[id] / nTotal[id]; + } + + for (int ispecies = 0; ispecies < nspecies; ispecies++) { + const int iparam = d_mol2param[ispecies]; + if (iparam < 0 || d_params[iparam].potentialType != exp6PotentialType ) continue; + + // If Site1 matches a pure species, then grab the parameters + if (isite1 == d_params[iparam].ispecies) + { + #pragma ivdep + for (int id = idx_begin; id < idx_end; ++id) + { + rm1_old[id] = d_params[iparam].rm; + rm1[id] = d_params[iparam].rm; + epsilon1_old[id] = d_params[iparam].epsilon; + epsilon1[id] = d_params[iparam].epsilon; + alpha1_old[id] = d_params[iparam].alpha; + alpha1[id] = d_params[iparam].alpha; + + // Compute the mole fraction of Site1 + nMoleculesOld1[id] = dvector(ispecies+nspecies,id); + nMolecules1[id] = dvector(ispecies,id); + fractionOld1[id] = nMoleculesOld1[id]/nTotalold[id]; + fraction1[id] = nMolecules1[id]/nTotal[id]; + } + } + + // If Site2 matches a pure species, then grab the parameters + if (isite2 == d_params[iparam].ispecies) + { + #pragma ivdep + for (int id = idx_begin; id < idx_end; ++id) + { + rm2_old[id] = d_params[iparam].rm; + rm2[id] = d_params[iparam].rm; + epsilon2_old[id] = d_params[iparam].epsilon; + epsilon2[id] = d_params[iparam].epsilon; + alpha2_old[id] = d_params[iparam].alpha; + alpha2[id] = d_params[iparam].alpha; + + // Compute the mole fraction of Site2 + nMoleculesOld2[id] = dvector(ispecies+nspecies,id); + 
nMolecules2[id] = dvector(ispecies,id); + fractionOld2[id] = nMoleculesOld2[id]/nTotalold[id]; + fraction2[id] = nMolecules2[id]/nTotal[id]; + } + } + + // If Site1 or Site2 matches is a fluid, then compute the paramters + if (isOneFluidApprox(isite1) || isOneFluidApprox(isite2)) { + if (isite1 == d_params[iparam].ispecies || isite2 == d_params[iparam].ispecies) continue; + + const double rmi = d_params[iparam].rm; + const double epsiloni = d_params[iparam].epsilon; + const double alphai = d_params[iparam].alpha; + + #pragma ivdep + for (int id = idx_begin; id < idx_end; ++id) + { + if(nMoleculesOFA[id] 0.0){ + rm3_old[id] += xMolei_old[id]*xMolej_old*rm3ij; + epsilon_old[id] += xMolei_old[id]*xMolej_old*rm3ij*epsilonij; + alpha_old[id] += xMolei_old[id]*xMolej_old*rm3ij*epsilonij*alphaij; + } + if(fractionOFA[id] > 0.0){ + rm3[id] += xMolei[id]*xMolej*rm3ij; + epsilon[id] += xMolei[id]*xMolej*rm3ij*epsilonij; + alpha[id] += xMolei[id]*xMolej*rm3ij*epsilonij*alphaij; + } + } + } + } + } + + if (isOneFluidApprox(isite1)) + { + #pragma ivdep + for (int id = idx_begin; id < idx_end; ++id) + { + rm1[id] = cbrt(rm3[id]); + if(rm1[id] < MY_EPSILON) { + rm1[id] = 0.0; + epsilon1[id] = 0.0; + alpha1[id] = 0.0; + } else { + epsilon1[id] = epsilon[id] / rm3[id]; + alpha1[id] = alpha[id] / epsilon1[id] / rm3[id]; + } + nMolecules1[id] = 1.0-(nTotal[id]-nMoleculesOFA[id]); + fraction1[id] = fractionOFA[id]; + + rm1_old[id] = cbrt(rm3_old[id]); + if(rm1_old[id] < MY_EPSILON) { + rm1_old[id] = 0.0; + epsilon1_old[id] = 0.0; + alpha1_old[id] = 0.0; + } else { + epsilon1_old[id] = epsilon_old[id] / rm3_old[id]; + alpha1_old[id] = alpha_old[id] / epsilon1_old[id] / rm3_old[id]; + } + nMoleculesOld1[id] = 1.0-(nTotalold[id]-nMoleculesOFAold[id]); + fractionOld1[id] = fractionOFAold[id]; + } + + if(scalingFlag == EXPONENT) { + #pragma ivdep + for (int id = idx_begin; id < idx_end; ++id) + { + exponentScaling(nMoleculesOFA[id],epsilon1[id],rm1[id]); + 
exponentScaling(nMoleculesOFAold[id],epsilon1_old[id],rm1_old[id]); + } + } + else if(scalingFlag == POLYNOMIAL){ + #pragma ivdep + for (int id = idx_begin; id < idx_end; ++id) + { + polynomialScaling(nMoleculesOFA[id],alpha1[id],epsilon1[id],rm1[id]); + polynomialScaling(nMoleculesOFAold[id],alpha1_old[id],epsilon1_old[id],rm1_old[id]); + } + } + } + + if (isOneFluidApprox(isite2)) + { + #pragma ivdep + for (int id = idx_begin; id < idx_end; ++id) + { + rm2[id] = cbrt(rm3[id]); + if(rm2[id] < MY_EPSILON) { + rm2[id] = 0.0; + epsilon2[id] = 0.0; + alpha2[id] = 0.0; + } else { + epsilon2[id] = epsilon[id] / rm3[id]; + alpha2[id] = alpha[id] / epsilon2[id] / rm3[id]; + } + nMolecules2[id] = 1.0-(nTotal[id]-nMoleculesOFA[id]); + fraction2[id] = fractionOFA[id]; + + rm2_old[id] = cbrt(rm3_old[id]); + if(rm2_old[id] < MY_EPSILON) { + rm2_old[id] = 0.0; + epsilon2_old[id] = 0.0; + alpha2_old[id] = 0.0; + } else { + epsilon2_old[id] = epsilon_old[id] / rm3_old[id]; + alpha2_old[id] = alpha_old[id] / epsilon2_old[id] / rm3_old[id]; + } + nMoleculesOld2[id] = 1.0-(nTotalold[id]-nMoleculesOFAold[id]); + fractionOld2[id] = fractionOFAold[id]; + } + + if(scalingFlag == EXPONENT){ + #pragma ivdep + for (int id = idx_begin; id < idx_end; ++id) + { + exponentScaling(nMoleculesOFA[id],epsilon2[id],rm2[id]); + exponentScaling(nMoleculesOFAold[id],epsilon2_old[id],rm2_old[id]); + } + } + else if(scalingFlag == POLYNOMIAL){ + #pragma ivdep + for (int id = idx_begin; id < idx_end; ++id) + { + polynomialScaling(nMoleculesOFA[id],alpha2[id],epsilon2[id],rm2[id]); + polynomialScaling(nMoleculesOFAold[id],alpha2_old[id],epsilon2_old[id],rm2_old[id]); + } + } + } + + // Check that no fractions are less than zero + #pragma omp simd reduction(+:errorFlag2) + for (int id = idx_begin; id < idx_end; ++id) + { + if(fraction1[id] < 0.0 || nMolecules1[id] < 0.0){ + if(fraction1[id] < -MY_EPSILON || nMolecules1[id] < -MY_EPSILON){ + errorFlag2 = 2; + } + nMolecules1[id] = 0.0; + fraction1[id] = 
0.0; + } + if(fraction2[id] < 0.0 || nMolecules2[id] < 0.0){ + if(fraction2[id] < -MY_EPSILON || nMolecules2[id] < -MY_EPSILON){ + errorFlag2 = 2; + } + nMolecules2[id] = 0.0; + fraction2[id] = 0.0; + } + if(fractionOld1[id] < 0.0 || nMoleculesOld1[id] < 0.0){ + if(fractionOld1[id] < -MY_EPSILON || nMoleculesOld1[id] < -MY_EPSILON){ + errorFlag2 = 2; + } + nMoleculesOld1[id] = 0.0; + fractionOld1[id] = 0.0; + } + if(fractionOld2[id] < 0.0 || nMoleculesOld2[id] < 0.0){ + if(fractionOld2[id] < -MY_EPSILON || nMoleculesOld2[id] < -MY_EPSILON){ + errorFlag2 = 2; + } + nMoleculesOld2[id] = 0.0; + fractionOld2[id] = 0.0; + } + + if(fractionalWeighting){ + mixWtSite1old[id] = fractionOld1[id]; + mixWtSite1[id] = fraction1[id]; + mixWtSite2old[id] = fractionOld2[id]; + mixWtSite2[id] = fraction2[id]; + } else { + mixWtSite1old[id] = nMoleculesOld1[id]; + mixWtSite1[id] = nMolecules1[id]; + mixWtSite2old[id] = nMoleculesOld2[id]; + mixWtSite2[id] = nMolecules2[id]; + } + } + + } // end parallel region + + if (errorFlag1 > 0) + errorFlag = 1; + + if (errorFlag2 > 0) + errorFlag = 2; +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.h b/src/KOKKOS/pair_exp6_rx_kokkos.h index 488c9d0039..55b29f559b 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.h +++ b/src/KOKKOS/pair_exp6_rx_kokkos.h @@ -133,6 +133,9 @@ class PairExp6rxKokkos : public PairExp6rx { KOKKOS_INLINE_FUNCTION void getMixingWeights(int, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &) const; + template + void getMixingWeightsVect(const int, int, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &) const; + KOKKOS_INLINE_FUNCTION void exponentScaling(double, double &, double &) const; From 
ec192a95cb1465184f29f9f6bae06da9815411dc Mon Sep 17 00:00:00 2001 From: "Christopher P. Stone" Date: Thu, 16 Mar 2017 22:28:19 -0400 Subject: [PATCH 208/267] Cleaned up the non-kokkos part of KOKKOS/pair_exp6_rx_kokkos.cpp --- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 78 +++++++----------------------- 1 file changed, 17 insertions(+), 61 deletions(-) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index df663c9df9..d1481e6a44 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -187,42 +187,26 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) { const int np_total = nlocal + atom->nghost; - PairExp6ParamData.epsilon1 = typename AT::t_float_1d("PairExp6ParamData.epsilon1" ,np_total); - PairExp6ParamData.alpha1 = typename AT::t_float_1d("PairExp6ParamData.alpha1" ,np_total); - PairExp6ParamData.rm1 = typename AT::t_float_1d("PairExp6ParamData.rm1" ,np_total); + PairExp6ParamData.epsilon1 = typename AT::t_float_1d("PairExp6ParamData.epsilon1" ,np_total); + PairExp6ParamData.alpha1 = typename AT::t_float_1d("PairExp6ParamData.alpha1" ,np_total); + PairExp6ParamData.rm1 = typename AT::t_float_1d("PairExp6ParamData.rm1" ,np_total); PairExp6ParamData.mixWtSite1 = typename AT::t_float_1d("PairExp6ParamData.mixWtSite1" ,np_total); - PairExp6ParamData.epsilon2 = typename AT::t_float_1d("PairExp6ParamData.epsilon2" ,np_total); - PairExp6ParamData.alpha2 = typename AT::t_float_1d("PairExp6ParamData.alpha2" ,np_total); - PairExp6ParamData.rm2 = typename AT::t_float_1d("PairExp6ParamData.rm2" ,np_total); + PairExp6ParamData.epsilon2 = typename AT::t_float_1d("PairExp6ParamData.epsilon2" ,np_total); + PairExp6ParamData.alpha2 = typename AT::t_float_1d("PairExp6ParamData.alpha2" ,np_total); + PairExp6ParamData.rm2 = typename AT::t_float_1d("PairExp6ParamData.rm2" ,np_total); PairExp6ParamData.mixWtSite2 = typename AT::t_float_1d("PairExp6ParamData.mixWtSite2" ,np_total); - 
PairExp6ParamData.epsilonOld1 = typename AT::t_float_1d("PairExp6ParamData.epsilonOld1" ,np_total); - PairExp6ParamData.alphaOld1 = typename AT::t_float_1d("PairExp6ParamData.alphaOld1" ,np_total); - PairExp6ParamData.rmOld1 = typename AT::t_float_1d("PairExp6ParamData.rmOld1" ,np_total); + PairExp6ParamData.epsilonOld1 = typename AT::t_float_1d("PairExp6ParamData.epsilonOld1" ,np_total); + PairExp6ParamData.alphaOld1 = typename AT::t_float_1d("PairExp6ParamData.alphaOld1" ,np_total); + PairExp6ParamData.rmOld1 = typename AT::t_float_1d("PairExp6ParamData.rmOld1" ,np_total); PairExp6ParamData.mixWtSite1old = typename AT::t_float_1d("PairExp6ParamData.mixWtSite1old",np_total); - PairExp6ParamData.epsilonOld2 = typename AT::t_float_1d("PairExp6ParamData.epsilonOld2" ,np_total); - PairExp6ParamData.alphaOld2 = typename AT::t_float_1d("PairExp6ParamData.alphaOld2" ,np_total); - PairExp6ParamData.rmOld2 = typename AT::t_float_1d("PairExp6ParamData.rmOld2" ,np_total); + PairExp6ParamData.epsilonOld2 = typename AT::t_float_1d("PairExp6ParamData.epsilonOld2" ,np_total); + PairExp6ParamData.alphaOld2 = typename AT::t_float_1d("PairExp6ParamData.alphaOld2" ,np_total); + PairExp6ParamData.rmOld2 = typename AT::t_float_1d("PairExp6ParamData.rmOld2" ,np_total); PairExp6ParamData.mixWtSite2old = typename AT::t_float_1d("PairExp6ParamData.mixWtSite2old",np_total); - //Kokkos::parallel_for(Kokkos::RangePolicy(0,np_total),*this); - - //typename AT::t_float_1d epsilon1 ("epsilon1" ,np_total); - //typename AT::t_float_1d alpha1 ("alpha1" ,np_total); - //typename AT::t_float_1d rm1 ("rm1" ,np_total); - //typename AT::t_float_1d mixWtSite1 ("mixWtSite1" ,np_total); - //typename AT::t_float_1d epsilon2 ("epsilon2" ,np_total); - //typename AT::t_float_1d alpha2 ("alpha2" ,np_total); - //typename AT::t_float_1d rm2 ("rm2" ,np_total); - //typename AT::t_float_1d mixWtSite2 ("mixWtSite2" ,np_total); - //typename AT::t_float_1d epsilonOld1 ("epsilonOld1" ,np_total); - //typename 
AT::t_float_1d alphaOld1 ("alphaOld1" ,np_total); - //typename AT::t_float_1d rmOld1 ("rmOld1" ,np_total); - //typename AT::t_float_1d mixWtSite1old("mixWtSite1old",np_total); - //typename AT::t_float_1d epsilonOld2 ("epsilonOld2" ,np_total); - //typename AT::t_float_1d alphaOld2 ("alphaOld2" ,np_total); - //typename AT::t_float_1d rmOld2 ("rmOld2" ,np_total); - //typename AT::t_float_1d mixWtSite2old("mixWtSite2old",np_total); - +#ifdef KOKKOS_HAVE_CUDA + Kokkos::parallel_for(Kokkos::RangePolicy(0,np_total),*this); +#else int errorFlag = 0; getMixingWeightsVect (np_total, errorFlag, PairExp6ParamData.epsilon1, PairExp6ParamData.alpha1, @@ -244,35 +228,7 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) error->all(FLERR,"The number of molecules in CG particle is less than 10*DBL_EPSILON."); else if (errorFlag == 2) error->all(FLERR,"Computed fraction less than -10*DBL_EPSILON"); - - //#define _test_var(var) { \ - // double ref2 = 0, err2 = 0; \ - // for (int id = 0; id < np_total; ++id) \ - // { \ - // double ref = PairExp6ParamData. 
var [id]; \ - // double diff = ref - var[id]; \ - // ref2 += ref*ref; \ - // err2 += diff*diff; \ - // } \ - // if (ref2 < 1e-20) ref2 = 1.0; \ - // if (sqrt(err2)/sqrt(ref2) > 1e-12) \ - // printf("%s: %e %e %e\n", # var, sqrt(ref2), sqrt(err2), sqrt(err2)/sqrt(ref2)); \ - //} - //_test_var( epsilon1); - //_test_var( alpha1); - //_test_var( rm1); - //_test_var( epsilon2); - //_test_var( alpha2); - //_test_var( rm2); - //_test_var( mixWtSite2); - //_test_var( epsilonOld1); - //_test_var( alphaOld1); - //_test_var( rmOld1); - //_test_var( mixWtSite1old); - //_test_var( epsilonOld2); - //_test_var( alphaOld2); - //_test_var( rmOld2); - //_test_var( mixWtSite2old); +#endif } TimerType t_mix_stop = getTimeStamp(); @@ -349,7 +305,7 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) copymode = 0; TimerType t_stop = getTimeStamp(); - printf("PairExp6rxKokkos::compute %f %f\n", getElapsedTime(t_start, t_stop), getElapsedTime(t_mix_start, t_mix_stop)); + //printf("PairExp6rxKokkos::compute %f %f\n", getElapsedTime(t_start, t_stop), getElapsedTime(t_mix_start, t_mix_stop)); } template From 64fdb1f528bcaacc8c6a7ad1ea1b4824533af838 Mon Sep 17 00:00:00 2001 From: "Christopher P. Stone" Date: Fri, 17 Mar 2017 15:52:40 -0400 Subject: [PATCH 209/267] Kokkos/pair_exp6_rx_kokkos optimized for SIMD on the inner j-loop. 
--- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 518 ++++++++++++++++++++++++++++- src/KOKKOS/pair_exp6_rx_kokkos.h | 4 + 2 files changed, 521 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 64a91c9e65..85d919091f 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -349,7 +349,7 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) copymode = 0; TimerType t_stop = getTimeStamp(); - //printf("PairExp6rxKokkos::compute %f %f\n", getElapsedTime(t_start, t_stop), getElapsedTime(t_mix_start, t_mix_stop)); + printf("PairExp6rxKokkos::compute %f %f\n", getElapsedTime(t_start, t_stop), getElapsedTime(t_mix_start, t_mix_stop)); } template @@ -378,6 +378,14 @@ template KOKKOS_INLINE_FUNCTION void PairExp6rxKokkos::operator()(TagPairExp6rxCompute, const int &ii, EV_FLOAT& ev) const { + { + if (isite1 == isite2) + this->vectorized_operator(ii, ev); + else + this->vectorized_operator(ii, ev); + return; + } + // These arrays are atomic for Half/Thread neighbor style Kokkos::View::value> > a_f = f; Kokkos::View::value> > a_uCG = uCG; @@ -734,6 +742,14 @@ template KOKKOS_INLINE_FUNCTION void PairExp6rxKokkos::operator()(TagPairExp6rxComputeNoAtomics, const int &ii, EV_FLOAT& ev) const { + { + if (isite1 == isite2) + this->vectorized_operator(ii, ev); + else + this->vectorized_operator(ii, ev); + return; + } + int tid = 0; #ifndef KOKKOS_HAVE_CUDA tid = DeviceType::hardware_thread_id(); @@ -1075,6 +1091,506 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxComputeNoAtomics + KOKKOS_INLINE_FUNCTION +double __powint(const double& x, const int) +{ + static_assert(n == 12, "__powint<> only supports specific integer powers."); + + if (n == 12) + { + // Do x^12 here ... 
x^12 = (x^3)^4 + double x3 = x*x*x; + return x3*x3*x3*x3; + } +} + +template + template +KOKKOS_INLINE_FUNCTION +void PairExp6rxKokkos::vectorized_operator(const int &ii, EV_FLOAT& ev) const +{ + // These arrays are atomic for Half/Thread neighbor style + Kokkos::View::value> > a_f = f; + Kokkos::View::value> > a_uCG = uCG; + Kokkos::View::value> > a_uCGnew = uCGnew; + + int tid = 0; +#ifndef KOKKOS_HAVE_CUDA + tid = DeviceType::hardware_thread_id(); +#endif + + const int nRep = 12; + const double shift = 1.05; + + const int i = d_ilist[ii]; + const double xtmp = x(i,0); + const double ytmp = x(i,1); + const double ztmp = x(i,2); + const int itype = type[i]; + const int jnum = d_numneigh[i]; + + double fx_i = 0.0; + double fy_i = 0.0; + double fz_i = 0.0; + double uCG_i = 0.0; + double uCGnew_i = 0.0; + + // Constant values for this atom. + const double epsilon1_i = PairExp6ParamData.epsilon1[i]; + const double alpha1_i = PairExp6ParamData.alpha1[i]; + const double rm1_i = PairExp6ParamData.rm1[i]; + const double mixWtSite1_i = PairExp6ParamData.mixWtSite1[i]; + const double epsilon2_i = PairExp6ParamData.epsilon2[i]; + const double alpha2_i = PairExp6ParamData.alpha2[i]; + const double rm2_i = PairExp6ParamData.rm2[i]; + const double mixWtSite2_i = PairExp6ParamData.mixWtSite2[i]; + const double epsilonOld1_i = PairExp6ParamData.epsilonOld1[i]; + const double alphaOld1_i = PairExp6ParamData.alphaOld1[i]; + const double rmOld1_i = PairExp6ParamData.rmOld1[i]; + const double mixWtSite1old_i = PairExp6ParamData.mixWtSite1old[i]; + const double epsilonOld2_i = PairExp6ParamData.epsilonOld2[i]; + const double alphaOld2_i = PairExp6ParamData.alphaOld2[i]; + const double rmOld2_i = PairExp6ParamData.rmOld2[i]; + const double mixWtSite2old_i = PairExp6ParamData.mixWtSite2old[i]; + + // Do error testing locally. + bool hasError = false; + + // Process this many neighbors concurrently -- if possible. 
+ const int batchSize = 8; + + int neigh_j[batchSize]; + double evdwlOld_j[batchSize]; + double uCGnew_j[batchSize]; + double fpair_j[batchSize]; + double delx_j[batchSize]; + double dely_j[batchSize]; + double delz_j[batchSize]; + double cutsq_j[batchSize]; + //double j_epsilon1[batchSize] ; + //double j_alpha1[batchSize] ; + //double j_rm1[batchSize] ; + //double j_mixWtSite1[batchSize] ; + //double j_epsilon2[batchSize] ; + //double j_alpha2[batchSize] ; + //double j_rm2[batchSize] ; + //double j_mixWtSite2[batchSize] ; + //double j_epsilonOld1[batchSize] ; + //double j_alphaOld1[batchSize] ; + //double j_rmOld1[batchSize] ; + //double j_mixWtSite1old[batchSize] ; + //double j_epsilonOld2[batchSize] ; + //double j_alphaOld2[batchSize] ; + //double j_rmOld2[batchSize] ; + //double j_mixWtSite2old[batchSize] ; + + for (int jptr = 0; jptr < jnum; ) + { + // The core computation here is very expensive so let's only bother with + // those that pass rsq < cutsq. + + for (int j = 0; j < batchSize; ++j) + { + evdwlOld_j[j] = 0.0; + uCGnew_j[j] = 0.0; + fpair_j[j] = 0.0; + //delx_j[j] = 0.0; + //dely_j[j] = 0.0; + //delz_j[j] = 0.0; + //cutsq_j[j] = 0.0; + } + + int niters = 0; + + for (; (jptr < jnum) && (niters < batchSize); ++jptr) + { + const int j = d_neighbors(i,jptr) & NEIGHMASK; + + const double delx = xtmp - x(j,0); + const double dely = ytmp - x(j,1); + const double delz = ztmp - x(j,2); + + const double rsq = delx*delx + dely*dely + delz*delz; + const int jtype = type[j]; + + if (rsq < d_cutsq(itype,jtype)) + { + delx_j [niters] = delx; + dely_j [niters] = dely; + delz_j [niters] = delz; + cutsq_j[niters] = d_cutsq(itype,jtype); + + neigh_j[niters] = d_neighbors(i,jptr); + + //j_epsilon1[niters] = PairExp6ParamData.epsilon1[j]; + //j_alpha1[niters] = PairExp6ParamData.alpha1[j]; + //j_rm1[niters] = PairExp6ParamData.rm1[j]; + //j_mixWtSite1[niters] = PairExp6ParamData.mixWtSite1[j]; + //j_epsilon2[niters] = PairExp6ParamData.epsilon2[j]; + //j_alpha2[niters] = 
PairExp6ParamData.alpha2[j]; + //j_rm2[niters] = PairExp6ParamData.rm2[j]; + //j_mixWtSite2[niters] = PairExp6ParamData.mixWtSite2[j]; + //j_epsilonOld1[niters] = PairExp6ParamData.epsilonOld1[j]; + //j_alphaOld1[niters] = PairExp6ParamData.alphaOld1[j]; + //j_rmOld1[niters] = PairExp6ParamData.rmOld1[j]; + //j_mixWtSite1old[niters] = PairExp6ParamData.mixWtSite1old[j]; + //j_epsilonOld2[niters] = PairExp6ParamData.epsilonOld2[j]; + //j_alphaOld2[niters] = PairExp6ParamData.alphaOld2[j]; + //j_rmOld2[niters] = PairExp6ParamData.rmOld2[j]; + //j_mixWtSite2old[niters] = PairExp6ParamData.mixWtSite2old[j]; + + ++niters; + } + } + + // reduction here. + #pragma simd reduction(+: fx_i, fy_i, fz_i, uCG_i, uCGnew_i) reduction(|: hasError) + for (int jlane = 0; jlane < niters; jlane++) + { + int j = neigh_j[jlane]; + const double factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + const double delx = delx_j[jlane]; + const double dely = dely_j[jlane]; + const double delz = delz_j[jlane]; + + const double rsq = delx*delx + dely*dely + delz*delz; + // const int jtype = type[j]; + + // if (rsq < d_cutsq(itype,jtype)) // optimize + { + const double r2inv = 1.0/rsq; + const double r6inv = r2inv*r2inv*r2inv; + + const double r = sqrt(rsq); + const double rCut2inv = 1.0/ cutsq_j[jlane]; + const double rCut6inv = rCut2inv*rCut2inv*rCut2inv; + const double rCut = sqrt( cutsq_j[jlane] ); + const double rCutInv = 1.0/rCut; + + // + // A. Compute the exp-6 potential + // + + // A1. 
Get alpha, epsilon and rm for particle j + + const double epsilon1_j = PairExp6ParamData.epsilon1[j]; + const double alpha1_j = PairExp6ParamData.alpha1[j]; + const double rm1_j = PairExp6ParamData.rm1[j]; + const double mixWtSite1_j = PairExp6ParamData.mixWtSite1[j]; + const double epsilon2_j = PairExp6ParamData.epsilon2[j]; + const double alpha2_j = PairExp6ParamData.alpha2[j]; + const double rm2_j = PairExp6ParamData.rm2[j]; + const double mixWtSite2_j = PairExp6ParamData.mixWtSite2[j]; + const double epsilonOld1_j = PairExp6ParamData.epsilonOld1[j]; + const double alphaOld1_j = PairExp6ParamData.alphaOld1[j]; + const double rmOld1_j = PairExp6ParamData.rmOld1[j]; + const double mixWtSite1old_j = PairExp6ParamData.mixWtSite1old[j]; + const double epsilonOld2_j = PairExp6ParamData.epsilonOld2[j]; + const double alphaOld2_j = PairExp6ParamData.alphaOld2[j]; + const double rmOld2_j = PairExp6ParamData.rmOld2[j]; + const double mixWtSite2old_j = PairExp6ParamData.mixWtSite2old[j]; + //const double epsilon1_j = j_epsilon1[jlane]; + //const double alpha1_j = j_alpha1[jlane]; + //const double rm1_j = j_rm1[jlane]; + //const double mixWtSite1_j = j_mixWtSite1[jlane]; + //const double epsilon2_j = j_epsilon2[jlane]; + //const double alpha2_j = j_alpha2[jlane]; + //const double rm2_j = j_rm2[jlane]; + //const double mixWtSite2_j = j_mixWtSite2[jlane]; + //const double epsilonOld1_j = j_epsilonOld1[jlane]; + //const double alphaOld1_j = j_alphaOld1[jlane]; + //const double rmOld1_j = j_rmOld1[jlane]; + //const double mixWtSite1old_j = j_mixWtSite1old[jlane]; + //const double epsilonOld2_j = j_epsilonOld2[jlane]; + //const double alphaOld2_j = j_alphaOld2[jlane]; + //const double rmOld2_j = j_rmOld2[jlane]; + //const double mixWtSite2old_j = j_mixWtSite2old[jlane]; + + // A2. 
Apply Lorentz-Berthelot mixing rules for the i-j pair + const double alphaOld12_ij = sqrt(alphaOld1_i*alphaOld2_j); + const double rmOld12_ij = 0.5*(rmOld1_i + rmOld2_j); + const double epsilonOld12_ij = sqrt(epsilonOld1_i*epsilonOld2_j); + const double alphaOld21_ij = sqrt(alphaOld2_i*alphaOld1_j); + const double rmOld21_ij = 0.5*(rmOld2_i + rmOld1_j); + const double epsilonOld21_ij = sqrt(epsilonOld2_i*epsilonOld1_j); + + const double alpha12_ij = sqrt(alpha1_i*alpha2_j); + const double rm12_ij = 0.5*(rm1_i + rm2_j); + const double epsilon12_ij = sqrt(epsilon1_i*epsilon2_j); + const double alpha21_ij = sqrt(alpha2_i*alpha1_j); + const double rm21_ij = 0.5*(rm2_i + rm1_j); + const double epsilon21_ij = sqrt(epsilon2_i*epsilon1_j); + + double evdwlOldEXP6_12 = 0.0; + double evdwlOldEXP6_21 = 0.0; + double evdwlEXP6_12 = 0.0; + double evdwlEXP6_21 = 0.0; + double fpairOldEXP6_12 = 0.0; + double fpairOldEXP6_21 = 0.0; + + if(rmOld12_ij!=0.0 && rmOld21_ij!=0.0) + { + hasError |= (alphaOld21_ij == 6.0 || alphaOld12_ij == 6.0); + + // A3. 
Compute some convenient quantities for evaluating the force + double rminv = 1.0/rmOld12_ij; + double buck1 = epsilonOld12_ij / (alphaOld12_ij - 6.0); + double rexp = expValue(alphaOld12_ij*(1.0-r*rminv)); + double rm2ij = rmOld12_ij*rmOld12_ij; + double rm6ij = rm2ij*rm2ij*rm2ij; + + // Compute the shifted potential + double rCutExp = expValue(alphaOld12_ij*(1.0-rCut*rminv)); + double buck2 = 6.0*alphaOld12_ij; + double urc = buck1*(6.0*rCutExp - alphaOld12_ij*rm6ij*rCut6inv); + double durc = -buck1*buck2*(rCutExp* rminv - rCutInv*rm6ij*rCut6inv); + double rin1 = shift*rmOld12_ij*func_rin(alphaOld12_ij); + + if(r < rin1){ + const double rin6 = rin1*rin1*rin1*rin1*rin1*rin1; + const double rin6inv = 1.0/rin6; + + const double rin1exp = expValue(alphaOld12_ij*(1.0-rin1*rminv)); + + const double uin1 = buck1*(6.0*rin1exp - alphaOld12_ij*rm6ij*rin6inv) - urc - durc*(rin1-rCut); + + const double win1 = buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) + rin1*durc; + + const double aRep = win1*__powint<12>(rin1,nRep)/nRep; + + const double uin1rep = aRep/__powint<12>(rin1,nRep); + + const double forceExp6 = double(nRep)*aRep/__powint<12>(r,nRep); + fpairOldEXP6_12 = factor_lj*forceExp6*r2inv; + + evdwlOldEXP6_12 = uin1 - uin1rep + aRep/__powint<12>(r,nRep); + } else { + const double forceExp6 = buck1*buck2*(r*rexp*rminv - rm6ij*r6inv) + r*durc; + fpairOldEXP6_12 = factor_lj*forceExp6*r2inv; + + evdwlOldEXP6_12 = buck1*(6.0*rexp - alphaOld12_ij*rm6ij*r6inv) - urc - durc*(r-rCut); + } + + // A3. 
Compute some convenient quantities for evaluating the force + rminv = 1.0/rmOld21_ij; + buck1 = epsilonOld21_ij / (alphaOld21_ij - 6.0); + buck2 = 6.0*alphaOld21_ij; + rexp = expValue(alphaOld21_ij*(1.0-r*rminv)); + rm2ij = rmOld21_ij*rmOld21_ij; + rm6ij = rm2ij*rm2ij*rm2ij; + + // Compute the shifted potential + rCutExp = expValue(alphaOld21_ij*(1.0-rCut*rminv)); + buck2 = 6.0*alphaOld21_ij; + urc = buck1*(6.0*rCutExp - alphaOld21_ij*rm6ij*rCut6inv); + durc = -buck1*buck2*(rCutExp* rminv - rCutInv*rm6ij*rCut6inv); + rin1 = shift*rmOld21_ij*func_rin(alphaOld21_ij); + + if(r < rin1){ + const double rin6 = rin1*rin1*rin1*rin1*rin1*rin1; + const double rin6inv = 1.0/rin6; + + const double rin1exp = expValue(alphaOld21_ij*(1.0-rin1*rminv)); + + const double uin1 = buck1*(6.0*rin1exp - alphaOld21_ij*rm6ij*rin6inv) - urc - durc*(rin1-rCut); + + const double win1 = buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) + rin1*durc; + + const double aRep = win1*__powint<12>(rin1,nRep)/nRep; + + const double uin1rep = aRep/__powint<12>(rin1,nRep); + + const double forceExp6 = double(nRep)*aRep/__powint<12>(r,nRep); + fpairOldEXP6_21 = factor_lj*forceExp6*r2inv; + + evdwlOldEXP6_21 = uin1 - uin1rep + aRep/__powint<12>(r,nRep); + } else { + const double forceExp6 = buck1*buck2*(r*rexp*rminv - rm6ij*r6inv) + r*durc; + fpairOldEXP6_21 = factor_lj*forceExp6*r2inv; + + evdwlOldEXP6_21 = buck1*(6.0*rexp - alphaOld21_ij*rm6ij*r6inv) - urc - durc*(r-rCut); + } + + double evdwlOld; + if (Site1EqSite2) + evdwlOld = sqrt(mixWtSite1old_i*mixWtSite2old_j)*evdwlOldEXP6_12; + else + evdwlOld = sqrt(mixWtSite1old_i*mixWtSite2old_j)*evdwlOldEXP6_12 + sqrt(mixWtSite2old_i*mixWtSite1old_j)*evdwlOldEXP6_21; + + evdwlOld *= factor_lj; + + uCG_i += 0.5*evdwlOld; + + evdwlOld_j[jlane] = evdwlOld; + } + + if(rm12_ij!=0.0 && rm21_ij!=0.0) + { + hasError |= (alpha21_ij == 6.0 || alpha12_ij == 6.0); + + // A3. 
Compute some convenient quantities for evaluating the force + double rminv = 1.0/rm12_ij; + double buck1 = epsilon12_ij / (alpha12_ij - 6.0); + double buck2 = 6.0*alpha12_ij; + double rexp = expValue(alpha12_ij*(1.0-r*rminv)); + double rm2ij = rm12_ij*rm12_ij; + double rm6ij = rm2ij*rm2ij*rm2ij; + + // Compute the shifted potential + double rCutExp = expValue(alpha12_ij*(1.0-rCut*rminv)); + double urc = buck1*(6.0*rCutExp - alpha12_ij*rm6ij*rCut6inv); + double durc = -buck1*buck2*(rCutExp*rminv - rCutInv*rm6ij*rCut6inv); + double rin1 = shift*rm12_ij*func_rin(alpha12_ij); + + if(r < rin1){ + const double rin6 = rin1*rin1*rin1*rin1*rin1*rin1; + const double rin6inv = 1.0/rin6; + + const double rin1exp = expValue(alpha12_ij*(1.0-rin1*rminv)); + + const double uin1 = buck1*(6.0*rin1exp - alpha12_ij*rm6ij*rin6inv) - urc - durc*(rin1-rCut); + + const double win1 = buck1*buck2*(rin1*rin1exp*rminv - rm6ij*rin6inv) + rin1*durc; + + const double aRep = win1*__powint<12>(rin1,nRep)/nRep; + + const double uin1rep = aRep/__powint<12>(rin1,nRep); + + evdwlEXP6_12 = uin1 - uin1rep + aRep/__powint<12>(r,nRep); + } else { + evdwlEXP6_12 = buck1*(6.0*rexp - alpha12_ij*rm6ij*r6inv) - urc - durc*(r-rCut); + } + + rminv = 1.0/rm21_ij; + buck1 = epsilon21_ij / (alpha21_ij - 6.0); + buck2 = 6.0*alpha21_ij; + rexp = expValue(alpha21_ij*(1.0-r*rminv)); + rm2ij = rm21_ij*rm21_ij; + rm6ij = rm2ij*rm2ij*rm2ij; + + // Compute the shifted potential + rCutExp = expValue(alpha21_ij*(1.0-rCut*rminv)); + urc = buck1*(6.0*rCutExp - alpha21_ij*rm6ij*rCut6inv); + durc = -buck1*buck2*(rCutExp*rminv - rCutInv*rm6ij*rCut6inv); + rin1 = shift*rm21_ij*func_rin(alpha21_ij); + + if(r < rin1){ + const double rin6 = rin1*rin1*rin1*rin1*rin1*rin1; + const double rin6inv = 1.0/rin6; + + const double rin1exp = expValue(alpha21_ij*(1.0-rin1*rminv)); + + const double uin1 = buck1*(6.0*rin1exp - alpha21_ij*rm6ij*rin6inv) - urc - durc*(rin1-rCut); + + const double win1 = buck1*buck2*(rin1*rin1exp*rminv - 
rm6ij*rin6inv) + rin1*durc; + + const double aRep = win1*__powint<12>(rin1,nRep)/nRep; + + const double uin1rep = aRep/__powint<12>(rin1,nRep); + + evdwlEXP6_21 = uin1 - uin1rep + aRep/__powint<12>(r,nRep); + } else { + evdwlEXP6_21 = buck1*(6.0*rexp - alpha21_ij*rm6ij*r6inv) - urc - durc*(r-rCut); + } + } + + // + // Apply Mixing Rule to get the overall force for the CG pair + // + double fpair; + if (Site1EqSite2) + fpair = sqrt(mixWtSite1old_i*mixWtSite2old_j)*fpairOldEXP6_12; + else + fpair = sqrt(mixWtSite1old_i*mixWtSite2old_j)*fpairOldEXP6_12 + sqrt(mixWtSite2old_i*mixWtSite1old_j)*fpairOldEXP6_21; + + double evdwl; + if (Site1EqSite2) + evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwlEXP6_12; + else + evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwlEXP6_12 + sqrt(mixWtSite2_i*mixWtSite1_j)*evdwlEXP6_21; + + evdwl *= factor_lj; + + fpair_j[jlane] = fpair; + + fx_i += delx*fpair; + fy_i += dely*fpair; + fz_i += delz*fpair; + + uCGnew_i += 0.5*evdwl; + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)) + uCGnew_j[jlane] = 0.5*evdwl; + + } // if rsq < cutsq + + } // end jlane loop. 
+ + for (int jlane = 0; jlane < niters; jlane++) + { + const int j = neigh_j[jlane] & NEIGHMASK; + + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) + if (UseAtomics) + a_uCG(j) += 0.5*evdwlOld_j[jlane]; + else + t_uCG(tid,j) += 0.5*evdwlOld_j[jlane]; + + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) + if (UseAtomics) + a_uCGnew(j) += uCGnew_j[jlane]; + else + t_uCGnew(tid,j) += uCGnew_j[jlane]; + + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { + if (UseAtomics) + { + a_f(j,0) -= delx_j[jlane]*fpair_j[jlane]; + a_f(j,1) -= dely_j[jlane]*fpair_j[jlane]; + a_f(j,2) -= delz_j[jlane]*fpair_j[jlane]; + } + else + { + t_f(tid,j,0) -= delx_j[jlane]*fpair_j[jlane]; + t_f(tid,j,1) -= dely_j[jlane]*fpair_j[jlane]; + t_f(tid,j,2) -= delz_j[jlane]*fpair_j[jlane]; + } + } + + double evdwl = evdwlOld_j[jlane]; + if (EVFLAG) + ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR||(jtemplate ev_tally(ev,i,j,evdwl,fpair_j[jlane],delx_j[jlane],dely_j[jlane],delz_j[jlane]); + } + } + + if (hasError) + k_error_flag.d_view() = 1; + + if (UseAtomics) + { + a_f(i,0) += fx_i; + a_f(i,1) += fy_i; + a_f(i,2) += fz_i; + a_uCG(i) += uCG_i; + a_uCGnew(i) += uCGnew_i; + } + else + { + t_f(tid,i,0) += fx_i; + t_f(tid,i,1) += fy_i; + t_f(tid,i,2) += fz_i; + t_uCG(tid,i) += uCG_i; + t_uCGnew(tid,i) += uCGnew_i; + } +} + template template KOKKOS_INLINE_FUNCTION diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.h b/src/KOKKOS/pair_exp6_rx_kokkos.h index ebbc26ea20..6899e5ff62 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.h +++ b/src/KOKKOS/pair_exp6_rx_kokkos.h @@ -91,6 +91,10 @@ class PairExp6rxKokkos : public PairExp6rx { KOKKOS_INLINE_FUNCTION void operator()(TagPairExp6rxComputeNoAtomics, const int&, EV_FLOAT&) const; + template + KOKKOS_INLINE_FUNCTION + void vectorized_operator(const int&, EV_FLOAT&) const; + template KOKKOS_INLINE_FUNCTION void 
operator()(TagPairExp6rxComputeNoAtomics, const int&) const; From 75670244bb5d30c407e8fc3635f06cf9b08ba817 Mon Sep 17 00:00:00 2001 From: "Christopher P. Stone" Date: Fri, 17 Mar 2017 17:02:47 -0400 Subject: [PATCH 210/267] Added ONE-TYPE template capability to vectorized_operator and cleaned up timers. --- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 98 +++++++++++------------------- src/KOKKOS/pair_exp6_rx_kokkos.h | 2 +- 2 files changed, 37 insertions(+), 63 deletions(-) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 85d919091f..5c74cba8c7 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -348,8 +348,8 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) copymode = 0; - TimerType t_stop = getTimeStamp(); - printf("PairExp6rxKokkos::compute %f %f\n", getElapsedTime(t_start, t_stop), getElapsedTime(t_mix_start, t_mix_stop)); + //TimerType t_stop = getTimeStamp(); + //printf("PairExp6rxKokkos::compute %f %f\n", getElapsedTime(t_start, t_stop), getElapsedTime(t_mix_start, t_mix_stop)); } template @@ -379,10 +379,17 @@ KOKKOS_INLINE_FUNCTION void PairExp6rxKokkos::operator()(TagPairExp6rxCompute, const int &ii, EV_FLOAT& ev) const { { + const bool one_type = (atom->ntypes == 1); if (isite1 == isite2) - this->vectorized_operator(ii, ev); + if (one_type) + this->vectorized_operator(ii, ev); + else + this->vectorized_operator(ii, ev); else - this->vectorized_operator(ii, ev); + if (one_type) + this->vectorized_operator(ii, ev); + else + this->vectorized_operator(ii, ev); return; } @@ -743,10 +750,17 @@ KOKKOS_INLINE_FUNCTION void PairExp6rxKokkos::operator()(TagPairExp6rxComputeNoAtomics, const int &ii, EV_FLOAT& ev) const { { + const bool one_type = (atom->ntypes == 1); if (isite1 == isite2) - this->vectorized_operator(ii, ev); + if (one_type) + this->vectorized_operator(ii, ev); + else + this->vectorized_operator(ii, ev); else - this->vectorized_operator(ii, ev); + if (one_type) + 
this->vectorized_operator(ii, ev); + else + this->vectorized_operator(ii, ev); return; } @@ -1109,7 +1123,7 @@ double __powint(const double& x, const int) } template - template + template KOKKOS_INLINE_FUNCTION void PairExp6rxKokkos::vectorized_operator(const int &ii, EV_FLOAT& ev) const { @@ -1157,6 +1171,12 @@ void PairExp6rxKokkos::vectorized_operator(const int &ii, EV_FLOAT& const double rmOld2_i = PairExp6ParamData.rmOld2[i]; const double mixWtSite2old_i = PairExp6ParamData.mixWtSite2old[i]; + const double cutsq_type11 = d_cutsq(1,1); + const double rCut2inv_type11 = 1.0/ cutsq_type11; + const double rCut6inv_type11 = rCut2inv_type11*rCut2inv_type11*rCut2inv_type11; + const double rCut_type11 = sqrt( cutsq_type11 ); + const double rCutInv_type11 = 1.0/rCut_type11; + // Do error testing locally. bool hasError = false; @@ -1171,22 +1191,6 @@ void PairExp6rxKokkos::vectorized_operator(const int &ii, EV_FLOAT& double dely_j[batchSize]; double delz_j[batchSize]; double cutsq_j[batchSize]; - //double j_epsilon1[batchSize] ; - //double j_alpha1[batchSize] ; - //double j_rm1[batchSize] ; - //double j_mixWtSite1[batchSize] ; - //double j_epsilon2[batchSize] ; - //double j_alpha2[batchSize] ; - //double j_rm2[batchSize] ; - //double j_mixWtSite2[batchSize] ; - //double j_epsilonOld1[batchSize] ; - //double j_alphaOld1[batchSize] ; - //double j_rmOld1[batchSize] ; - //double j_mixWtSite1old[batchSize] ; - //double j_epsilonOld2[batchSize] ; - //double j_alphaOld2[batchSize] ; - //double j_rmOld2[batchSize] ; - //double j_mixWtSite2old[batchSize] ; for (int jptr = 0; jptr < jnum; ) { @@ -1217,32 +1221,18 @@ void PairExp6rxKokkos::vectorized_operator(const int &ii, EV_FLOAT& const double rsq = delx*delx + dely*dely + delz*delz; const int jtype = type[j]; - if (rsq < d_cutsq(itype,jtype)) + const double cutsq_ij = (OneType) ? 
cutsq_type11 : d_cutsq(itype,jtype); + + if (rsq < cutsq_ij) { delx_j [niters] = delx; dely_j [niters] = dely; delz_j [niters] = delz; - cutsq_j[niters] = d_cutsq(itype,jtype); + if (OneType == false) + cutsq_j[niters] = cutsq_ij; neigh_j[niters] = d_neighbors(i,jptr); - //j_epsilon1[niters] = PairExp6ParamData.epsilon1[j]; - //j_alpha1[niters] = PairExp6ParamData.alpha1[j]; - //j_rm1[niters] = PairExp6ParamData.rm1[j]; - //j_mixWtSite1[niters] = PairExp6ParamData.mixWtSite1[j]; - //j_epsilon2[niters] = PairExp6ParamData.epsilon2[j]; - //j_alpha2[niters] = PairExp6ParamData.alpha2[j]; - //j_rm2[niters] = PairExp6ParamData.rm2[j]; - //j_mixWtSite2[niters] = PairExp6ParamData.mixWtSite2[j]; - //j_epsilonOld1[niters] = PairExp6ParamData.epsilonOld1[j]; - //j_alphaOld1[niters] = PairExp6ParamData.alphaOld1[j]; - //j_rmOld1[niters] = PairExp6ParamData.rmOld1[j]; - //j_mixWtSite1old[niters] = PairExp6ParamData.mixWtSite1old[j]; - //j_epsilonOld2[niters] = PairExp6ParamData.epsilonOld2[j]; - //j_alphaOld2[niters] = PairExp6ParamData.alphaOld2[j]; - //j_rmOld2[niters] = PairExp6ParamData.rmOld2[j]; - //j_mixWtSite2old[niters] = PairExp6ParamData.mixWtSite2old[j]; - ++niters; } } @@ -1268,10 +1258,10 @@ void PairExp6rxKokkos::vectorized_operator(const int &ii, EV_FLOAT& const double r6inv = r2inv*r2inv*r2inv; const double r = sqrt(rsq); - const double rCut2inv = 1.0/ cutsq_j[jlane]; - const double rCut6inv = rCut2inv*rCut2inv*rCut2inv; - const double rCut = sqrt( cutsq_j[jlane] ); - const double rCutInv = 1.0/rCut; + const double rCut2inv = (OneType) ? rCut2inv_type11 : (1.0/ cutsq_j[jlane]); + const double rCut6inv = (OneType) ? rCut6inv_type11 : (rCut2inv*rCut2inv*rCut2inv); + const double rCut = (OneType) ? rCut_type11 : (sqrt( cutsq_j[jlane] )); + const double rCutInv = (OneType) ? rCutInv_type11 : (1.0/rCut); // // A. 
Compute the exp-6 potential @@ -1295,22 +1285,6 @@ void PairExp6rxKokkos::vectorized_operator(const int &ii, EV_FLOAT& const double alphaOld2_j = PairExp6ParamData.alphaOld2[j]; const double rmOld2_j = PairExp6ParamData.rmOld2[j]; const double mixWtSite2old_j = PairExp6ParamData.mixWtSite2old[j]; - //const double epsilon1_j = j_epsilon1[jlane]; - //const double alpha1_j = j_alpha1[jlane]; - //const double rm1_j = j_rm1[jlane]; - //const double mixWtSite1_j = j_mixWtSite1[jlane]; - //const double epsilon2_j = j_epsilon2[jlane]; - //const double alpha2_j = j_alpha2[jlane]; - //const double rm2_j = j_rm2[jlane]; - //const double mixWtSite2_j = j_mixWtSite2[jlane]; - //const double epsilonOld1_j = j_epsilonOld1[jlane]; - //const double alphaOld1_j = j_alphaOld1[jlane]; - //const double rmOld1_j = j_rmOld1[jlane]; - //const double mixWtSite1old_j = j_mixWtSite1old[jlane]; - //const double epsilonOld2_j = j_epsilonOld2[jlane]; - //const double alphaOld2_j = j_alphaOld2[jlane]; - //const double rmOld2_j = j_rmOld2[jlane]; - //const double mixWtSite2old_j = j_mixWtSite2old[jlane]; // A2. Apply Lorentz-Berthelot mixing rules for the i-j pair const double alphaOld12_ij = sqrt(alphaOld1_i*alphaOld2_j); diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.h b/src/KOKKOS/pair_exp6_rx_kokkos.h index 6899e5ff62..9f38732c32 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.h +++ b/src/KOKKOS/pair_exp6_rx_kokkos.h @@ -91,7 +91,7 @@ class PairExp6rxKokkos : public PairExp6rx { KOKKOS_INLINE_FUNCTION void operator()(TagPairExp6rxComputeNoAtomics, const int&, EV_FLOAT&) const; - template + template KOKKOS_INLINE_FUNCTION void vectorized_operator(const int&, EV_FLOAT&) const; From 0cd3f0cd63f2305d2408bea36a18302ee11d9326 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 17 Mar 2017 19:11:39 -0400 Subject: [PATCH 211/267] USER-DPD: bugfix for npair_half_bin_newton_ssa when bonds are involved. 
Only locals have valid special[] arrays, so when finding neighbors of ghosts, we have to swap the arguments to find_special(). --- src/USER-DPD/npair_half_bin_newton_ssa.cpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/USER-DPD/npair_half_bin_newton_ssa.cpp b/src/USER-DPD/npair_half_bin_newton_ssa.cpp index ab439d3731..a6479d4c4f 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.cpp +++ b/src/USER-DPD/npair_half_bin_newton_ssa.cpp @@ -250,11 +250,6 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; - if (moltemplate) { - imol = molindex[i]; - iatom = molatom[i]; - tagprev = tag[i] - iatom - 1; - } ibin = coord2bin(x[i],xbin,ybin,zbin); @@ -281,12 +276,16 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) if (rsq <= cutneighsq[itype][jtype]) { if (molecular) { if (!moltemplate) - which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >= 0) - which = find_special(onemols[imol]->special[iatom], - onemols[imol]->nspecial[iatom], - tag[j]-tagprev); - else which = 0; + which = find_special(special[j],nspecial[j],tag[i]); + else { + int jmol = molindex[j]; + if (jmol >= 0) { + int jatom = molatom[j]; + which = find_special(onemols[jmol]->special[jatom], + onemols[jmol]->nspecial[jatom], + tag[i] - (tag[j] - jatom - 1)); + } else which = 0; + } if (which == 0) neighptr[n++] = j; else if (domain->minimum_image_check(delx,dely,delz)) neighptr[n++] = j; From fff43a4604a29de23f23f066027b5f8e41cd33f4 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Fri, 17 Mar 2017 19:33:04 -0400 Subject: [PATCH 212/267] USER-DPD Kokkos: bugfix for npair_ssa_kokkos.cpp corresponding to 0cd3f0cd --- src/KOKKOS/npair_ssa_kokkos.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index a9b59bfc96..7b5a569051 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ 
b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -515,11 +515,11 @@ void NPairSSAKokkosExecute::build_ghosts() if(rsq <= cutneighsq(itype,jtype)) { if (molecular) { if (!moltemplate) - which = find_special(i,j); - /* else if (imol >= 0) */ - /* which = find_special(onemols[imol]->special[iatom], */ - /* onemols[imol]->nspecial[iatom], */ - /* tag[j]-tagprev); */ + which = find_special(j,i); + /* else if (jmol >= 0) */ + /* which = find_special(onemols[jmol]->special[jatom], */ + /* onemols[jmol]->nspecial[jatom], */ + /* tag[i]-jtagprev); */ /* else which = 0; */ if (which == 0){ if(n Date: Sun, 19 Mar 2017 21:12:52 -0400 Subject: [PATCH 213/267] USER-DPD Kokkos: bugfix, add a missing line of code in pair_exp6_rx_kokkos.cpp --- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index abc158d72c..23fb4f59e5 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -917,6 +917,7 @@ void PairExp6rxKokkos::getMixingWeights(int id,double &epsilon1,doub nMoleculesOld2 = dvector(ispecies+nspecies,id); nMolecules2 = dvector(ispecies,id); fractionOld2 = dvector(ispecies+nspecies,id)/nTotalold; + fraction2 = nMolecules2/nTotal; } // If Site1 or Site2 matches is a fluid, then compute the paramters From 3c91f9734dbb97f293718b0c3f5e5d5d98accc38 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Tue, 21 Mar 2017 17:12:09 -0400 Subject: [PATCH 214/267] make RK solver check in fix_rx_kokkos.cpp be as lenient as in fix_rx.cpp NOTE: the (y < -MY_EPSILON) test was too strict, but don't know by how much This needs to be revisited before merging back to LAMMPS master.
--- src/KOKKOS/fix_rx_kokkos.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index ac81e5c2a7..d994b2c5d1 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -1389,9 +1389,9 @@ void FixRxKokkos::operator()(Tag_FixRxKokkos_solveSystemsone(FLERR,"Computed concentration in RK solver is < -10*DBL_EPSILON"); + //error->one(FLERR,"Computed concentration in RK solver is < -1.0e-10"); k_error_flag.d_view() = 2; // This should be an atomic update. } @@ -1599,9 +1599,9 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF // Store the solution back in dvector. for (int ispecies = 0; ispecies < nspecies; ispecies++) { - if (y[ispecies] < -MY_EPSILON) + if (y[ispecies] < -1.0e-10) { - //error->one(FLERR,"Computed concentration in RK solver is < -10*DBL_EPSILON"); + //error->one(FLERR,"Computed concentration in RK solver is < -1.0e-10"); k_error_flag.d_view() = 2; // This should be an atomic update. } @@ -1639,7 +1639,7 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF k_error_flag.template modify(); k_error_flag.template sync(); if (k_error_flag.h_view() == 2) - error->one(FLERR,"Computed concentration in RK solver is < -10*DBL_EPSILON"); + error->one(FLERR,"Computed concentration in RK solver is < -1.0e-10"); // Signal that dvector has been modified on this execution space. atomKK->modified( execution_space, DVECTOR_MASK ); From b418b46a03acf427c5d9eb015d1ea202557caed8 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Sun, 26 Mar 2017 23:07:48 -0400 Subject: [PATCH 215/267] USER-DPD: bugfix for an array that changed length in the non-kokkos version. 
--- src/KOKKOS/npair_ssa_kokkos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index 7b5a569051..699c2d3269 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -122,8 +122,8 @@ void NPairSSAKokkos::copy_stencil_info() NStencilSSA *ns_ssa = dynamic_cast(ns); if (!ns_ssa) error->one(FLERR, "NStencil wasn't a NStencilSSA object"); - k_nstencil_ssa = DAT::tdual_int_1d("NPairSSAKokkos:nstencil_ssa",8); - for (int k = 0; k < 8; ++k) { + k_nstencil_ssa = DAT::tdual_int_1d("NPairSSAKokkos:nstencil_ssa",5); + for (int k = 0; k < 5; ++k) { k_nstencil_ssa.h_view(k) = ns_ssa->nstencil_ssa[k]; } k_nstencil_ssa.modify(); From 5f0823172c4a2f76f6385e011e90876a08e7390c Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Mon, 27 Mar 2017 06:35:19 -0400 Subject: [PATCH 216/267] Make read_restart properly size the atom_vec_* data when reading via mpiio --- src/read_restart.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/read_restart.cpp b/src/read_restart.cpp index 6a950353ef..92d21a7062 100644 --- a/src/read_restart.cpp +++ b/src/read_restart.cpp @@ -208,6 +208,7 @@ void ReadRestart::command(int narg, char **arg) mpiio->read((headerOffset+assignedChunkOffset),assignedChunkSize,buf); mpiio->close(); + if (assignedChunkSize > atom->nmax) avec->grow(assignedChunkSize); m = 0; while (m < assignedChunkSize) m += avec->unpack_restart(&buf[m]); } From 28784a4ce2de90728365717a768c5d0f5b17772a Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Mon, 27 Mar 2017 08:38:40 -0500 Subject: [PATCH 217/267] Now with the correct math, make read_restart properly size the atom_vec_* data --- src/read_restart.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/read_restart.cpp b/src/read_restart.cpp index 92d21a7062..f29a603ef6 100644 --- a/src/read_restart.cpp +++ b/src/read_restart.cpp @@ -207,8 +207,23 @@ void 
ReadRestart::command(int narg, char **arg) memory->create(buf,assignedChunkSize,"read_restart:buf"); mpiio->read((headerOffset+assignedChunkOffset),assignedChunkSize,buf); mpiio->close(); - - if (assignedChunkSize > atom->nmax) avec->grow(assignedChunkSize); + if (!nextra) { // We can actually calculate number of atoms from assignedChunkSize + atom->nlocal = 1; // temporarily claim there is one atom... + int perAtomSize = avec->size_restart(); // ...so we can get its size + atom->nlocal = 0; // restore nlocal to zero atoms + int atomCt = (int) (assignedChunkSize / perAtomSize); +#ifdef DEBUG_ME_NOTNOW +fprintf(stdout, "ReadRestart::command %04d: pAS %d, aCt %d, nmax %d, chunckSize %12.0f, %12.0f\n" + ,me + ,perAtomSize + ,atomCt + ,atom->nmax + ,(double) assignedChunkSize + ,((double) perAtomSize) * atomCt +); +#endif + if (atomCt > atom->nmax) avec->grow(atomCt); + } m = 0; while (m < assignedChunkSize) m += avec->unpack_restart(&buf[m]); } From 0463923e330b070e578a38f7a2a0eda2b033c8c4 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Mon, 27 Mar 2017 10:41:32 -0500 Subject: [PATCH 218/267] USER-DPD Kokkos: tighten up the SSA data allocation to what is needed. A future version was planned to use more space for a ghost work queue. 
--- src/KOKKOS/npair_ssa_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index 699c2d3269..59470189bc 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -205,7 +205,7 @@ void NPairSSAKokkos::build(NeighList *list_) { NeighListKokkos* list = (NeighListKokkos*) list_; const int nlocal = includegroup?atom->nfirst:atom->nlocal; - const int nl_size = (nlocal + atom->nghost) * 4; + const int nl_size = (nlocal * 4) + atom->nghost; list->grow(nl_size); // Make special larger SSA neighbor list ssa_phaseCt = sz1*sy1*sx1; From 661bd37e15bcb83266dbd28b294889fe0ece9554 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Mon, 27 Mar 2017 14:53:48 -0500 Subject: [PATCH 219/267] Make read_restart evenly divide the work of reading when using mpiio. Currently only affects restart files written without any per-atom fix data. --- src/read_restart.cpp | 90 +++++++++++++++++++++++++++++++++----------- 1 file changed, 69 insertions(+), 21 deletions(-) diff --git a/src/read_restart.cpp b/src/read_restart.cpp index f29a603ef6..331a5d6cda 100644 --- a/src/read_restart.cpp +++ b/src/read_restart.cpp @@ -212,7 +212,8 @@ void ReadRestart::command(int narg, char **arg) int perAtomSize = avec->size_restart(); // ...so we can get its size atom->nlocal = 0; // restore nlocal to zero atoms int atomCt = (int) (assignedChunkSize / perAtomSize); -#ifdef DEBUG_ME_NOTNOW +//#define DEBUG_PRE_GROW +#ifdef DEBUG_PRE_GROW fprintf(stdout, "ReadRestart::command %04d: pAS %d, aCt %d, nmax %d, chunckSize %12.0f, %12.0f\n" ,me ,perAtomSize @@ -1026,6 +1027,7 @@ void ReadRestart::file_layout() // if the number of ranks that did the writing is different if (me == 0) { + int ndx; int *all_written_send_sizes; memory->create(all_written_send_sizes,nprocs_file, "write_restart:all_written_send_sizes"); @@ -1035,30 +1037,76 @@ void ReadRestart::file_layout() 
fread(all_written_send_sizes,sizeof(int),nprocs_file,fp); - int init_chunk_number = nprocs_file/nprocs; - int num_extra_chunks = nprocs_file - (nprocs*init_chunk_number); + if ((nprocs != nprocs_file) && !(atom->nextra_store)) { + // nprocs differ, but atom sizes are fixed length, yeah! + atom->nlocal = 1; // temporarily claim there is one atom... + int perAtomSize = atom->avec->size_restart(); // ...so we can get its size + atom->nlocal = 0; // restore nlocal to zero atoms - for (int i = 0; i < nprocs; i++) { - if (i < num_extra_chunks) - nproc_chunk_number[i] = init_chunk_number+1; - else - nproc_chunk_number[i] = init_chunk_number; - } + bigint total_size = 0; + for (int i = 0; i < nprocs_file; ++i) { + total_size += all_written_send_sizes[i]; + } + bigint total_ct = total_size / perAtomSize; - int all_written_send_sizes_index = 0; - bigint current_offset = 0; - for (int i=0;inatoms + ,(double) base_ct + ,(double) leftover_ct + ,nprocs + ,nprocs_file + ,(total_size == (total_ct * perAtomSize)) ? ' ' : 'E' + ,(total_ct == (base_ct * nprocs + leftover_ct)) ? ' ' : 'F' +); +#endif + } else { // Bummer, we have to read in based on how it was written + int init_chunk_number = nprocs_file/nprocs; + int num_extra_chunks = nprocs_file - (nprocs*init_chunk_number); + + for (int i = 0; i < nprocs; i++) { + if (i < num_extra_chunks) + nproc_chunk_number[i] = init_chunk_number+1; + else + nproc_chunk_number[i] = init_chunk_number; } + int all_written_send_sizes_index = 0; + bigint current_offset = 0; + for (int i=0;idestroy(all_written_send_sizes); memory->destroy(nproc_chunk_number); From 20ae05055dbb04054f0c1c6a0b4cda3260d7fbad Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Tue, 28 Mar 2017 11:38:26 -0600 Subject: [PATCH 220/267] fix memory leak via NeighListKokkos::clean_copy() There were several clean_copy() calls in pair styles *outside device code*. 
They seem to have been left over from an abandoned effort to copy the Kokkos neighbor list as a member of the pair style, instead of copying out the individual views needed. These leftover clean_copy() calls were setting pointers to NULL that had not been freed, leading to large memory leaks. I've removed the clean_copy() function entirely, and replaced it with the copymode flag system used in many other Kokkos objects. The copymode flag is only set to one in functors that hold copies of the neighbor list. --- src/KOKKOS/fix_qeq_reax_kokkos.cpp | 2 -- src/KOKKOS/fix_shardlow_kokkos.cpp | 2 -- src/KOKKOS/neigh_list_kokkos.cpp | 19 ++++++------------- src/KOKKOS/neigh_list_kokkos.h | 7 +------ src/KOKKOS/npair_kokkos.h | 2 +- src/KOKKOS/npair_ssa_kokkos.h | 2 +- src/KOKKOS/pair_coul_dsf_kokkos.cpp | 3 --- src/KOKKOS/pair_coul_wolf_kokkos.cpp | 3 --- src/KOKKOS/pair_eam_alloy_kokkos.cpp | 3 --- src/KOKKOS/pair_eam_fs_kokkos.cpp | 3 --- src/KOKKOS/pair_eam_kokkos.cpp | 5 +---- src/KOKKOS/pair_kokkos.h | 4 ++-- src/KOKKOS/pair_reax_c_kokkos.cpp | 3 --- src/KOKKOS/pair_sw_kokkos.cpp | 1 - src/KOKKOS/pair_tersoff_kokkos.cpp | 1 - src/KOKKOS/pair_tersoff_mod_kokkos.cpp | 1 - src/KOKKOS/pair_tersoff_zbl_kokkos.cpp | 1 - src/neigh_list.cpp | 2 ++ src/neigh_list.h | 3 ++- 19 files changed, 16 insertions(+), 51 deletions(-) diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 3b8d5a85ea..fbc6e0a298 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -217,8 +217,6 @@ void FixQEqReaxKokkos::pre_force(int vflag) d_ilist = k_list->d_ilist; inum = list->inum; - k_list->clean_copy(); - //cleanup_copy(); copymode = 1; int teamsize = TEAMSIZE; diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index bf026552fa..676df07b61 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -624,8 +624,6 @@ void FixShardlowKokkos::initial_integrate(int 
vflag) d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; - k_list->clean_copy(); - //cleanup_copy(); copymode = 1; dtsqrt = sqrt(update->dt); diff --git a/src/KOKKOS/neigh_list_kokkos.cpp b/src/KOKKOS/neigh_list_kokkos.cpp index b1b4e4467a..caf2dfee56 100644 --- a/src/KOKKOS/neigh_list_kokkos.cpp +++ b/src/KOKKOS/neigh_list_kokkos.cpp @@ -22,21 +22,14 @@ enum{NSQ,BIN,MULTI}; /* ---------------------------------------------------------------------- */ template -void NeighListKokkos::clean_copy() +NeighListKokkos::NeighListKokkos(class LAMMPS *lmp):NeighList(lmp) { - ilist = NULL; - numneigh = NULL; - firstneigh = NULL; - firstdouble = NULL; - dnum = 0; - iskip = NULL; - ijskip = NULL; - - ipage = NULL; - dpage = NULL; - + _stride = 1; + maxneighs = 16; + kokkos = 1; maxatoms = 0; -} + execution_space = ExecutionSpaceFromDevice::space; +}; /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/neigh_list_kokkos.h b/src/KOKKOS/neigh_list_kokkos.h index cece97197d..1c433f321c 100644 --- a/src/KOKKOS/neigh_list_kokkos.h +++ b/src/KOKKOS/neigh_list_kokkos.h @@ -68,18 +68,13 @@ class NeighListKokkos: public NeighList { public: int maxneighs; - void clean_copy(); void grow(int nmax); typename ArrayTypes::t_neighbors_2d d_neighbors; typename DAT::tdual_int_1d k_ilist; // local indices of I atoms typename ArrayTypes::t_int_1d d_ilist; typename ArrayTypes::t_int_1d d_numneigh; // # of J neighs for each I - NeighListKokkos(class LAMMPS *lmp): - NeighList(lmp) {_stride = 1; maxneighs = 16; kokkos = 1; maxatoms = 0; - execution_space = ExecutionSpaceFromDevice::space; - }; - ~NeighListKokkos() {numneigh = NULL; ilist = NULL;}; + NeighListKokkos(class LAMMPS *lmp); KOKKOS_INLINE_FUNCTION AtomNeighbors get_neighbors(const int &i) const { diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index b31ef2ebbf..ab094e68eb 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -265,7 +265,7 @@ 
class NeighborKokkosExecute h_new_maxneighs() = neigh_list.maxneighs; }; - ~NeighborKokkosExecute() {neigh_list.clean_copy();}; + ~NeighborKokkosExecute() {neigh_list.copymode = 1;}; template KOKKOS_FUNCTION diff --git a/src/KOKKOS/npair_ssa_kokkos.h b/src/KOKKOS/npair_ssa_kokkos.h index e38d648984..96efd7404b 100644 --- a/src/KOKKOS/npair_ssa_kokkos.h +++ b/src/KOKKOS/npair_ssa_kokkos.h @@ -287,7 +287,7 @@ class NPairSSAKokkosExecute h_new_maxneighs() = neigh_list.maxneighs; }; - ~NPairSSAKokkosExecute() {neigh_list.clean_copy();}; + ~NPairSSAKokkosExecute() {neigh_list.copymode = 1;}; void build_locals(); void build_ghosts(); diff --git a/src/KOKKOS/pair_coul_dsf_kokkos.cpp b/src/KOKKOS/pair_coul_dsf_kokkos.cpp index f2063bdc08..e6f5407f2d 100644 --- a/src/KOKKOS/pair_coul_dsf_kokkos.cpp +++ b/src/KOKKOS/pair_coul_dsf_kokkos.cpp @@ -120,9 +120,6 @@ void PairCoulDSFKokkos::compute(int eflag_in, int vflag_in) int inum = list->inum; - // Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle - - k_list->clean_copy(); copymode = 1; // loop over neighbors of my atoms diff --git a/src/KOKKOS/pair_coul_wolf_kokkos.cpp b/src/KOKKOS/pair_coul_wolf_kokkos.cpp index 8049ba0031..75177e2d81 100644 --- a/src/KOKKOS/pair_coul_wolf_kokkos.cpp +++ b/src/KOKKOS/pair_coul_wolf_kokkos.cpp @@ -121,9 +121,6 @@ void PairCoulWolfKokkos::compute(int eflag_in, int vflag_in) int inum = list->inum; - // Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle - - k_list->clean_copy(); copymode = 1; // loop over neighbors of my atoms diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp index 45c320bc51..acf9b27963 100644 --- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp +++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp @@ -122,9 +122,6 @@ void PairEAMAlloyKokkos::compute(int eflag_in, int vflag_in) d_ilist = k_list->d_ilist; int inum = list->inum; - // Call cleanup_copy which sets allocations NULL which are 
destructed by the PairStyle - - k_list->clean_copy(); copymode = 1; // zero out density diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp index b9fa82740a..a31263dfcd 100644 --- a/src/KOKKOS/pair_eam_fs_kokkos.cpp +++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp @@ -122,9 +122,6 @@ void PairEAMFSKokkos::compute(int eflag_in, int vflag_in) d_ilist = k_list->d_ilist; int inum = list->inum; - // Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle - - k_list->clean_copy(); copymode = 1; // zero out density diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index e4128de722..006c9582c5 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -117,9 +117,6 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) d_ilist = k_list->d_ilist; int inum = list->inum; - // Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle - - k_list->clean_copy(); copymode = 1; // zero out density @@ -870,4 +867,4 @@ template class PairEAMKokkos; #ifdef KOKKOS_HAVE_CUDA template class PairEAMKokkos; #endif -} \ No newline at end of file +} diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index 1e01b3df15..b0614a934b 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -87,7 +87,7 @@ struct PairComputeFunctor { vatom(c.d_vatom),list(*list_ptr) {}; // Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle - ~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();}; + ~PairComputeFunctor() {c.cleanup_copy();list.copymode = 1;}; KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const { return j >> SBBITS & 3; @@ -344,7 +344,7 @@ struct PairComputeFunctor { PairComputeFunctor(PairStyle* c_ptr, NeighListKokkos* list_ptr): c(*c_ptr),list(*list_ptr) {}; - ~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();}; + ~PairComputeFunctor() {c.cleanup_copy();list.copymode = 1;}; 
KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const { return j >> SBBITS & 3; diff --git a/src/KOKKOS/pair_reax_c_kokkos.cpp b/src/KOKKOS/pair_reax_c_kokkos.cpp index acf9c754cd..87915dce3e 100644 --- a/src/KOKKOS/pair_reax_c_kokkos.cpp +++ b/src/KOKKOS/pair_reax_c_kokkos.cpp @@ -709,8 +709,6 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; - k_list->clean_copy(); - if (eflag_global) { for (int i = 0; i < 14; i++) pvector[i] = 0.0; @@ -4012,7 +4010,6 @@ void PairReaxCKokkos::FindBond(int &numbonds) const int inum = list->inum; NeighListKokkos* k_list = static_cast*>(list); d_ilist = k_list->d_ilist; - k_list->clean_copy(); numbonds = 0; PairReaxCKokkosFindBondFunctor find_bond_functor(this); diff --git a/src/KOKKOS/pair_sw_kokkos.cpp b/src/KOKKOS/pair_sw_kokkos.cpp index a8950a0c79..e5c947cc8e 100644 --- a/src/KOKKOS/pair_sw_kokkos.cpp +++ b/src/KOKKOS/pair_sw_kokkos.cpp @@ -115,7 +115,6 @@ void PairSWKokkos::compute(int eflag_in, int vflag_in) d_numneigh = k_list->d_numneigh; d_neighbors = k_list->d_neighbors; - k_list->clean_copy(); copymode = 1; EV_FLOAT ev; diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp index 75280c8f7c..833c815ad9 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -200,7 +200,6 @@ void PairTersoffKokkos::compute(int eflag_in, int vflag_in) d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; - k_list->clean_copy(); copymode = 1; EV_FLOAT ev; diff --git a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp index d16a7fc4d7..d77ba2f141 100644 --- a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp @@ -200,7 +200,6 @@ void PairTersoffMODKokkos::compute(int eflag_in, int vflag_in) d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; - k_list->clean_copy(); copymode = 1; EV_FLOAT ev; diff --git 
a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp index e9bae49fb7..040d8c5230 100644 --- a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp @@ -214,7 +214,6 @@ void PairTersoffZBLKokkos::compute(int eflag_in, int vflag_in) d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; - k_list->clean_copy(); copymode = 1; EV_FLOAT ev; diff --git a/src/neigh_list.cpp b/src/neigh_list.cpp index 6376637832..dde544a69f 100644 --- a/src/neigh_list.cpp +++ b/src/neigh_list.cpp @@ -48,6 +48,7 @@ NeighList::NeighList(LAMMPS *lmp) : Pointers(lmp) ghost = 0; ssa = 0; copy = 0; + copymode = 0; dnum = 0; // ptrs @@ -86,6 +87,7 @@ NeighList::NeighList(LAMMPS *lmp) : Pointers(lmp) NeighList::~NeighList() { + if (copymode) return; if (!copy) { memory->destroy(ilist); memory->destroy(numneigh); diff --git a/src/neigh_list.h b/src/neigh_list.h index bef512512c..4010a68857 100644 --- a/src/neigh_list.h +++ b/src/neigh_list.h @@ -34,7 +34,8 @@ class NeighList : protected Pointers { int occasional; // 0 if build every reneighbor, 1 if not int ghost; // 1 if list stores neighbors of ghosts int ssa; // 1 if list stores Shardlow data - int copy; // 1 if this list copied from another list + int copy; // 1 if this list is (host) copied from another list + int copymode; // 1 if this is a Kokkos on-device copy int dnum; // # of doubles per neighbor, 0 if none // data structs to store neighbor pairs I,J and associated values From b3d6d9f8cf3e66437c7471a214fb668a7601c9fd Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Tue, 28 Mar 2017 11:38:26 -0600 Subject: [PATCH 221/267] fix memory leak via NeighListKokkos::clean_copy() There were several clean_copy() calls in pair styles *outside device code*. They seem to have been left over from an abandoned effort to copy the Kokkos neighbor list as a member of the pair style, instead of copying out the individual views needed. 
These leftover clean_copy() calls were setting pointers to NULL that had not been freed, leading to large memory leaks. I've removed the clean_copy() function entirely, and replaced it with the copymode flag system used in many other Kokkos objects. The copymode flag is only set to one in functors that hold copies of the neighbor list. --- src/KOKKOS/fix_qeq_reax_kokkos.cpp | 2 -- src/KOKKOS/fix_shardlow_kokkos.cpp | 2 -- src/KOKKOS/neigh_list_kokkos.cpp | 19 ++++++------------- src/KOKKOS/neigh_list_kokkos.h | 7 +------ src/KOKKOS/npair_kokkos.h | 2 +- src/KOKKOS/npair_ssa_kokkos.h | 2 +- src/KOKKOS/pair_coul_dsf_kokkos.cpp | 3 --- src/KOKKOS/pair_coul_wolf_kokkos.cpp | 3 --- src/KOKKOS/pair_eam_alloy_kokkos.cpp | 3 --- src/KOKKOS/pair_eam_fs_kokkos.cpp | 3 --- src/KOKKOS/pair_eam_kokkos.cpp | 5 +---- src/KOKKOS/pair_kokkos.h | 4 ++-- src/KOKKOS/pair_reax_c_kokkos.cpp | 3 --- src/KOKKOS/pair_sw_kokkos.cpp | 1 - src/KOKKOS/pair_tersoff_kokkos.cpp | 1 - src/KOKKOS/pair_tersoff_mod_kokkos.cpp | 1 - src/KOKKOS/pair_tersoff_zbl_kokkos.cpp | 1 - src/neigh_list.cpp | 2 ++ src/neigh_list.h | 3 ++- 19 files changed, 16 insertions(+), 51 deletions(-) diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 3b8d5a85ea..fbc6e0a298 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -217,8 +217,6 @@ void FixQEqReaxKokkos::pre_force(int vflag) d_ilist = k_list->d_ilist; inum = list->inum; - k_list->clean_copy(); - //cleanup_copy(); copymode = 1; int teamsize = TEAMSIZE; diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index bf026552fa..676df07b61 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -624,8 +624,6 @@ void FixShardlowKokkos::initial_integrate(int vflag) d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; - k_list->clean_copy(); - //cleanup_copy(); copymode = 1; dtsqrt = sqrt(update->dt); diff --git 
a/src/KOKKOS/neigh_list_kokkos.cpp b/src/KOKKOS/neigh_list_kokkos.cpp index b1b4e4467a..caf2dfee56 100644 --- a/src/KOKKOS/neigh_list_kokkos.cpp +++ b/src/KOKKOS/neigh_list_kokkos.cpp @@ -22,21 +22,14 @@ enum{NSQ,BIN,MULTI}; /* ---------------------------------------------------------------------- */ template -void NeighListKokkos::clean_copy() +NeighListKokkos::NeighListKokkos(class LAMMPS *lmp):NeighList(lmp) { - ilist = NULL; - numneigh = NULL; - firstneigh = NULL; - firstdouble = NULL; - dnum = 0; - iskip = NULL; - ijskip = NULL; - - ipage = NULL; - dpage = NULL; - + _stride = 1; + maxneighs = 16; + kokkos = 1; maxatoms = 0; -} + execution_space = ExecutionSpaceFromDevice::space; +}; /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/neigh_list_kokkos.h b/src/KOKKOS/neigh_list_kokkos.h index cece97197d..1c433f321c 100644 --- a/src/KOKKOS/neigh_list_kokkos.h +++ b/src/KOKKOS/neigh_list_kokkos.h @@ -68,18 +68,13 @@ class NeighListKokkos: public NeighList { public: int maxneighs; - void clean_copy(); void grow(int nmax); typename ArrayTypes::t_neighbors_2d d_neighbors; typename DAT::tdual_int_1d k_ilist; // local indices of I atoms typename ArrayTypes::t_int_1d d_ilist; typename ArrayTypes::t_int_1d d_numneigh; // # of J neighs for each I - NeighListKokkos(class LAMMPS *lmp): - NeighList(lmp) {_stride = 1; maxneighs = 16; kokkos = 1; maxatoms = 0; - execution_space = ExecutionSpaceFromDevice::space; - }; - ~NeighListKokkos() {numneigh = NULL; ilist = NULL;}; + NeighListKokkos(class LAMMPS *lmp); KOKKOS_INLINE_FUNCTION AtomNeighbors get_neighbors(const int &i) const { diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index b31ef2ebbf..ab094e68eb 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -265,7 +265,7 @@ class NeighborKokkosExecute h_new_maxneighs() = neigh_list.maxneighs; }; - ~NeighborKokkosExecute() {neigh_list.clean_copy();}; + ~NeighborKokkosExecute() 
{neigh_list.copymode = 1;}; template KOKKOS_FUNCTION diff --git a/src/KOKKOS/npair_ssa_kokkos.h b/src/KOKKOS/npair_ssa_kokkos.h index e38d648984..96efd7404b 100644 --- a/src/KOKKOS/npair_ssa_kokkos.h +++ b/src/KOKKOS/npair_ssa_kokkos.h @@ -287,7 +287,7 @@ class NPairSSAKokkosExecute h_new_maxneighs() = neigh_list.maxneighs; }; - ~NPairSSAKokkosExecute() {neigh_list.clean_copy();}; + ~NPairSSAKokkosExecute() {neigh_list.copymode = 1;}; void build_locals(); void build_ghosts(); diff --git a/src/KOKKOS/pair_coul_dsf_kokkos.cpp b/src/KOKKOS/pair_coul_dsf_kokkos.cpp index f2063bdc08..e6f5407f2d 100644 --- a/src/KOKKOS/pair_coul_dsf_kokkos.cpp +++ b/src/KOKKOS/pair_coul_dsf_kokkos.cpp @@ -120,9 +120,6 @@ void PairCoulDSFKokkos::compute(int eflag_in, int vflag_in) int inum = list->inum; - // Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle - - k_list->clean_copy(); copymode = 1; // loop over neighbors of my atoms diff --git a/src/KOKKOS/pair_coul_wolf_kokkos.cpp b/src/KOKKOS/pair_coul_wolf_kokkos.cpp index 8049ba0031..75177e2d81 100644 --- a/src/KOKKOS/pair_coul_wolf_kokkos.cpp +++ b/src/KOKKOS/pair_coul_wolf_kokkos.cpp @@ -121,9 +121,6 @@ void PairCoulWolfKokkos::compute(int eflag_in, int vflag_in) int inum = list->inum; - // Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle - - k_list->clean_copy(); copymode = 1; // loop over neighbors of my atoms diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp index 45c320bc51..acf9b27963 100644 --- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp +++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp @@ -122,9 +122,6 @@ void PairEAMAlloyKokkos::compute(int eflag_in, int vflag_in) d_ilist = k_list->d_ilist; int inum = list->inum; - // Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle - - k_list->clean_copy(); copymode = 1; // zero out density diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp 
b/src/KOKKOS/pair_eam_fs_kokkos.cpp index b9fa82740a..a31263dfcd 100644 --- a/src/KOKKOS/pair_eam_fs_kokkos.cpp +++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp @@ -122,9 +122,6 @@ void PairEAMFSKokkos::compute(int eflag_in, int vflag_in) d_ilist = k_list->d_ilist; int inum = list->inum; - // Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle - - k_list->clean_copy(); copymode = 1; // zero out density diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index e4128de722..006c9582c5 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -117,9 +117,6 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) d_ilist = k_list->d_ilist; int inum = list->inum; - // Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle - - k_list->clean_copy(); copymode = 1; // zero out density @@ -870,4 +867,4 @@ template class PairEAMKokkos; #ifdef KOKKOS_HAVE_CUDA template class PairEAMKokkos; #endif -} \ No newline at end of file +} diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index 1e01b3df15..b0614a934b 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -87,7 +87,7 @@ struct PairComputeFunctor { vatom(c.d_vatom),list(*list_ptr) {}; // Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle - ~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();}; + ~PairComputeFunctor() {c.cleanup_copy();list.copymode = 1;}; KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const { return j >> SBBITS & 3; @@ -344,7 +344,7 @@ struct PairComputeFunctor { PairComputeFunctor(PairStyle* c_ptr, NeighListKokkos* list_ptr): c(*c_ptr),list(*list_ptr) {}; - ~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();}; + ~PairComputeFunctor() {c.cleanup_copy();list.copymode = 1;}; KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const { return j >> SBBITS & 3; diff --git a/src/KOKKOS/pair_reax_c_kokkos.cpp 
b/src/KOKKOS/pair_reax_c_kokkos.cpp index acf9c754cd..87915dce3e 100644 --- a/src/KOKKOS/pair_reax_c_kokkos.cpp +++ b/src/KOKKOS/pair_reax_c_kokkos.cpp @@ -709,8 +709,6 @@ void PairReaxCKokkos::compute(int eflag_in, int vflag_in) d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; - k_list->clean_copy(); - if (eflag_global) { for (int i = 0; i < 14; i++) pvector[i] = 0.0; @@ -4012,7 +4010,6 @@ void PairReaxCKokkos::FindBond(int &numbonds) const int inum = list->inum; NeighListKokkos* k_list = static_cast*>(list); d_ilist = k_list->d_ilist; - k_list->clean_copy(); numbonds = 0; PairReaxCKokkosFindBondFunctor find_bond_functor(this); diff --git a/src/KOKKOS/pair_sw_kokkos.cpp b/src/KOKKOS/pair_sw_kokkos.cpp index a8950a0c79..e5c947cc8e 100644 --- a/src/KOKKOS/pair_sw_kokkos.cpp +++ b/src/KOKKOS/pair_sw_kokkos.cpp @@ -115,7 +115,6 @@ void PairSWKokkos::compute(int eflag_in, int vflag_in) d_numneigh = k_list->d_numneigh; d_neighbors = k_list->d_neighbors; - k_list->clean_copy(); copymode = 1; EV_FLOAT ev; diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp index 75280c8f7c..833c815ad9 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -200,7 +200,6 @@ void PairTersoffKokkos::compute(int eflag_in, int vflag_in) d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; - k_list->clean_copy(); copymode = 1; EV_FLOAT ev; diff --git a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp index d16a7fc4d7..d77ba2f141 100644 --- a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp @@ -200,7 +200,6 @@ void PairTersoffMODKokkos::compute(int eflag_in, int vflag_in) d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; - k_list->clean_copy(); copymode = 1; EV_FLOAT ev; diff --git a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp index e9bae49fb7..040d8c5230 100644 --- 
a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp @@ -214,7 +214,6 @@ void PairTersoffZBLKokkos::compute(int eflag_in, int vflag_in) d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; - k_list->clean_copy(); copymode = 1; EV_FLOAT ev; diff --git a/src/neigh_list.cpp b/src/neigh_list.cpp index 6376637832..dde544a69f 100644 --- a/src/neigh_list.cpp +++ b/src/neigh_list.cpp @@ -48,6 +48,7 @@ NeighList::NeighList(LAMMPS *lmp) : Pointers(lmp) ghost = 0; ssa = 0; copy = 0; + copymode = 0; dnum = 0; // ptrs @@ -86,6 +87,7 @@ NeighList::NeighList(LAMMPS *lmp) : Pointers(lmp) NeighList::~NeighList() { + if (copymode) return; if (!copy) { memory->destroy(ilist); memory->destroy(numneigh); diff --git a/src/neigh_list.h b/src/neigh_list.h index bef512512c..4010a68857 100644 --- a/src/neigh_list.h +++ b/src/neigh_list.h @@ -34,7 +34,8 @@ class NeighList : protected Pointers { int occasional; // 0 if build every reneighbor, 1 if not int ghost; // 1 if list stores neighbors of ghosts int ssa; // 1 if list stores Shardlow data - int copy; // 1 if this list copied from another list + int copy; // 1 if this list is (host) copied from another list + int copymode; // 1 if this is a Kokkos on-device copy int dnum; // # of doubles per neighbor, 0 if none // data structs to store neighbor pairs I,J and associated values From bf4f0817d4f85bc22a6f1b3a01b40b8f23b46a6b Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Fri, 31 Mar 2017 15:57:00 -0600 Subject: [PATCH 222/267] fix memory leaks in pair_tabl_rx_kokkos --- src/KOKKOS/pair_table_rx_kokkos.cpp | 19 ++++++++++++++----- src/USER-DPD/pair_table_rx.cpp | 10 ++++++++++ src/USER-DPD/pair_table_rx.h | 2 +- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 044f303bf5..eacaf83cf5 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -147,6 +147,9 @@ 
PairTableRXKokkos::PairTableRXKokkos(LAMMPS *lmp) : PairTable(lmp) h_table = new TableHost(); d_table = new TableDevice(); fractionalWeighting = true; + + site1 = nullptr; + site2 = nullptr; } /* ---------------------------------------------------------------------- */ @@ -156,14 +159,21 @@ PairTableRXKokkos::~PairTableRXKokkos() { if (copymode) return; + delete [] site1; + delete [] site2; + memory->destroy_kokkos(k_eatom,eatom); memory->destroy_kokkos(k_vatom,vatom); + if (allocated) { + memory->destroy_kokkos(d_table->cutsq, cutsq); + memory->destroy_kokkos(d_table->tabindex, tabindex); + } + delete h_table; h_table = nullptr; delete d_table; d_table = nullptr; - copymode = true; //prevents base class destructor from running } /* ---------------------------------------------------------------------- */ @@ -981,6 +991,8 @@ void PairTableRXKokkos::settings(int narg, char **arg) for (int m = 0; m < ntables; m++) free_table(&tables[m]); memory->sfree(tables); + ntables = 0; + tables = NULL; if (allocated) { memory->destroy(setflag); @@ -990,11 +1002,8 @@ void PairTableRXKokkos::settings(int narg, char **arg) d_table_const.cutsq = d_table->cutsq = typename ArrayTypes::t_ffloat_2d(); h_table->cutsq = typename ArrayTypes::t_ffloat_2d(); + allocated = 0; } - allocated = 0; - - ntables = 0; - tables = NULL; } /* ---------------------------------------------------------------------- diff --git a/src/USER-DPD/pair_table_rx.cpp b/src/USER-DPD/pair_table_rx.cpp index cf85fe2e60..89d09e7322 100644 --- a/src/USER-DPD/pair_table_rx.cpp +++ b/src/USER-DPD/pair_table_rx.cpp @@ -47,6 +47,16 @@ enum{NONE,RLINEAR,RSQ,BMP}; PairTableRX::PairTableRX(LAMMPS *lmp) : PairTable(lmp) { fractionalWeighting = true; + site1 = NULL; + site2 = NULL; +} + +/* ---------------------------------------------------------------------- */ + +PairTableRX::~PairTableRX() +{ + delete [] site1; + delete [] site2; } /* ---------------------------------------------------------------------- */ diff --git 
a/src/USER-DPD/pair_table_rx.h b/src/USER-DPD/pair_table_rx.h index 9dee5df266..da7889e99a 100644 --- a/src/USER-DPD/pair_table_rx.h +++ b/src/USER-DPD/pair_table_rx.h @@ -27,7 +27,7 @@ namespace LAMMPS_NS { class PairTableRX : public PairTable { public: PairTableRX(class LAMMPS *); - virtual ~PairTableRX() {} + virtual ~PairTableRX(); virtual void compute(int, int); void settings(int, char **); From 5edbd63920681f585b054d4aebf2fb7eb462f5ce Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Fri, 31 Mar 2017 16:03:05 -0600 Subject: [PATCH 223/267] fix memory leak in fix_shardlow_kokkos --- src/KOKKOS/fix_shardlow_kokkos.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index 676df07b61..52287d586c 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -741,6 +741,7 @@ fprintf(stdout, "\n%6d %6d,%6d %6d: " ); #endif + copymode = 0; } /* ---------------------------------------------------------------------- */ From fe82926c1f41d2a99ad75ca3d07312ad0945e52a Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Fri, 31 Mar 2017 15:57:00 -0600 Subject: [PATCH 224/267] fix memory leaks in pair_tabl_rx_kokkos --- src/KOKKOS/pair_table_rx_kokkos.cpp | 19 ++++++++++++++----- src/USER-DPD/pair_table_rx.cpp | 10 ++++++++++ src/USER-DPD/pair_table_rx.h | 2 +- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 044f303bf5..eacaf83cf5 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -147,6 +147,9 @@ PairTableRXKokkos::PairTableRXKokkos(LAMMPS *lmp) : PairTable(lmp) h_table = new TableHost(); d_table = new TableDevice(); fractionalWeighting = true; + + site1 = nullptr; + site2 = nullptr; } /* ---------------------------------------------------------------------- */ @@ -156,14 +159,21 @@ PairTableRXKokkos::~PairTableRXKokkos() { if (copymode) 
return; + delete [] site1; + delete [] site2; + memory->destroy_kokkos(k_eatom,eatom); memory->destroy_kokkos(k_vatom,vatom); + if (allocated) { + memory->destroy_kokkos(d_table->cutsq, cutsq); + memory->destroy_kokkos(d_table->tabindex, tabindex); + } + delete h_table; h_table = nullptr; delete d_table; d_table = nullptr; - copymode = true; //prevents base class destructor from running } /* ---------------------------------------------------------------------- */ @@ -981,6 +991,8 @@ void PairTableRXKokkos::settings(int narg, char **arg) for (int m = 0; m < ntables; m++) free_table(&tables[m]); memory->sfree(tables); + ntables = 0; + tables = NULL; if (allocated) { memory->destroy(setflag); @@ -990,11 +1002,8 @@ void PairTableRXKokkos::settings(int narg, char **arg) d_table_const.cutsq = d_table->cutsq = typename ArrayTypes::t_ffloat_2d(); h_table->cutsq = typename ArrayTypes::t_ffloat_2d(); + allocated = 0; } - allocated = 0; - - ntables = 0; - tables = NULL; } /* ---------------------------------------------------------------------- diff --git a/src/USER-DPD/pair_table_rx.cpp b/src/USER-DPD/pair_table_rx.cpp index cf85fe2e60..89d09e7322 100644 --- a/src/USER-DPD/pair_table_rx.cpp +++ b/src/USER-DPD/pair_table_rx.cpp @@ -47,6 +47,16 @@ enum{NONE,RLINEAR,RSQ,BMP}; PairTableRX::PairTableRX(LAMMPS *lmp) : PairTable(lmp) { fractionalWeighting = true; + site1 = NULL; + site2 = NULL; +} + +/* ---------------------------------------------------------------------- */ + +PairTableRX::~PairTableRX() +{ + delete [] site1; + delete [] site2; } /* ---------------------------------------------------------------------- */ diff --git a/src/USER-DPD/pair_table_rx.h b/src/USER-DPD/pair_table_rx.h index 9dee5df266..da7889e99a 100644 --- a/src/USER-DPD/pair_table_rx.h +++ b/src/USER-DPD/pair_table_rx.h @@ -27,7 +27,7 @@ namespace LAMMPS_NS { class PairTableRX : public PairTable { public: PairTableRX(class LAMMPS *); - virtual ~PairTableRX() {} + virtual ~PairTableRX(); virtual void 
compute(int, int); void settings(int, char **); From 6ba59cb4583c86af3f0104bb10e1ecd324bf9cce Mon Sep 17 00:00:00 2001 From: Dan Ibanez Date: Fri, 31 Mar 2017 16:03:05 -0600 Subject: [PATCH 225/267] fix memory leak in fix_shardlow_kokkos --- src/KOKKOS/fix_shardlow_kokkos.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index 676df07b61..52287d586c 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -741,6 +741,7 @@ fprintf(stdout, "\n%6d %6d,%6d %6d: " ); #endif + copymode = 0; } /* ---------------------------------------------------------------------- */ From ac64183ecfdd1a7cdd82770c96e1fbe05934967e Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Sat, 1 Apr 2017 12:11:55 -0400 Subject: [PATCH 226/267] USER-DPD Kokkos: WIP on preflighting SSA neighbor list build, with debugging --- src/KOKKOS/npair_ssa_kokkos.cpp | 73 +++++++++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index 59470189bc..87cc02e734 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -240,6 +240,58 @@ void NPairSSAKokkos::build(NeighList *list_) ssa_gitemLen = k_ssa_gitemLen.view(); } +{ // Preflight the neighbor list build + const typename ArrayTypes::t_int_1d_const c_bincount = k_bincount.view(); + int inum = 0; + + int workPhase = 0; + // loop over bins with local atoms, storing half of the neighbors + for (int zoff = sz1 - 1; zoff >= 0; --zoff) { + for (int yoff = sy1 - 1; yoff >= 0; --yoff) { + for (int xoff = sx1 - 1; xoff >= 0; --xoff) { + int workItem = 0; + for (int zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) { + for (int ybin = lbinylo + yoff - sy1 + 1; ybin < lbinyhi; ybin += sy1) { + for (int xbin = lbinxlo + xoff - sx1 + 1; xbin < lbinxhi; xbin += sx1) { +// if (workItem >= phaseLenEstimate) 
error->one(FLERR,"phaseLenEstimate was too small"); + ssa_itemLoc(workPhase, workItem) = inum; // record where workItem starts in ilist + + for (int subphase = 0; subphase < 4; subphase++) { + int s_ybin = ybin + ((subphase & 0x2) ? sy1 - 1 : 0); + int s_xbin = xbin + ((subphase & 0x1) ? sx1 - 1 : 0); + if ((s_ybin < lbinylo) || (s_ybin >= lbinyhi)) continue; + if ((s_xbin < lbinxlo) || (s_xbin >= lbinxhi)) continue; + + int ibin = zbin*mbiny*mbinx + s_ybin*mbinx + s_xbin; + inum += c_bincount(ibin); + } + // record where workItem ends in ilist + ssa_itemLen(workPhase,workItem) = inum - ssa_itemLoc(workPhase,workItem); + if (ssa_itemLen(workPhase,workItem) > 0) workItem++; + } + } + } + +fprintf(stdout, "phase %3d could use %6d inums, expected %6d inums. maxworkItems = %3d, inums/workItems = %g\n" + ,workPhase + ,inum - ssa_itemLoc(workPhase, 0) + ,(nlocal*4 + ssa_phaseCt - 1) / ssa_phaseCt + ,workItem + ,(inum - ssa_itemLoc(workPhase, 0)) / (double) workItem +); + // record where workPhase ends + ssa_phaseLen(workPhase++) = workItem; + } + } + } +fprintf(stdout, "total %3d could use %6d inums, expected %6d inums. 
inums/phase = %g\n" + ,workPhase + ,inum + ,nlocal*4 + ,inum / (double) workPhase +); +} + NPairSSAKokkosExecute data(*list, k_cutneighsq.view(), @@ -355,18 +407,18 @@ void NPairSSAKokkosExecute::build_locals() int n = 0; int which = 0; int inum = 0; - int workPhase = 0; + // loop over bins with local atoms, storing half of the neighbors for (int zoff = sz1 - 1; zoff >= 0; --zoff) { for (int yoff = sy1 - 1; yoff >= 0; --yoff) { for (int xoff = sx1 - 1; xoff >= 0; --xoff) { int workItem = 0; + inum = d_ssa_itemLoc(workPhase, workItem); // get where workPhase starts in ilist for (int zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) { for (int ybin = lbinylo + yoff - sy1 + 1; ybin < lbinyhi; ybin += sy1) { for (int xbin = lbinxlo + xoff - sx1 + 1; xbin < lbinxhi; xbin += sx1) { -// if (workItem >= phaseLenEstimate) error->one(FLERR,"phaseLenEstimate was too small"); - d_ssa_itemLoc(workPhase, workItem) = inum; // record where workItem starts in ilist + d_ssa_itemLoc(workPhase, workItem) = inum; // record where workItem actually starts in ilist for (int subphase = 0; subphase < 4; subphase++) { int s_ybin = ybin + ((subphase & 0x2) ? sy1 - 1 : 0); @@ -441,18 +493,31 @@ void NPairSSAKokkosExecute::build_locals() } } } - // record where workItem ends in ilist + // record where workItem actually ends in ilist d_ssa_itemLen(workPhase,workItem) = inum - d_ssa_itemLoc(workPhase,workItem); if (d_ssa_itemLen(workPhase,workItem) > 0) workItem++; } } } +fprintf(stdout, "phase %3d used %6d inums, expected %6d inums. workItems = %3d, inums/workItems = %g\n" + ,workPhase + ,inum - d_ssa_itemLoc(workPhase, 0) + ,(nlocal*4 + ssa_phaseCt - 1) / ssa_phaseCt + ,workItem + ,(inum - d_ssa_itemLoc(workPhase, 0)) / (double) workItem +); // record where workPhase ends d_ssa_phaseLen(workPhase++) = workItem; } } } +fprintf(stdout, "Total %3d could use %6d inums, expected %6d inums. 
inums/phase = %g\n" + ,workPhase + ,inum + ,nlocal*4 + ,inum / (double) workPhase +); //FIXME if (ssa_phaseCt != workPhase) error->one(FLERR,"ssa_phaseCt was wrong"); From ac4c35ce8d1caf0d7deaa1f0c816c0f5f5d9c523 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Sat, 1 Apr 2017 13:45:29 -0400 Subject: [PATCH 227/267] USER-DPD Kokkos: more WIP on preflighting SSA neighbor list build, with debugging --- src/KOKKOS/npair_ssa_kokkos.cpp | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index 87cc02e734..5c20f1c270 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -205,8 +205,7 @@ void NPairSSAKokkos::build(NeighList *list_) { NeighListKokkos* list = (NeighListKokkos*) list_; const int nlocal = includegroup?atom->nfirst:atom->nlocal; - const int nl_size = (nlocal * 4) + atom->nghost; - list->grow(nl_size); // Make special larger SSA neighbor list + int nl_size = atom->nghost; ssa_phaseCt = sz1*sy1*sx1; @@ -240,8 +239,11 @@ void NPairSSAKokkos::build(NeighList *list_) ssa_gitemLen = k_ssa_gitemLen.view(); } -{ // Preflight the neighbor list build +{ // Preflight the neighbor list workplan const typename ArrayTypes::t_int_1d_const c_bincount = k_bincount.view(); + const typename ArrayTypes::t_int_2d_const c_bins = k_bins.view(); + const typename ArrayTypes::t_int_1d_const_um c_stencil = k_stencil.view(); + const typename ArrayTypes::t_int_1d_const c_nstencil_ssa = k_nstencil_ssa.view(); int inum = 0; int workPhase = 0; @@ -263,7 +265,17 @@ void NPairSSAKokkos::build(NeighList *list_) if ((s_xbin < lbinxlo) || (s_xbin >= lbinxhi)) continue; int ibin = zbin*mbiny*mbinx + s_ybin*mbinx + s_xbin; - inum += c_bincount(ibin); + for (int il = 0; il < c_bincount(ibin); ++il) { + int n = 0; + + // count all local atoms in the current stencil "subphase" as potential neighbors + for (int k = c_nstencil_ssa(subphase); k < 
c_nstencil_ssa(subphase+1); k++) { + const int jbin = ibin+c_stencil(k); + int jl = (jbin != ibin) ? 0 : (il + 1); // same bin as il, so start just past il in the bin + n += c_bincount(jbin) - jl; + } + if (n > 0) inum++; + } } // record where workItem ends in ilist ssa_itemLen(workPhase,workItem) = inum - ssa_itemLoc(workPhase,workItem); @@ -290,8 +302,11 @@ fprintf(stdout, "total %3d could use %6d inums, expected %6d inums. inums/phase ,nlocal*4 ,inum / (double) workPhase ); + nl_size += inum; } + list->grow(nl_size); // Make special larger SSA neighbor list + NPairSSAKokkosExecute data(*list, k_cutneighsq.view(), @@ -404,7 +419,6 @@ fprintf(stdout, "total %3d could use %6d inums, expected %6d inums. inums/phase template void NPairSSAKokkosExecute::build_locals() { - int n = 0; int which = 0; int inum = 0; int workPhase = 0; @@ -429,7 +443,7 @@ void NPairSSAKokkosExecute::build_locals() int ibin = zbin*mbiny*mbinx + s_ybin*mbinx + s_xbin; for (int il = 0; il < c_bincount(ibin); ++il) { const int i = c_bins(ibin, il); - n = 0; + int n = 0; const AtomNeighbors neighbors_i = neigh_list.get_neighbors(inum); const X_FLOAT xtmp = x(i, 0); From e0021a3ff51702ed4b5c79720dfe69dd247988fa Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Sat, 1 Apr 2017 14:41:52 -0400 Subject: [PATCH 228/267] USER-DPD Kokkos: preflight SSA neigh list workplan to reduce allocated storage --- src/KOKKOS/npair_ssa_kokkos.cpp | 36 +++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index 5c20f1c270..1c7095c9b4 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -205,7 +205,7 @@ void NPairSSAKokkos::build(NeighList *list_) { NeighListKokkos* list = (NeighListKokkos*) list_; const int nlocal = includegroup?atom->nfirst:atom->nlocal; - int nl_size = atom->nghost; + int nl_size; ssa_phaseCt = sz1*sy1*sx1; @@ -265,17 +265,17 @@ void 
NPairSSAKokkos::build(NeighList *list_) if ((s_xbin < lbinxlo) || (s_xbin >= lbinxhi)) continue; int ibin = zbin*mbiny*mbinx + s_ybin*mbinx + s_xbin; - for (int il = 0; il < c_bincount(ibin); ++il) { - int n = 0; - - // count all local atoms in the current stencil "subphase" as potential neighbors - for (int k = c_nstencil_ssa(subphase); k < c_nstencil_ssa(subphase+1); k++) { - const int jbin = ibin+c_stencil(k); - int jl = (jbin != ibin) ? 0 : (il + 1); // same bin as il, so start just past il in the bin - n += c_bincount(jbin) - jl; - } - if (n > 0) inum++; + int base_n = 0; + bool include_same = false; + // count all local atoms in the current stencil "subphase" as potential neighbors + for (int k = c_nstencil_ssa(subphase); k < c_nstencil_ssa(subphase+1); k++) { + const int jbin = ibin+c_stencil(k); + if (jbin != ibin) base_n += c_bincount(jbin); + else include_same = true; } + // Calculate how many ibin particles would have had some neighbors + if (base_n > 0) inum += c_bincount(ibin); + else if (include_same) inum += c_bincount(ibin) - 1; } // record where workItem ends in ilist ssa_itemLen(workPhase,workItem) = inum - ssa_itemLoc(workPhase,workItem); @@ -302,9 +302,12 @@ fprintf(stdout, "total %3d could use %6d inums, expected %6d inums. inums/phase ,nlocal*4 ,inum / (double) workPhase ); - nl_size += inum; + nl_size = inum; // record how much space is needed for the local work plan } - + // count how many ghosts are likely to have neighbors, and increase the work plan storage + for (int workPhase = 0; workPhase < ssa_gphaseCt; workPhase++) { + nl_size += k_gbincount.h_view(workPhase + 1); + } list->grow(nl_size); // Make special larger SSA neighbor list NPairSSAKokkosExecute @@ -412,6 +415,13 @@ fprintf(stdout, "total %3d could use %6d inums, expected %6d inums. 
inums/phase list->inum = data.neigh_list.inum; //FIXME once the above is in a parallel_for list->gnum = data.neigh_list.gnum; // it will need a deep_copy or something +fprintf(stdout, "%6d inum %6d gnum, total used %6d, allocated %6d\n" + ,list->inum + ,list->gnum + ,list->inum + list->gnum + ,nl_size +); + list->k_ilist.template modify(); } From c4c3d490c7e0e4c416a119f852ef2973229c2815 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Sat, 1 Apr 2017 23:52:14 -0400 Subject: [PATCH 229/267] USER-DPD Kokkos: preflight storage needed for SSA threaded neigh list build --- src/KOKKOS/npair_ssa_kokkos.cpp | 122 +++++++++++++++++++++++--------- src/KOKKOS/npair_ssa_kokkos.h | 2 +- 2 files changed, 90 insertions(+), 34 deletions(-) diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index 1c7095c9b4..042c48fbac 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -25,6 +25,7 @@ #include "nbin_ssa_kokkos.h" #include "nstencil_ssa.h" #include "error.h" +#include "comm.h" namespace LAMMPS_NS { @@ -255,8 +256,8 @@ void NPairSSAKokkos::build(NeighList *list_) for (int zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) { for (int ybin = lbinylo + yoff - sy1 + 1; ybin < lbinyhi; ybin += sy1) { for (int xbin = lbinxlo + xoff - sx1 + 1; xbin < lbinxhi; xbin += sx1) { + int inum_start = inum; // if (workItem >= phaseLenEstimate) error->one(FLERR,"phaseLenEstimate was too small"); - ssa_itemLoc(workPhase, workItem) = inum; // record where workItem starts in ilist for (int subphase = 0; subphase < 4; subphase++) { int s_ybin = ybin + ((subphase & 0x2) ? 
sy1 - 1 : 0); @@ -264,27 +265,40 @@ void NPairSSAKokkos::build(NeighList *list_) if ((s_ybin < lbinylo) || (s_ybin >= lbinyhi)) continue; if ((s_xbin < lbinxlo) || (s_xbin >= lbinxhi)) continue; - int ibin = zbin*mbiny*mbinx + s_ybin*mbinx + s_xbin; - int base_n = 0; - bool include_same = false; - // count all local atoms in the current stencil "subphase" as potential neighbors - for (int k = c_nstencil_ssa(subphase); k < c_nstencil_ssa(subphase+1); k++) { - const int jbin = ibin+c_stencil(k); - if (jbin != ibin) base_n += c_bincount(jbin); - else include_same = true; + const int ibin = zbin*mbiny*mbinx + s_ybin*mbinx + s_xbin; + const int ibinCt = c_bincount(ibin); + if (ibinCt > 0) { + int base_n = 0; + bool include_same = false; + // count all local atoms in the current stencil "subphase" as potential neighbors + for (int k = c_nstencil_ssa(subphase); k < c_nstencil_ssa(subphase+1); k++) { + const int jbin = ibin+c_stencil(k); + if (jbin != ibin) base_n += c_bincount(jbin); + else include_same = true; + } + // Calculate how many ibin particles would have had some neighbors + if (base_n > 0) inum += ibinCt; + else if (include_same) inum += ibinCt - 1; } - // Calculate how many ibin particles would have had some neighbors - if (base_n > 0) inum += c_bincount(ibin); - else if (include_same) inum += c_bincount(ibin) - 1; } - // record where workItem ends in ilist - ssa_itemLen(workPhase,workItem) = inum - ssa_itemLoc(workPhase,workItem); - if (ssa_itemLen(workPhase,workItem) > 0) workItem++; + /* if (inum > inum_start) */ { + ssa_itemLoc(workPhase,workItem) = inum_start; // record where workItem starts in ilist + ssa_itemLen(workPhase,workItem) = inum - inum_start; // record workItem length +if (ssa_itemLen(workPhase,workItem) < 0) fprintf(stdout, "undr%03d phase (%3d,%3d) inum %d - inum_start %d UNDERFLOW\n" + ,comm->me + ,workPhase + ,workItem + ,inum + ,inum_start +); + workItem++; + } } } } -fprintf(stdout, "phase %3d could use %6d inums, expected %6d inums. 
maxworkItems = %3d, inums/workItems = %g\n" +fprintf(stdout, "phas%03d phase %3d could use %6d inums, expected %6d inums. maxworkItems = %3d, inums/workItems = %g\n" + ,comm->me ,workPhase ,inum - ssa_itemLoc(workPhase, 0) ,(nlocal*4 + ssa_phaseCt - 1) / ssa_phaseCt @@ -296,7 +310,8 @@ fprintf(stdout, "phase %3d could use %6d inums, expected %6d inums. maxworkItems } } } -fprintf(stdout, "total %3d could use %6d inums, expected %6d inums. inums/phase = %g\n" +fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inums/phase = %g\n" + ,comm->me ,workPhase ,inum ,nlocal*4 @@ -378,6 +393,7 @@ fprintf(stdout, "total %3d could use %6d inums, expected %6d inums. inums/phase data.special_flag[2] = special_flag[2]; data.special_flag[3] = special_flag[3]; + bool firstTry = true; data.h_resize()=1; while(data.h_resize()) { data.h_new_maxneighs() = list->maxneighs; @@ -390,8 +406,9 @@ fprintf(stdout, "total %3d could use %6d inums, expected %6d inums. inums/phase NPairSSAKokkosBuildFunctor f(data,atoms_per_bin*5*sizeof(X_FLOAT)); Kokkos::parallel_for(nall, f); #endif - data.build_locals(); + data.build_locals(firstTry, comm->me); data.build_ghosts(); + firstTry = false; DeviceType::fence(); deep_copy(data.h_resize, data.resize); @@ -415,7 +432,8 @@ fprintf(stdout, "total %3d could use %6d inums, expected %6d inums. 
inums/phase list->inum = data.neigh_list.inum; //FIXME once the above is in a parallel_for list->gnum = data.neigh_list.gnum; // it will need a deep_copy or something -fprintf(stdout, "%6d inum %6d gnum, total used %6d, allocated %6d\n" +fprintf(stdout, "Fina%03d: %6d inum %6d gnum, total used %6d, allocated %6d\n" + ,comm->me ,list->inum ,list->gnum ,list->inum + list->gnum @@ -427,8 +445,9 @@ fprintf(stdout, "%6d inum %6d gnum, total used %6d, allocated %6d\n" template -void NPairSSAKokkosExecute::build_locals() +void NPairSSAKokkosExecute::build_locals(const bool firstTry, int me) { + const typename ArrayTypes::t_int_1d_const_um stencil = d_stencil; int which = 0; int inum = 0; int workPhase = 0; @@ -438,11 +457,29 @@ void NPairSSAKokkosExecute::build_locals() for (int yoff = sy1 - 1; yoff >= 0; --yoff) { for (int xoff = sx1 - 1; xoff >= 0; --xoff) { int workItem = 0; - inum = d_ssa_itemLoc(workPhase, workItem); // get where workPhase starts in ilist + int skippedItems = 0; +// inum = d_ssa_itemLoc(workPhase, workItem); // get where workPhase starts in ilist for (int zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) { for (int ybin = lbinylo + yoff - sy1 + 1; ybin < lbinyhi; ybin += sy1) { for (int xbin = lbinxlo + xoff - sx1 + 1; xbin < lbinxhi; xbin += sx1) { - d_ssa_itemLoc(workPhase, workItem) = inum; // record where workItem actually starts in ilist + if (d_ssa_itemLen(workPhase, workItem + skippedItems) == 0) { + if (firstTry) ++skippedItems; + else ++workItem; // phase is done,should break out of three loops here if we could... + continue; + } + int inum_start = d_ssa_itemLoc(workPhase, workItem + skippedItems); + if (inum > inum_start) { // This shouldn't happen! 
+fprintf(stdout, "Rank%03d workphase (%2d,%3d,%3d): inum = %4d, but ssa_itemLoc = %4d OVERFLOW\n" + ,me + ,workPhase + ,workItem + ,workItem + skippedItems + ,inum + ,d_ssa_itemLoc(workPhase, workItem + skippedItems) +); + inum_start = inum; + } else inum = inum_start; + // d_ssa_itemLoc(workPhase, workItem) = inum; // record where workItem actually starts in ilist for (int subphase = 0; subphase < 4; subphase++) { int s_ybin = ybin + ((subphase & 0x2) ? sy1 - 1 : 0); @@ -461,9 +498,6 @@ void NPairSSAKokkosExecute::build_locals() const X_FLOAT ztmp = x(i, 2); const int itype = type(i); - const typename ArrayTypes::t_int_1d_const_um stencil - = d_stencil; - // loop over all local atoms in the current stencil "subphase" for (int k = d_nstencil_ssa(subphase); k < d_nstencil_ssa(subphase+1); k++) { const int jbin = ibin+stencil(k); @@ -517,26 +551,48 @@ void NPairSSAKokkosExecute::build_locals() } } } - // record where workItem actually ends in ilist - d_ssa_itemLen(workPhase,workItem) = inum - d_ssa_itemLoc(workPhase,workItem); - if (d_ssa_itemLen(workPhase,workItem) > 0) workItem++; + int len = inum - inum_start; + if (len != d_ssa_itemLen(workPhase, workItem + skippedItems)) { +fprintf(stdout, "Leng%03d workphase (%2d,%3d,%3d): len = %4d, but ssa_itemLen = %4d%s\n" + ,me + ,workPhase + ,workItem + ,workItem + skippedItems + ,len + ,d_ssa_itemLen(workPhase, workItem + skippedItems) + ,(len > d_ssa_itemLen(workPhase, workItem + skippedItems)) ? " OVERFLOW" : "" +); + } + if (inum > inum_start) { + d_ssa_itemLoc(workPhase,workItem) = inum_start; // record where workItem starts in ilist + d_ssa_itemLen(workPhase,workItem) = inum - inum_start; // record actual workItem length + workItem++; + } else if (firstTry) ++skippedItems; } } } -fprintf(stdout, "phase %3d used %6d inums, expected %6d inums. 
workItems = %3d, inums/workItems = %g\n" +fprintf(stdout, "Phas%03d phase %3d used %6d inums, workItems = %3d, skipped = %3d, inums/workItems = %g\n" + ,me ,workPhase ,inum - d_ssa_itemLoc(workPhase, 0) - ,(nlocal*4 + ssa_phaseCt - 1) / ssa_phaseCt ,workItem + ,skippedItems ,(inum - d_ssa_itemLoc(workPhase, 0)) / (double) workItem ); - // record where workPhase ends - d_ssa_phaseLen(workPhase++) = workItem; + // record where workPhase actually ends + if (firstTry) { + d_ssa_phaseLen(workPhase) = workItem; + while (workItem < (int) d_ssa_itemLen.dimension_1()) { + d_ssa_itemLen(workPhase,workItem++) = 0; + } + } + ++workPhase; } } } -fprintf(stdout, "Total %3d could use %6d inums, expected %6d inums. inums/phase = %g\n" +fprintf(stdout, "Totl%03d %3d could use %6d inums, expected %6d inums. inums/phase = %g\n" + ,me ,workPhase ,inum ,nlocal*4 diff --git a/src/KOKKOS/npair_ssa_kokkos.h b/src/KOKKOS/npair_ssa_kokkos.h index 96efd7404b..2c2ae15fb8 100644 --- a/src/KOKKOS/npair_ssa_kokkos.h +++ b/src/KOKKOS/npair_ssa_kokkos.h @@ -289,7 +289,7 @@ class NPairSSAKokkosExecute ~NPairSSAKokkosExecute() {neigh_list.copymode = 1;}; - void build_locals(); + void build_locals(const bool firstTry, int me); void build_ghosts(); KOKKOS_INLINE_FUNCTION From 2b2f3bd57c1d42a87334812cdab06f7e75405b7e Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Sun, 2 Apr 2017 00:07:24 -0400 Subject: [PATCH 230/267] USER-DPD Kokkos: #ifdef DEBUG_SSA_BUILD_LOCALS the new debug output --- src/KOKKOS/npair_ssa_kokkos.cpp | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index 042c48fbac..4c3218a08a 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -281,9 +281,9 @@ void NPairSSAKokkos::build(NeighList *list_) else if (include_same) inum += ibinCt - 1; } } - /* if (inum > inum_start) */ { - ssa_itemLoc(workPhase,workItem) = inum_start; // record where 
workItem starts in ilist - ssa_itemLen(workPhase,workItem) = inum - inum_start; // record workItem length + ssa_itemLoc(workPhase,workItem) = inum_start; // record where workItem starts in ilist + ssa_itemLen(workPhase,workItem) = inum - inum_start; // record workItem length +#ifdef DEBUG_SSA_BUILD_LOCALS if (ssa_itemLen(workPhase,workItem) < 0) fprintf(stdout, "undr%03d phase (%3d,%3d) inum %d - inum_start %d UNDERFLOW\n" ,comm->me ,workPhase @@ -291,12 +291,13 @@ if (ssa_itemLen(workPhase,workItem) < 0) fprintf(stdout, "undr%03d phase (%3d,%3 ,inum ,inum_start ); - workItem++; - } +#endif + workItem++; } } } +#ifdef DEBUG_SSA_BUILD_LOCALS fprintf(stdout, "phas%03d phase %3d could use %6d inums, expected %6d inums. maxworkItems = %3d, inums/workItems = %g\n" ,comm->me ,workPhase @@ -305,11 +306,13 @@ fprintf(stdout, "phas%03d phase %3d could use %6d inums, expected %6d inums. max ,workItem ,(inum - ssa_itemLoc(workPhase, 0)) / (double) workItem ); +#endif // record where workPhase ends ssa_phaseLen(workPhase++) = workItem; } } } +#ifdef DEBUG_SSA_BUILD_LOCALS fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inums/phase = %g\n" ,comm->me ,workPhase @@ -317,9 +320,10 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu ,nlocal*4 ,inum / (double) workPhase ); +#endif nl_size = inum; // record how much space is needed for the local work plan } - // count how many ghosts are likely to have neighbors, and increase the work plan storage + // count how many ghosts might have neighbors, and increase the work plan storage for (int workPhase = 0; workPhase < ssa_gphaseCt; workPhase++) { nl_size += k_gbincount.h_view(workPhase + 1); } @@ -432,13 +436,15 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. 
inu list->inum = data.neigh_list.inum; //FIXME once the above is in a parallel_for list->gnum = data.neigh_list.gnum; // it will need a deep_copy or something -fprintf(stdout, "Fina%03d: %6d inum %6d gnum, total used %6d, allocated %6d\n" +#ifdef DEBUG_SSA_BUILD_LOCALS +fprintf(stdout, "Fina%03d %6d inum %6d gnum, total used %6d, allocated %6d\n" ,comm->me ,list->inum ,list->gnum ,list->inum + list->gnum ,nl_size ); +#endif list->k_ilist.template modify(); } @@ -468,6 +474,7 @@ void NPairSSAKokkosExecute::build_locals(const bool firstTry, int me continue; } int inum_start = d_ssa_itemLoc(workPhase, workItem + skippedItems); +#ifdef DEBUG_SSA_BUILD_LOCALS if (inum > inum_start) { // This shouldn't happen! fprintf(stdout, "Rank%03d workphase (%2d,%3d,%3d): inum = %4d, but ssa_itemLoc = %4d OVERFLOW\n" ,me @@ -478,7 +485,9 @@ fprintf(stdout, "Rank%03d workphase (%2d,%3d,%3d): inum = %4d, but ssa_itemLoc = ,d_ssa_itemLoc(workPhase, workItem + skippedItems) ); inum_start = inum; - } else inum = inum_start; + } else +#endif + inum = inum_start; // d_ssa_itemLoc(workPhase, workItem) = inum; // record where workItem actually starts in ilist for (int subphase = 0; subphase < 4; subphase++) { @@ -552,6 +561,7 @@ fprintf(stdout, "Rank%03d workphase (%2d,%3d,%3d): inum = %4d, but ssa_itemLoc = } } int len = inum - inum_start; +#ifdef DEBUG_SSA_BUILD_LOCALS if (len != d_ssa_itemLen(workPhase, workItem + skippedItems)) { fprintf(stdout, "Leng%03d workphase (%2d,%3d,%3d): len = %4d, but ssa_itemLen = %4d%s\n" ,me @@ -563,6 +573,7 @@ fprintf(stdout, "Leng%03d workphase (%2d,%3d,%3d): len = %4d, but ssa_itemLen = ,(len > d_ssa_itemLen(workPhase, workItem + skippedItems)) ? 
" OVERFLOW" : "" ); } +#endif if (inum > inum_start) { d_ssa_itemLoc(workPhase,workItem) = inum_start; // record where workItem starts in ilist d_ssa_itemLen(workPhase,workItem) = inum - inum_start; // record actual workItem length @@ -572,6 +583,7 @@ fprintf(stdout, "Leng%03d workphase (%2d,%3d,%3d): len = %4d, but ssa_itemLen = } } +#ifdef DEBUG_SSA_BUILD_LOCALS fprintf(stdout, "Phas%03d phase %3d used %6d inums, workItems = %3d, skipped = %3d, inums/workItems = %g\n" ,me ,workPhase @@ -580,6 +592,7 @@ fprintf(stdout, "Phas%03d phase %3d used %6d inums, workItems = %3d, skipped = % ,skippedItems ,(inum - d_ssa_itemLoc(workPhase, 0)) / (double) workItem ); +#endif // record where workPhase actually ends if (firstTry) { d_ssa_phaseLen(workPhase) = workItem; @@ -591,6 +604,7 @@ fprintf(stdout, "Phas%03d phase %3d used %6d inums, workItems = %3d, skipped = % } } } +#ifdef DEBUG_SSA_BUILD_LOCALS fprintf(stdout, "Totl%03d %3d could use %6d inums, expected %6d inums. inums/phase = %g\n" ,me ,workPhase @@ -598,6 +612,7 @@ fprintf(stdout, "Totl%03d %3d could use %6d inums, expected %6d inums. 
inums/pha ,nlocal*4 ,inum / (double) workPhase ); +#endif //FIXME if (ssa_phaseCt != workPhase) error->one(FLERR,"ssa_phaseCt was wrong"); From aedd7c57f3f78596f8b737972aed5f241ef4f7f4 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 3 Apr 2017 16:42:18 -0600 Subject: [PATCH 231/267] Reset atom map values from restart file --- src/read_restart.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/read_restart.cpp b/src/read_restart.cpp index 331a5d6cda..fcbd8d186d 100644 --- a/src/read_restart.cpp +++ b/src/read_restart.cpp @@ -905,8 +905,10 @@ void ReadRestart::header(int incompatible) atom->tag_enable = read_int(); } else if (flag == ATOM_MAP_STYLE) { atom->map_style = read_int(); + atom->map_style = 0; } else if (flag == ATOM_MAP_USER) { atom->map_user = read_int(); + atom->map_user = 0; } else if (flag == ATOM_SORTFREQ) { atom->sortfreq = read_int(); } else if (flag == ATOM_SORTBIN) { From 4d4b6f66b7139be46a9f292d7bad2403b22117f3 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 5 Apr 2017 11:42:25 -0600 Subject: [PATCH 232/267] Changing default gb/test to on --- src/KOKKOS/kokkos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index a000ad5550..10e7bda4e0 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -34,7 +34,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) lmp->kokkos = this; auto_sync = 1; - gb_test = 0; + gb_test = 1; int me = 0; MPI_Comm_rank(world,&me); @@ -157,7 +157,7 @@ void KokkosLMP::accelerator(int narg, char **arg) neighflag = FULL; neighflag_qeq = FULL; neighflag_qeq_set = 0; - gb_test = 0; + gb_test = 1; int newtonflag = 0; double binsize = 0.0; exchange_comm_classic = forward_comm_classic = 0; From 9e272cb393fdce5697267a2b629cd5a3a3fdc0b2 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Thu, 6 Apr 2017 02:31:45 -0400 Subject: [PATCH 233/267] USER-DPD Kokkos: use a parallel_for() to build the locals workplan for SSA 
--- src/KOKKOS/npair_ssa_kokkos.cpp | 131 ++++++++++++++------------------ src/KOKKOS/npair_ssa_kokkos.h | 27 ++++++- 2 files changed, 84 insertions(+), 74 deletions(-) diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index 4c3218a08a..2b33256599 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -34,6 +34,14 @@ namespace LAMMPS_NS { template NPairSSAKokkos::NPairSSAKokkos(LAMMPS *lmp) : NPair(lmp), ssa_phaseCt(27), ssa_gphaseCt(7) { + const int gphaseLenEstimate = 1; //FIXME make this 4 eventually + k_ssa_gphaseLen = DAT::tdual_int_1d("NPairSSAKokkos:ssa_gphaseLen",ssa_gphaseCt); + ssa_gphaseLen = k_ssa_gphaseLen.view(); + + k_ssa_gitemLoc = DAT::tdual_int_2d("NPairSSAKokkos::ssa_gitemLoc",ssa_gphaseCt,gphaseLenEstimate); + ssa_gitemLoc = k_ssa_gitemLoc.view(); + k_ssa_gitemLen = DAT::tdual_int_2d("NPairSSAKokkos::ssa_gitemLen",ssa_gphaseCt,gphaseLenEstimate); + ssa_gitemLen = k_ssa_gitemLen.view(); } /* ---------------------------------------------------------------------- @@ -132,6 +140,27 @@ void NPairSSAKokkos::copy_stencil_info() sx1 = ns_ssa->sx + 1; sy1 = ns_ssa->sy + 1; sz1 = ns_ssa->sz + 1; + + // Setup the phases of the workplan for locals + ssa_phaseCt = sz1*sy1*sx1; + if (ssa_phaseCt > (int) k_ssa_phaseLen.dimension_0()) { + k_ssa_phaseLen = DAT::tdual_int_1d("NPairSSAKokkos:ssa_phaseLen",ssa_phaseCt); + ssa_phaseLen = k_ssa_phaseLen.view(); + k_ssa_phaseOff = DAT::tdual_int_1d_3("NPairSSAKokkos:ssa_phaseOff",ssa_phaseCt); + ssa_phaseOff = k_ssa_phaseOff.view(); + } + int workPhase = 0; + for (int zoff = sz1 - 1; zoff >= 0; --zoff) { + for (int yoff = sy1 - 1; yoff >= 0; --yoff) { + for (int xoff = sx1 - 1; xoff >= 0; --xoff) { + ssa_phaseOff(workPhase, 0) = xoff; + ssa_phaseOff(workPhase, 1) = yoff; + ssa_phaseOff(workPhase, 2) = zoff; + workPhase++; + } + } + } + } /* ---------------------------------------------------------------------- */ @@ -208,18 +237,11 @@ void 
NPairSSAKokkos::build(NeighList *list_) const int nlocal = includegroup?atom->nfirst:atom->nlocal; int nl_size; - ssa_phaseCt = sz1*sy1*sx1; + int xbinCt = (lbinxhi - lbinxlo + sx1 - 1) / sx1 + 1; + int ybinCt = (lbinyhi - lbinylo + sy1 - 1) / sy1 + 1; + int zbinCt = (lbinzhi - lbinzlo + sz1 - 1) / sz1 + 1; + int phaseLenEstimate = xbinCt*ybinCt*zbinCt; - int xbin = (lbinxhi - lbinxlo + sx1 - 1) / sx1 + 1; - int ybin = (lbinyhi - lbinylo + sy1 - 1) / sy1 + 1; - int zbin = (lbinzhi - lbinzlo + sz1 - 1) / sz1 + 1; - int phaseLenEstimate = xbin*ybin*zbin; - int gphaseLenEstimate = 1; //FIXME make this 4 eventually - - if (ssa_phaseCt > (int) k_ssa_phaseLen.dimension_0()) { - k_ssa_phaseLen = DAT::tdual_int_1d("NPairSSAKokkos:ssa_phaseLen",ssa_phaseCt); - ssa_phaseLen = k_ssa_phaseLen.view(); - } if ((ssa_phaseCt > (int) k_ssa_itemLoc.dimension_0()) || (phaseLenEstimate > (int) k_ssa_itemLoc.dimension_1())) { k_ssa_itemLoc = DAT::tdual_int_2d("NPairSSAKokkos::ssa_itemLoc",ssa_phaseCt,phaseLenEstimate); @@ -228,18 +250,6 @@ void NPairSSAKokkos::build(NeighList *list_) ssa_itemLen = k_ssa_itemLen.view(); } - if (ssa_gphaseCt > (int) k_ssa_gphaseLen.dimension_0()) { - k_ssa_gphaseLen = DAT::tdual_int_1d("NPairSSAKokkos:ssa_gphaseLen",ssa_gphaseCt); - ssa_gphaseLen = k_ssa_gphaseLen.view(); - } - if ((ssa_gphaseCt > (int) k_ssa_gitemLoc.dimension_0()) || - (gphaseLenEstimate > (int) k_ssa_gitemLoc.dimension_1())) { - k_ssa_gitemLoc = DAT::tdual_int_2d("NPairSSAKokkos::ssa_gitemLoc",ssa_gphaseCt,gphaseLenEstimate); - ssa_gitemLoc = k_ssa_gitemLoc.view(); - k_ssa_gitemLen = DAT::tdual_int_2d("NPairSSAKokkos::ssa_gitemLen",ssa_gphaseCt,gphaseLenEstimate); - ssa_gitemLen = k_ssa_gitemLen.view(); - } - { // Preflight the neighbor list workplan const typename ArrayTypes::t_int_1d_const c_bincount = k_bincount.view(); const typename ArrayTypes::t_int_2d_const c_bins = k_bins.view(); @@ -247,11 +257,11 @@ void NPairSSAKokkos::build(NeighList *list_) const typename 
ArrayTypes::t_int_1d_const c_nstencil_ssa = k_nstencil_ssa.view(); int inum = 0; - int workPhase = 0; - // loop over bins with local atoms, storing half of the neighbors - for (int zoff = sz1 - 1; zoff >= 0; --zoff) { - for (int yoff = sy1 - 1; yoff >= 0; --yoff) { - for (int xoff = sx1 - 1; xoff >= 0; --xoff) { + // loop over bins with local atoms, counting half of the neighbors + for (int workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) { + int zoff = ssa_phaseOff(workPhase, 2); + int yoff = ssa_phaseOff(workPhase, 1); + int xoff = ssa_phaseOff(workPhase, 0); int workItem = 0; for (int zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) { for (int ybin = lbinylo + yoff - sy1 + 1; ybin < lbinyhi; ybin += sy1) { @@ -308,9 +318,7 @@ fprintf(stdout, "phas%03d phase %3d could use %6d inums, expected %6d inums. max ); #endif // record where workPhase ends - ssa_phaseLen(workPhase++) = workItem; - } - } + ssa_phaseLen(workPhase) = workItem; } #ifdef DEBUG_SSA_BUILD_LOCALS fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inums/phase = %g\n" @@ -343,6 +351,7 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu k_nstencil_ssa.view(), ssa_phaseCt, k_ssa_phaseLen.view(), + k_ssa_phaseOff.view(), k_ssa_itemLoc.view(), k_ssa_itemLen.view(), ssa_gphaseCt, @@ -410,7 +419,17 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. 
inu NPairSSAKokkosBuildFunctor f(data,atoms_per_bin*5*sizeof(X_FLOAT)); Kokkos::parallel_for(nall, f); #endif - data.build_locals(firstTry, comm->me); + // loop over bins with local atoms, storing half of the neighbors +#ifdef USE_LAMBDA_BUILD + Kokkos::parallel_for(ssa_phaseCt, LAMMPS_LAMBDA (const int workPhase) { + data.build_locals_onePhase(firstTry, comm->me, workPhase); + }); +#else + NPairSSAKokkosBuildFunctor f(data, firstTry, comm->me); + Kokkos::parallel_for(ssa_phaseCt, f); +#endif + data.neigh_list.inum = ssa_itemLoc(ssa_phaseCt-1,ssa_phaseLen(ssa_phaseCt-1)-1) + + ssa_itemLen(ssa_phaseCt-1,ssa_phaseLen(ssa_phaseCt-1)-1); data.build_ghosts(); firstTry = false; @@ -451,20 +470,16 @@ fprintf(stdout, "Fina%03d %6d inum %6d gnum, total used %6d, allocated %6d\n" template -void NPairSSAKokkosExecute::build_locals(const bool firstTry, int me) +void NPairSSAKokkosExecute::build_locals_onePhase(const bool firstTry, int me, int workPhase) const { const typename ArrayTypes::t_int_1d_const_um stencil = d_stencil; int which = 0; - int inum = 0; - int workPhase = 0; - // loop over bins with local atoms, storing half of the neighbors - for (int zoff = sz1 - 1; zoff >= 0; --zoff) { - for (int yoff = sy1 - 1; yoff >= 0; --yoff) { - for (int xoff = sx1 - 1; xoff >= 0; --xoff) { - int workItem = 0; - int skippedItems = 0; -// inum = d_ssa_itemLoc(workPhase, workItem); // get where workPhase starts in ilist + int zoff = d_ssa_phaseOff(workPhase, 2); + int yoff = d_ssa_phaseOff(workPhase, 1); + int xoff = d_ssa_phaseOff(workPhase, 0); + int workItem = 0; + int skippedItems = 0; for (int zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) { for (int ybin = lbinylo + yoff - sy1 + 1; ybin < lbinyhi; ybin += sy1) { for (int xbin = lbinxlo + xoff - sx1 + 1; xbin < lbinxhi; xbin += sx1) { @@ -474,21 +489,7 @@ void NPairSSAKokkosExecute::build_locals(const bool firstTry, int me continue; } int inum_start = d_ssa_itemLoc(workPhase, workItem + skippedItems); -#ifdef 
DEBUG_SSA_BUILD_LOCALS - if (inum > inum_start) { // This shouldn't happen! -fprintf(stdout, "Rank%03d workphase (%2d,%3d,%3d): inum = %4d, but ssa_itemLoc = %4d OVERFLOW\n" - ,me - ,workPhase - ,workItem - ,workItem + skippedItems - ,inum - ,d_ssa_itemLoc(workPhase, workItem + skippedItems) -); - inum_start = inum; - } else -#endif - inum = inum_start; - // d_ssa_itemLoc(workPhase, workItem) = inum; // record where workItem actually starts in ilist + int inum = inum_start; for (int subphase = 0; subphase < 4; subphase++) { int s_ybin = ybin + ((subphase & 0x2) ? sy1 - 1 : 0); @@ -600,23 +601,7 @@ fprintf(stdout, "Phas%03d phase %3d used %6d inums, workItems = %3d, skipped = % d_ssa_itemLen(workPhase,workItem++) = 0; } } - ++workPhase; - } - } - } -#ifdef DEBUG_SSA_BUILD_LOCALS -fprintf(stdout, "Totl%03d %3d could use %6d inums, expected %6d inums. inums/phase = %g\n" - ,me - ,workPhase - ,inum - ,nlocal*4 - ,inum / (double) workPhase -); -#endif -//FIXME if (ssa_phaseCt != workPhase) error->one(FLERR,"ssa_phaseCt was wrong"); - - neigh_list.inum = inum; } diff --git a/src/KOKKOS/npair_ssa_kokkos.h b/src/KOKKOS/npair_ssa_kokkos.h index 2c2ae15fb8..62c4135cc7 100644 --- a/src/KOKKOS/npair_ssa_kokkos.h +++ b/src/KOKKOS/npair_ssa_kokkos.h @@ -41,9 +41,11 @@ class NPairSSAKokkos : public NPair { // SSA Work plan data structures int ssa_phaseCt; DAT::tdual_int_1d k_ssa_phaseLen; + DAT::tdual_int_1d_3 k_ssa_phaseOff; DAT::tdual_int_2d k_ssa_itemLoc; DAT::tdual_int_2d k_ssa_itemLen; typename AT::t_int_1d ssa_phaseLen; + typename AT::t_int_1d_3 ssa_phaseOff; typename AT::t_int_2d ssa_itemLoc; typename AT::t_int_2d ssa_itemLen; @@ -175,6 +177,7 @@ class NPairSSAKokkosExecute // SSA Work plan data structures int ssa_phaseCt; typename AT::t_int_1d d_ssa_phaseLen; + typename AT::t_int_1d_3_const d_ssa_phaseOff; typename AT::t_int_2d d_ssa_itemLoc; typename AT::t_int_2d d_ssa_itemLen; int ssa_gphaseCt; @@ -198,6 +201,7 @@ class NPairSSAKokkosExecute const typename AT::t_int_1d 
&_d_nstencil_ssa, const int _ssa_phaseCt, const typename AT::t_int_1d &_d_ssa_phaseLen, + const typename AT::t_int_1d_3 &_d_ssa_phaseOff, const typename AT::t_int_2d &_d_ssa_itemLoc, const typename AT::t_int_2d &_d_ssa_itemLen, const int _ssa_gphaseCt, @@ -242,6 +246,7 @@ class NPairSSAKokkosExecute d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz),d_nstencil_ssa(_d_nstencil_ssa), ssa_phaseCt(_ssa_phaseCt), d_ssa_phaseLen(_d_ssa_phaseLen), + d_ssa_phaseOff(_d_ssa_phaseOff), d_ssa_itemLoc(_d_ssa_itemLoc), d_ssa_itemLen(_d_ssa_itemLen), ssa_gphaseCt(_ssa_gphaseCt), @@ -289,7 +294,9 @@ class NPairSSAKokkosExecute ~NPairSSAKokkosExecute() {neigh_list.copymode = 1;}; - void build_locals(const bool firstTry, int me); + KOKKOS_FUNCTION + void build_locals_onePhase(const bool firstTry, int me, int workPhase) const; + void build_ghosts(); KOKKOS_INLINE_FUNCTION @@ -344,6 +351,24 @@ class NPairSSAKokkosExecute }; +template +struct NPairSSAKokkosBuildFunctor { + typedef DeviceType device_type; + + const NPairSSAKokkosExecute c; + const bool firstTry; + const int me; + + NPairSSAKokkosBuildFunctor(const NPairSSAKokkosExecute &_c, + const bool _firstTry, const int _me):c(_c), + firstTry(_firstTry), me(_me) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int & i) const { + c.build_locals_onePhase(firstTry, me, i); + } +}; + } #endif From 178af2ec9e7225daff3ce853af749f6fdb6e58a9 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Thu, 6 Apr 2017 03:53:57 -0400 Subject: [PATCH 234/267] USER-DPD Kokkos: use a parallel_for() to build the ghosts workplan for SSA --- src/KOKKOS/npair_ssa_kokkos.cpp | 47 +++++++++++++-------------------- src/KOKKOS/npair_ssa_kokkos.h | 25 +++--------------- 2 files changed, 23 insertions(+), 49 deletions(-) diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index 2b33256599..ba4bc9171c 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -333,7 +333,10 @@ fprintf(stdout, "tota%03d total 
%3d could use %6d inums, expected %6d inums. inu } // count how many ghosts might have neighbors, and increase the work plan storage for (int workPhase = 0; workPhase < ssa_gphaseCt; workPhase++) { - nl_size += k_gbincount.h_view(workPhase + 1); + int len = k_gbincount.h_view(workPhase + 1); + ssa_gitemLoc(workPhase,0) = nl_size; // record where workItem starts in ilist + ssa_gitemLen(workPhase,0) = len; + nl_size += len; } list->grow(nl_size); // Make special larger SSA neighbor list @@ -415,22 +418,19 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu Kokkos::deep_copy(data.resize, data.h_resize); Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs); -#ifdef NOTYET - NPairSSAKokkosBuildFunctor f(data,atoms_per_bin*5*sizeof(X_FLOAT)); - Kokkos::parallel_for(nall, f); -#endif // loop over bins with local atoms, storing half of the neighbors -#ifdef USE_LAMBDA_BUILD Kokkos::parallel_for(ssa_phaseCt, LAMMPS_LAMBDA (const int workPhase) { data.build_locals_onePhase(firstTry, comm->me, workPhase); }); -#else - NPairSSAKokkosBuildFunctor f(data, firstTry, comm->me); - Kokkos::parallel_for(ssa_phaseCt, f); -#endif data.neigh_list.inum = ssa_itemLoc(ssa_phaseCt-1,ssa_phaseLen(ssa_phaseCt-1)-1) + ssa_itemLen(ssa_phaseCt-1,ssa_phaseLen(ssa_phaseCt-1)-1); - data.build_ghosts(); + + // loop over AIR ghost atoms, storing their local neighbors + Kokkos::parallel_for(ssa_gphaseCt, LAMMPS_LAMBDA (const int workPhase) { + data.build_ghosts_onePhase(workPhase); + }); + data.neigh_list.gnum = ssa_gitemLoc(ssa_gphaseCt-1,ssa_gphaseLen(ssa_gphaseCt-1)-1) + + ssa_gitemLen(ssa_gphaseCt-1,ssa_gphaseLen(ssa_gphaseCt-1)-1) - data.neigh_list.inum; firstTry = false; DeviceType::fence(); @@ -606,34 +606,27 @@ fprintf(stdout, "Phas%03d phase %3d used %6d inums, workItems = %3d, skipped = % template -void NPairSSAKokkosExecute::build_ghosts() +void NPairSSAKokkosExecute::build_ghosts_onePhase(int workPhase) const { - int n = 0; + const typename 
ArrayTypes::t_int_1d_const_um stencil = d_stencil; int which = 0; - int inum = neigh_list.inum; - int gnum = 0; - // loop over AIR ghost atoms, storing their local neighbors // since these are ghosts, must check if stencil bin is out of bounds - for (int workPhase = 0; workPhase < ssa_gphaseCt; workPhase++) { int airnum = workPhase + 1; //FIXME for now, there is only 1 workItem for each ghost AIR int workItem; for (workItem = 0; workItem < 1; ++workItem) { - d_ssa_gitemLoc(workPhase, workItem) = inum + gnum; // record where workItem starts in ilist + int gNdx = d_ssa_gitemLoc(workPhase, workItem); // record where workItem starts in ilist for (int il = 0; il < c_gbincount(airnum); ++il) { const int i = c_gbins(airnum, il); - n = 0; + int n = 0; - const AtomNeighbors neighbors_i = neigh_list.get_neighbors(inum + gnum); + const AtomNeighbors neighbors_i = neigh_list.get_neighbors(gNdx); const X_FLOAT xtmp = x(i, 0); const X_FLOAT ytmp = x(i, 1); const X_FLOAT ztmp = x(i, 2); const int itype = type(i); - const typename ArrayTypes::t_int_1d_const_um stencil - = d_stencil; - int loc[3]; const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2), &(loc[0])); @@ -686,8 +679,8 @@ void NPairSSAKokkosExecute::build_ghosts() } if (n > 0) { - neigh_list.d_numneigh(inum + gnum) = n; - neigh_list.d_ilist(inum + (gnum++)) = i; + neigh_list.d_numneigh(gNdx) = n; + neigh_list.d_ilist(gNdx++) = i; if(n > neigh_list.maxneighs) { resize() = 1; if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n); @@ -695,12 +688,10 @@ void NPairSSAKokkosExecute::build_ghosts() } } // record where workItem ends in ilist - d_ssa_gitemLen(workPhase,workItem) = inum + gnum - d_ssa_gitemLoc(workPhase,workItem); + d_ssa_gitemLen(workPhase,workItem) = gNdx - d_ssa_gitemLoc(workPhase,workItem); // if (d_ssa_gitemLen(workPhase,workItem) > 0) workItem++; } d_ssa_gphaseLen(workPhase) = workItem; - } - neigh_list.gnum = gnum; } } diff --git a/src/KOKKOS/npair_ssa_kokkos.h b/src/KOKKOS/npair_ssa_kokkos.h 
index 62c4135cc7..98046feba8 100644 --- a/src/KOKKOS/npair_ssa_kokkos.h +++ b/src/KOKKOS/npair_ssa_kokkos.h @@ -275,7 +275,7 @@ class NPairSSAKokkosExecute bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2]; bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2]; - resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize"); + resize = typename AT::t_int_scalar("NPairSSAKokkosExecute::resize"); #ifndef KOKKOS_USE_CUDA_UVM h_resize = Kokkos::create_mirror_view(resize); #else @@ -283,7 +283,7 @@ class NPairSSAKokkosExecute #endif h_resize() = 1; new_maxneighs = typename AT:: - t_int_scalar("NeighborKokkosFunctor::new_maxneighs"); + t_int_scalar("NPairSSAKokkosExecute::new_maxneighs"); #ifndef KOKKOS_USE_CUDA_UVM h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs); #else @@ -297,7 +297,8 @@ class NPairSSAKokkosExecute KOKKOS_FUNCTION void build_locals_onePhase(const bool firstTry, int me, int workPhase) const; - void build_ghosts(); + KOKKOS_FUNCTION + void build_ghosts_onePhase(int workPhase) const; KOKKOS_INLINE_FUNCTION int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const @@ -351,24 +352,6 @@ class NPairSSAKokkosExecute }; -template -struct NPairSSAKokkosBuildFunctor { - typedef DeviceType device_type; - - const NPairSSAKokkosExecute c; - const bool firstTry; - const int me; - - NPairSSAKokkosBuildFunctor(const NPairSSAKokkosExecute &_c, - const bool _firstTry, const int _me):c(_c), - firstTry(_firstTry), me(_me) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int & i) const { - c.build_locals_onePhase(firstTry, me, i); - } -}; - } #endif From 035d0a80d7bc8375886c3c6989a85c1bda12de67 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 10 Apr 2017 16:38:58 -0600 Subject: [PATCH 235/267] Reducing memory churn in pair_exp6_rx_kokkos --- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 56 +++++++++++++++++++++--------- src/KOKKOS/pair_exp6_rx_kokkos.h | 4 +++ 2 files changed, 44 
insertions(+), 16 deletions(-) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 5c74cba8c7..312f1c6076 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -187,22 +187,25 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) { const int np_total = nlocal + atom->nghost; - PairExp6ParamData.epsilon1 = typename AT::t_float_1d("PairExp6ParamData.epsilon1" ,np_total); - PairExp6ParamData.alpha1 = typename AT::t_float_1d("PairExp6ParamData.alpha1" ,np_total); - PairExp6ParamData.rm1 = typename AT::t_float_1d("PairExp6ParamData.rm1" ,np_total); - PairExp6ParamData.mixWtSite1 = typename AT::t_float_1d("PairExp6ParamData.mixWtSite1" ,np_total); - PairExp6ParamData.epsilon2 = typename AT::t_float_1d("PairExp6ParamData.epsilon2" ,np_total); - PairExp6ParamData.alpha2 = typename AT::t_float_1d("PairExp6ParamData.alpha2" ,np_total); - PairExp6ParamData.rm2 = typename AT::t_float_1d("PairExp6ParamData.rm2" ,np_total); - PairExp6ParamData.mixWtSite2 = typename AT::t_float_1d("PairExp6ParamData.mixWtSite2" ,np_total); - PairExp6ParamData.epsilonOld1 = typename AT::t_float_1d("PairExp6ParamData.epsilonOld1" ,np_total); - PairExp6ParamData.alphaOld1 = typename AT::t_float_1d("PairExp6ParamData.alphaOld1" ,np_total); - PairExp6ParamData.rmOld1 = typename AT::t_float_1d("PairExp6ParamData.rmOld1" ,np_total); - PairExp6ParamData.mixWtSite1old = typename AT::t_float_1d("PairExp6ParamData.mixWtSite1old",np_total); - PairExp6ParamData.epsilonOld2 = typename AT::t_float_1d("PairExp6ParamData.epsilonOld2" ,np_total); - PairExp6ParamData.alphaOld2 = typename AT::t_float_1d("PairExp6ParamData.alphaOld2" ,np_total); - PairExp6ParamData.rmOld2 = typename AT::t_float_1d("PairExp6ParamData.rmOld2" ,np_total); - PairExp6ParamData.mixWtSite2old = typename AT::t_float_1d("PairExp6ParamData.mixWtSite2old",np_total); + if (np_total > PairExp6ParamData.epsilon1.dimension_0()) { + PairExp6ParamData.epsilon1 = 
typename AT::t_float_1d("PairExp6ParamData.epsilon1" ,np_total); + PairExp6ParamData.alpha1 = typename AT::t_float_1d("PairExp6ParamData.alpha1" ,np_total); + PairExp6ParamData.rm1 = typename AT::t_float_1d("PairExp6ParamData.rm1" ,np_total); + PairExp6ParamData.mixWtSite1 = typename AT::t_float_1d("PairExp6ParamData.mixWtSite1" ,np_total); + PairExp6ParamData.epsilon2 = typename AT::t_float_1d("PairExp6ParamData.epsilon2" ,np_total); + PairExp6ParamData.alpha2 = typename AT::t_float_1d("PairExp6ParamData.alpha2" ,np_total); + PairExp6ParamData.rm2 = typename AT::t_float_1d("PairExp6ParamData.rm2" ,np_total); + PairExp6ParamData.mixWtSite2 = typename AT::t_float_1d("PairExp6ParamData.mixWtSite2" ,np_total); + PairExp6ParamData.epsilonOld1 = typename AT::t_float_1d("PairExp6ParamData.epsilonOld1" ,np_total); + PairExp6ParamData.alphaOld1 = typename AT::t_float_1d("PairExp6ParamData.alphaOld1" ,np_total); + PairExp6ParamData.rmOld1 = typename AT::t_float_1d("PairExp6ParamData.rmOld1" ,np_total); + PairExp6ParamData.mixWtSite1old = typename AT::t_float_1d("PairExp6ParamData.mixWtSite1old",np_total); + PairExp6ParamData.epsilonOld2 = typename AT::t_float_1d("PairExp6ParamData.epsilonOld2" ,np_total); + PairExp6ParamData.alphaOld2 = typename AT::t_float_1d("PairExp6ParamData.alphaOld2" ,np_total); + PairExp6ParamData.rmOld2 = typename AT::t_float_1d("PairExp6ParamData.rmOld2" ,np_total); + PairExp6ParamData.mixWtSite2old = typename AT::t_float_1d("PairExp6ParamData.mixWtSite2old",np_total); + } else + Kokkos::parallel_for(Kokkos::RangePolicy(0,np_total),*this); #ifdef KOKKOS_HAVE_CUDA Kokkos::parallel_for(Kokkos::RangePolicy(0,np_total),*this); @@ -352,6 +355,27 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) //printf("PairExp6rxKokkos::compute %f %f\n", getElapsedTime(t_start, t_stop), getElapsedTime(t_mix_start, t_mix_stop)); } +template +KOKKOS_INLINE_FUNCTION +void PairExp6rxKokkos::operator()(TagPairExp6rxZeroMixingWeights, const int &i) const { + 
PairExp6ParamData.epsilon1[i] = 0.0; + PairExp6ParamData.alpha1[i] = 0.0; + PairExp6ParamData.rm1[i] = 0.0; + PairExp6ParamData.mixWtSite1[i] = 0.0; + PairExp6ParamData.epsilon2[i] = 0.0; + PairExp6ParamData.alpha2[i] = 0.0; + PairExp6ParamData.rm2[i] = 0.0; + PairExp6ParamData.mixWtSite2[i] = 0.0; + PairExp6ParamData.epsilonOld1[i] = 0.0; + PairExp6ParamData.alphaOld1[i] = 0.0; + PairExp6ParamData.rmOld1[i] = 0.0; + PairExp6ParamData.mixWtSite1old[i] = 0.0; + PairExp6ParamData.epsilonOld2[i] = 0.0; + PairExp6ParamData.alphaOld2[i] = 0.0; + PairExp6ParamData.rmOld2[i] = 0.0; + PairExp6ParamData.mixWtSite2old[i] = 0.0; +} + template KOKKOS_INLINE_FUNCTION void PairExp6rxKokkos::operator()(TagPairExp6rxgetMixingWeights, const int &i) const { diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.h b/src/KOKKOS/pair_exp6_rx_kokkos.h index 9f38732c32..5e9fb4e3e3 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.h +++ b/src/KOKKOS/pair_exp6_rx_kokkos.h @@ -52,6 +52,7 @@ struct PairExp6ParamDataTypeKokkos {} }; +struct TagPairExp6rxZeroMixingWeights{}; struct TagPairExp6rxgetMixingWeights{}; template @@ -76,6 +77,9 @@ class PairExp6rxKokkos : public PairExp6rx { void coeff(int, char **); void init_style(); + KOKKOS_INLINE_FUNCTION + void operator()(TagPairExp6rxZeroMixingWeights, const int&) const; + KOKKOS_INLINE_FUNCTION void operator()(TagPairExp6rxgetMixingWeights, const int&) const; From 6c0b6918821ac738327b0aee398873546a929340 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 11 Apr 2017 09:12:46 -0600 Subject: [PATCH 236/267] Removing more memory churn in pair_exp6_rx_kokkos --- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 67 ++++++++++++++++++++---------- src/KOKKOS/pair_exp6_rx_kokkos.h | 24 +++++++++++ 2 files changed, 69 insertions(+), 22 deletions(-) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 312f1c6076..51cf1a72e7 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -204,6 +204,29 @@ void 
PairExp6rxKokkos::compute(int eflag_in, int vflag_in) PairExp6ParamData.alphaOld2 = typename AT::t_float_1d("PairExp6ParamData.alphaOld2" ,np_total); PairExp6ParamData.rmOld2 = typename AT::t_float_1d("PairExp6ParamData.rmOld2" ,np_total); PairExp6ParamData.mixWtSite2old = typename AT::t_float_1d("PairExp6ParamData.mixWtSite2old",np_total); + + PairExp6ParamDataVect.epsilon = typename AT::t_float_1d("PairExp6ParamDataVect.epsilon" ,np_total);; + PairExp6ParamDataVect.rm3 = typename AT::t_float_1d("PairExp6ParamDataVect.rm3" ,np_total);; + PairExp6ParamDataVect.alpha = typename AT::t_float_1d("PairExp6ParamDataVect.alpha" ,np_total);; + PairExp6ParamDataVect.xMolei = typename AT::t_float_1d("PairExp6ParamDataVect.xMolei" ,np_total);; + PairExp6ParamDataVect.epsilon_old = typename AT::t_float_1d("PairExp6ParamDataVect.epsilon_old" ,np_total);; + PairExp6ParamDataVect.rm3_old = typename AT::t_float_1d("PairExp6ParamDataVect.rm3_old" ,np_total);; + PairExp6ParamDataVect.alpha_old = typename AT::t_float_1d("PairExp6ParamDataVect.alpha_old" ,np_total);; + PairExp6ParamDataVect.xMolei_old = typename AT::t_float_1d("PairExp6ParamDataVect.xMolei_old" ,np_total);; + PairExp6ParamDataVect.fractionOFA = typename AT::t_float_1d("PairExp6ParamDataVect.fractionOFA" ,np_total);; + PairExp6ParamDataVect.fraction1 = typename AT::t_float_1d("PairExp6ParamDataVect.fraction1" ,np_total);; + PairExp6ParamDataVect.fraction2 = typename AT::t_float_1d("PairExp6ParamDataVect.fraction2" ,np_total);; + PairExp6ParamDataVect.nMoleculesOFA = typename AT::t_float_1d("PairExp6ParamDataVect.nMoleculesOFA" ,np_total);; + PairExp6ParamDataVect.nMolecules1 = typename AT::t_float_1d("PairExp6ParamDataVect.nMolecules1" ,np_total);; + PairExp6ParamDataVect.nMolecules2 = typename AT::t_float_1d("PairExp6ParamDataVect.nMolecules2" ,np_total);; + PairExp6ParamDataVect.nTotal = typename AT::t_float_1d("PairExp6ParamDataVect.nTotal" ,np_total);; + PairExp6ParamDataVect.fractionOFAold = typename 
AT::t_float_1d("PairExp6ParamDataVect.fractionOFAold" ,np_total);; + PairExp6ParamDataVect.fractionOld1 = typename AT::t_float_1d("PairExp6ParamDataVect.fractionOld1" ,np_total);; + PairExp6ParamDataVect.fractionOld2 = typename AT::t_float_1d("PairExp6ParamDataVect.fractionOld2" ,np_total);; + PairExp6ParamDataVect.nMoleculesOFAold = typename AT::t_float_1d("PairExp6ParamDataVect.nMoleculesOFAold",np_total);; + PairExp6ParamDataVect.nMoleculesOld1 = typename AT::t_float_1d("PairExp6ParamDataVect.nMoleculesOld1" ,np_total);; + PairExp6ParamDataVect.nMoleculesOld2 = typename AT::t_float_1d("PairExp6ParamDataVect.nMoleculesOld2" ,np_total);; + PairExp6ParamDataVect.nTotalold = typename AT::t_float_1d("PairExp6ParamDataVect.nTotalold" ,np_total);; } else Kokkos::parallel_for(Kokkos::RangePolicy(0,np_total),*this); @@ -2094,31 +2117,31 @@ template void PairExp6rxKokkos::getMixingWeightsVect(const int np_total, int errorFlag, ArrayT &epsilon1, ArrayT &alpha1, ArrayT &rm1, ArrayT &mixWtSite1, ArrayT &epsilon2, ArrayT &alpha2, ArrayT &rm2, ArrayT &mixWtSite2, ArrayT &epsilon1_old, ArrayT &alpha1_old, ArrayT &rm1_old, ArrayT &mixWtSite1old, ArrayT &epsilon2_old, ArrayT &alpha2_old, ArrayT &rm2_old, ArrayT &mixWtSite2old) const { - ArrayT epsilon("PairExp6ParamData.epsilon", np_total); - ArrayT rm3("PairExp6ParamData.rm3", np_total); - ArrayT alpha("PairExp6ParamData.alpha", np_total); - ArrayT xMolei("PairExp6ParamData.xMolei", np_total); + ArrayT epsilon = PairExp6ParamDataVect.epsilon ; + ArrayT rm3 = PairExp6ParamDataVect.rm3 ; + ArrayT alpha = PairExp6ParamDataVect.alpha ; + ArrayT xMolei = PairExp6ParamDataVect.xMolei ; - ArrayT epsilon_old("PairExp6ParamData.epsilon_old", np_total); - ArrayT rm3_old("PairExp6ParamData.rm3_old", np_total); - ArrayT alpha_old("PairExp6ParamData.alpha_old", np_total); - ArrayT xMolei_old("PairExp6ParamData.xMolei_old", np_total); + ArrayT epsilon_old = PairExp6ParamDataVect.epsilon_old ; + ArrayT rm3_old = PairExp6ParamDataVect.rm3_old ; 
+ ArrayT alpha_old = PairExp6ParamDataVect.alpha_old ; + ArrayT xMolei_old = PairExp6ParamDataVect.xMolei_old ; - ArrayT fractionOFA("PairExp6ParamData.fractionOFA", np_total); - ArrayT fraction1("PairExp6ParamData.fraction1", np_total); - ArrayT fraction2("PairExp6ParamData.fraction2", np_total); - ArrayT nMoleculesOFA("PairExp6ParamData.nMoleculesOFA", np_total); - ArrayT nMolecules1("PairExp6ParamData.nMolecules1", np_total); - ArrayT nMolecules2("PairExp6ParamData.nMolecules2", np_total); - ArrayT nTotal("PairExp6ParamData.nTotal", np_total); + ArrayT fractionOFA = PairExp6ParamDataVect.fractionOFA ; + ArrayT fraction1 = PairExp6ParamDataVect.fraction1 ; + ArrayT fraction2 = PairExp6ParamDataVect.fraction2 ; + ArrayT nMoleculesOFA = PairExp6ParamDataVect.nMoleculesOFA ; + ArrayT nMolecules1 = PairExp6ParamDataVect.nMolecules1 ; + ArrayT nMolecules2 = PairExp6ParamDataVect.nMolecules2 ; + ArrayT nTotal = PairExp6ParamDataVect.nTotal ; - ArrayT fractionOFAold("PairExp6ParamData.fractionOFAold", np_total); - ArrayT fractionOld1("PairExp6ParamData.fractionOld1", np_total); - ArrayT fractionOld2("PairExp6ParamData.fractionOld2", np_total); - ArrayT nMoleculesOFAold("PairExp6ParamData.nMoleculesOFAold", np_total); - ArrayT nMoleculesOld1("PairExp6ParamData.nMoleculesOld1", np_total); - ArrayT nMoleculesOld2("PairExp6ParamData.nMoleculesOld2", np_total); - ArrayT nTotalold("PairExp6ParamData.nTotalold", np_total); + ArrayT fractionOFAold = PairExp6ParamDataVect.fractionOFAold ; + ArrayT fractionOld1 = PairExp6ParamDataVect.fractionOld1 ; + ArrayT fractionOld2 = PairExp6ParamDataVect.fractionOld2 ; + ArrayT nMoleculesOFAold = PairExp6ParamDataVect.nMoleculesOFAold; + ArrayT nMoleculesOld1 = PairExp6ParamDataVect.nMoleculesOld1 ; + ArrayT nMoleculesOld2 = PairExp6ParamDataVect.nMoleculesOld2 ; + ArrayT nTotalold = PairExp6ParamDataVect.nTotalold ; int errorFlag1 = 0, errorFlag2 = 0; diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.h b/src/KOKKOS/pair_exp6_rx_kokkos.h index 
5e9fb4e3e3..09283662a2 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.h +++ b/src/KOKKOS/pair_exp6_rx_kokkos.h @@ -52,6 +52,29 @@ struct PairExp6ParamDataTypeKokkos {} }; +template +struct PairExp6ParamDataTypeKokkosVect +{ + typedef ArrayTypes AT; + + typename AT::t_float_1d epsilon, rm3, alpha, xMolei, epsilon_old, rm3_old, + alpha_old, xMolei_old, fractionOFA, fraction1, + fraction2, nMoleculesOFA, nMolecules1, nMolecules2, + nTotal, fractionOFAold, fractionOld1, fractionOld2, + nMoleculesOFAold, nMoleculesOld1, nMoleculesOld2, + nTotalold; + + // Default constructor -- nullify everything. + PairExp6ParamDataTypeKokkosVect(void) + : epsilon(NULL), rm3(NULL), alpha(NULL), xMolei(NULL), epsilon_old(NULL), rm3_old(NULL), + alpha_old(NULL), xMolei_old(NULL), fractionOFA(NULL), fraction1(NULL), + fraction2(NULL), nMoleculesOFA(NULL), nMolecules1(NULL), nMolecules2(NULL), + nTotal(NULL), fractionOFAold(NULL), fractionOld1(NULL), fractionOld2(NULL), + nMoleculesOFAold(NULL), nMoleculesOld1(NULL), nMoleculesOld2(NULL), + nTotalold(NULL) + {} +}; + struct TagPairExp6rxZeroMixingWeights{}; struct TagPairExp6rxgetMixingWeights{}; @@ -148,6 +171,7 @@ class PairExp6rxKokkos : public PairExp6rx { typename AT::t_int_1d_randomread d_numneigh; PairExp6ParamDataTypeKokkos PairExp6ParamData; + PairExp6ParamDataTypeKokkosVect PairExp6ParamDataVect; void allocate(); DAT::tdual_int_1d k_mol2param; // mapping from molecule to parameters From ca4619e22791294cb7e63a0043869504350772ed Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 11 Apr 2017 09:14:21 -0600 Subject: [PATCH 237/267] Fix format issue in pair_exp6_rx_kokkos --- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 44 +++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 51cf1a72e7..5b84f09fd6 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -205,28 +205,28 @@ void 
PairExp6rxKokkos::compute(int eflag_in, int vflag_in) PairExp6ParamData.rmOld2 = typename AT::t_float_1d("PairExp6ParamData.rmOld2" ,np_total); PairExp6ParamData.mixWtSite2old = typename AT::t_float_1d("PairExp6ParamData.mixWtSite2old",np_total); - PairExp6ParamDataVect.epsilon = typename AT::t_float_1d("PairExp6ParamDataVect.epsilon" ,np_total);; - PairExp6ParamDataVect.rm3 = typename AT::t_float_1d("PairExp6ParamDataVect.rm3" ,np_total);; - PairExp6ParamDataVect.alpha = typename AT::t_float_1d("PairExp6ParamDataVect.alpha" ,np_total);; - PairExp6ParamDataVect.xMolei = typename AT::t_float_1d("PairExp6ParamDataVect.xMolei" ,np_total);; - PairExp6ParamDataVect.epsilon_old = typename AT::t_float_1d("PairExp6ParamDataVect.epsilon_old" ,np_total);; - PairExp6ParamDataVect.rm3_old = typename AT::t_float_1d("PairExp6ParamDataVect.rm3_old" ,np_total);; - PairExp6ParamDataVect.alpha_old = typename AT::t_float_1d("PairExp6ParamDataVect.alpha_old" ,np_total);; - PairExp6ParamDataVect.xMolei_old = typename AT::t_float_1d("PairExp6ParamDataVect.xMolei_old" ,np_total);; - PairExp6ParamDataVect.fractionOFA = typename AT::t_float_1d("PairExp6ParamDataVect.fractionOFA" ,np_total);; - PairExp6ParamDataVect.fraction1 = typename AT::t_float_1d("PairExp6ParamDataVect.fraction1" ,np_total);; - PairExp6ParamDataVect.fraction2 = typename AT::t_float_1d("PairExp6ParamDataVect.fraction2" ,np_total);; - PairExp6ParamDataVect.nMoleculesOFA = typename AT::t_float_1d("PairExp6ParamDataVect.nMoleculesOFA" ,np_total);; - PairExp6ParamDataVect.nMolecules1 = typename AT::t_float_1d("PairExp6ParamDataVect.nMolecules1" ,np_total);; - PairExp6ParamDataVect.nMolecules2 = typename AT::t_float_1d("PairExp6ParamDataVect.nMolecules2" ,np_total);; - PairExp6ParamDataVect.nTotal = typename AT::t_float_1d("PairExp6ParamDataVect.nTotal" ,np_total);; - PairExp6ParamDataVect.fractionOFAold = typename AT::t_float_1d("PairExp6ParamDataVect.fractionOFAold" ,np_total);; - PairExp6ParamDataVect.fractionOld1 = 
typename AT::t_float_1d("PairExp6ParamDataVect.fractionOld1" ,np_total);; - PairExp6ParamDataVect.fractionOld2 = typename AT::t_float_1d("PairExp6ParamDataVect.fractionOld2" ,np_total);; - PairExp6ParamDataVect.nMoleculesOFAold = typename AT::t_float_1d("PairExp6ParamDataVect.nMoleculesOFAold",np_total);; - PairExp6ParamDataVect.nMoleculesOld1 = typename AT::t_float_1d("PairExp6ParamDataVect.nMoleculesOld1" ,np_total);; - PairExp6ParamDataVect.nMoleculesOld2 = typename AT::t_float_1d("PairExp6ParamDataVect.nMoleculesOld2" ,np_total);; - PairExp6ParamDataVect.nTotalold = typename AT::t_float_1d("PairExp6ParamDataVect.nTotalold" ,np_total);; + PairExp6ParamDataVect.epsilon = typename AT::t_float_1d("PairExp6ParamDataVect.epsilon" ,np_total); + PairExp6ParamDataVect.rm3 = typename AT::t_float_1d("PairExp6ParamDataVect.rm3" ,np_total); + PairExp6ParamDataVect.alpha = typename AT::t_float_1d("PairExp6ParamDataVect.alpha" ,np_total); + PairExp6ParamDataVect.xMolei = typename AT::t_float_1d("PairExp6ParamDataVect.xMolei" ,np_total); + PairExp6ParamDataVect.epsilon_old = typename AT::t_float_1d("PairExp6ParamDataVect.epsilon_old" ,np_total); + PairExp6ParamDataVect.rm3_old = typename AT::t_float_1d("PairExp6ParamDataVect.rm3_old" ,np_total); + PairExp6ParamDataVect.alpha_old = typename AT::t_float_1d("PairExp6ParamDataVect.alpha_old" ,np_total); + PairExp6ParamDataVect.xMolei_old = typename AT::t_float_1d("PairExp6ParamDataVect.xMolei_old" ,np_total); + PairExp6ParamDataVect.fractionOFA = typename AT::t_float_1d("PairExp6ParamDataVect.fractionOFA" ,np_total); + PairExp6ParamDataVect.fraction1 = typename AT::t_float_1d("PairExp6ParamDataVect.fraction1" ,np_total); + PairExp6ParamDataVect.fraction2 = typename AT::t_float_1d("PairExp6ParamDataVect.fraction2" ,np_total); + PairExp6ParamDataVect.nMoleculesOFA = typename AT::t_float_1d("PairExp6ParamDataVect.nMoleculesOFA" ,np_total); + PairExp6ParamDataVect.nMolecules1 = typename 
AT::t_float_1d("PairExp6ParamDataVect.nMolecules1" ,np_total); + PairExp6ParamDataVect.nMolecules2 = typename AT::t_float_1d("PairExp6ParamDataVect.nMolecules2" ,np_total); + PairExp6ParamDataVect.nTotal = typename AT::t_float_1d("PairExp6ParamDataVect.nTotal" ,np_total); + PairExp6ParamDataVect.fractionOFAold = typename AT::t_float_1d("PairExp6ParamDataVect.fractionOFAold" ,np_total); + PairExp6ParamDataVect.fractionOld1 = typename AT::t_float_1d("PairExp6ParamDataVect.fractionOld1" ,np_total); + PairExp6ParamDataVect.fractionOld2 = typename AT::t_float_1d("PairExp6ParamDataVect.fractionOld2" ,np_total); + PairExp6ParamDataVect.nMoleculesOFAold = typename AT::t_float_1d("PairExp6ParamDataVect.nMoleculesOFAold",np_total); + PairExp6ParamDataVect.nMoleculesOld1 = typename AT::t_float_1d("PairExp6ParamDataVect.nMoleculesOld1" ,np_total); + PairExp6ParamDataVect.nMoleculesOld2 = typename AT::t_float_1d("PairExp6ParamDataVect.nMoleculesOld2" ,np_total); + PairExp6ParamDataVect.nTotalold = typename AT::t_float_1d("PairExp6ParamDataVect.nTotalold" ,np_total); } else Kokkos::parallel_for(Kokkos::RangePolicy(0,np_total),*this); From 36cbe439780dc8b44ecbb25036327853033aab68 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 6 Jun 2017 10:51:26 -0600 Subject: [PATCH 238/267] Fixing some CUDA runtime issues in npair_ssa_kokkos --- src/KOKKOS/npair_ssa_kokkos.cpp | 100 +++++++++++++++++++++++--------- 1 file changed, 74 insertions(+), 26 deletions(-) diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index ba4bc9171c..0c3a5985ff 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -149,17 +149,21 @@ void NPairSSAKokkos::copy_stencil_info() k_ssa_phaseOff = DAT::tdual_int_1d_3("NPairSSAKokkos:ssa_phaseOff",ssa_phaseCt); ssa_phaseOff = k_ssa_phaseOff.view(); } + auto h_ssa_phaseOff = k_ssa_phaseOff.h_view; + k_ssa_phaseOff.sync(); int workPhase = 0; for (int zoff = sz1 - 1; zoff >= 0; --zoff) { for (int yoff = sy1 - 
1; yoff >= 0; --yoff) { for (int xoff = sx1 - 1; xoff >= 0; --xoff) { - ssa_phaseOff(workPhase, 0) = xoff; - ssa_phaseOff(workPhase, 1) = yoff; - ssa_phaseOff(workPhase, 2) = zoff; + h_ssa_phaseOff(workPhase, 0) = xoff; + h_ssa_phaseOff(workPhase, 1) = yoff; + h_ssa_phaseOff(workPhase, 2) = zoff; workPhase++; } } } + k_ssa_phaseOff.modify(); + k_ssa_phaseOff.sync(); } @@ -250,8 +254,25 @@ void NPairSSAKokkos::build(NeighList *list_) ssa_itemLen = k_ssa_itemLen.view(); } + k_ssa_itemLoc.sync(); + k_ssa_itemLen.sync(); + k_ssa_gitemLoc.sync(); + k_ssa_gitemLen.sync(); + k_ssa_phaseOff.sync(); + k_ssa_phaseLen.sync(); + k_ssa_gphaseLen.sync(); + auto h_ssa_itemLoc = k_ssa_itemLoc.h_view; + auto h_ssa_itemLen = k_ssa_itemLen.h_view; + auto h_ssa_gitemLoc = k_ssa_gitemLoc.h_view; + auto h_ssa_gitemLen = k_ssa_gitemLen.h_view; + auto h_ssa_phaseOff = k_ssa_phaseOff.h_view; + auto h_ssa_phaseLen = k_ssa_phaseLen.h_view; + auto h_ssa_gphaseLen = k_ssa_gphaseLen.h_view; + { // Preflight the neighbor list workplan const typename ArrayTypes::t_int_1d_const c_bincount = k_bincount.view(); + k_bincount.sync(); + auto h_bincount = k_bincount.h_view; const typename ArrayTypes::t_int_2d_const c_bins = k_bins.view(); const typename ArrayTypes::t_int_1d_const_um c_stencil = k_stencil.view(); const typename ArrayTypes::t_int_1d_const c_nstencil_ssa = k_nstencil_ssa.view(); @@ -259,9 +280,9 @@ void NPairSSAKokkos::build(NeighList *list_) // loop over bins with local atoms, counting half of the neighbors for (int workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) { - int zoff = ssa_phaseOff(workPhase, 2); - int yoff = ssa_phaseOff(workPhase, 1); - int xoff = ssa_phaseOff(workPhase, 0); + int zoff = h_ssa_phaseOff(workPhase, 2); + int yoff = h_ssa_phaseOff(workPhase, 1); + int xoff = h_ssa_phaseOff(workPhase, 0); int workItem = 0; for (int zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) { for (int ybin = lbinylo + yoff - sy1 + 1; ybin < lbinyhi; ybin += sy1) { @@ -276,14 +297,14 
@@ void NPairSSAKokkos::build(NeighList *list_) if ((s_xbin < lbinxlo) || (s_xbin >= lbinxhi)) continue; const int ibin = zbin*mbiny*mbinx + s_ybin*mbinx + s_xbin; - const int ibinCt = c_bincount(ibin); + const int ibinCt = h_bincount(ibin); if (ibinCt > 0) { int base_n = 0; bool include_same = false; // count all local atoms in the current stencil "subphase" as potential neighbors for (int k = c_nstencil_ssa(subphase); k < c_nstencil_ssa(subphase+1); k++) { const int jbin = ibin+c_stencil(k); - if (jbin != ibin) base_n += c_bincount(jbin); + if (jbin != ibin) base_n += h_bincount(jbin); else include_same = true; } // Calculate how many ibin particles would have had some neighbors @@ -291,10 +312,10 @@ void NPairSSAKokkos::build(NeighList *list_) else if (include_same) inum += ibinCt - 1; } } - ssa_itemLoc(workPhase,workItem) = inum_start; // record where workItem starts in ilist - ssa_itemLen(workPhase,workItem) = inum - inum_start; // record workItem length + h_ssa_itemLoc(workPhase,workItem) = inum_start; // record where workItem starts in ilist + h_ssa_itemLen(workPhase,workItem) = inum - inum_start; // record workItem length #ifdef DEBUG_SSA_BUILD_LOCALS -if (ssa_itemLen(workPhase,workItem) < 0) fprintf(stdout, "undr%03d phase (%3d,%3d) inum %d - inum_start %d UNDERFLOW\n" +if (h_ssa_itemLen(workPhase,workItem) < 0) fprintf(stdout, "undr%03d phase (%3d,%3d) inum %d - inum_start %d UNDERFLOW\n" ,comm->me ,workPhase ,workItem @@ -311,14 +332,14 @@ if (ssa_itemLen(workPhase,workItem) < 0) fprintf(stdout, "undr%03d phase (%3d,%3 fprintf(stdout, "phas%03d phase %3d could use %6d inums, expected %6d inums. 
maxworkItems = %3d, inums/workItems = %g\n" ,comm->me ,workPhase - ,inum - ssa_itemLoc(workPhase, 0) + ,inum - h_ssa_itemLoc(workPhase, 0) ,(nlocal*4 + ssa_phaseCt - 1) / ssa_phaseCt ,workItem - ,(inum - ssa_itemLoc(workPhase, 0)) / (double) workItem + ,(inum - h_ssa_itemLoc(workPhase, 0)) / (double) workItem ); #endif // record where workPhase ends - ssa_phaseLen(workPhase) = workItem; + h_ssa_phaseLen(workPhase) = workItem; } #ifdef DEBUG_SSA_BUILD_LOCALS fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inums/phase = %g\n" @@ -331,15 +352,30 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu #endif nl_size = inum; // record how much space is needed for the local work plan } + // count how many ghosts might have neighbors, and increase the work plan storage for (int workPhase = 0; workPhase < ssa_gphaseCt; workPhase++) { int len = k_gbincount.h_view(workPhase + 1); - ssa_gitemLoc(workPhase,0) = nl_size; // record where workItem starts in ilist - ssa_gitemLen(workPhase,0) = len; + h_ssa_gitemLoc(workPhase,0) = nl_size; // record where workItem starts in ilist + h_ssa_gitemLen(workPhase,0) = len; nl_size += len; } list->grow(nl_size); // Make special larger SSA neighbor list + k_ssa_itemLoc.modify(); + k_ssa_itemLen.modify(); + k_ssa_gitemLoc.modify(); + k_ssa_gitemLen.modify(); + k_ssa_phaseOff.modify(); + k_ssa_phaseLen.modify(); + k_ssa_itemLoc.sync(); + k_ssa_itemLen.sync(); + k_ssa_gitemLen.sync(); + k_ssa_gitemLoc.sync(); + k_ssa_phaseOff.sync(); + k_ssa_phaseLen.sync(); + k_ssa_gphaseLen.sync(); + NPairSSAKokkosExecute data(*list, k_cutneighsq.view(), @@ -422,15 +458,27 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. 
inu Kokkos::parallel_for(ssa_phaseCt, LAMMPS_LAMBDA (const int workPhase) { data.build_locals_onePhase(firstTry, comm->me, workPhase); }); - data.neigh_list.inum = ssa_itemLoc(ssa_phaseCt-1,ssa_phaseLen(ssa_phaseCt-1)-1) + - ssa_itemLen(ssa_phaseCt-1,ssa_phaseLen(ssa_phaseCt-1)-1); + k_ssa_itemLoc.modify(); + k_ssa_itemLen.modify(); + k_ssa_phaseLen.modify(); + k_ssa_itemLoc.sync(); + k_ssa_itemLen.sync(); + k_ssa_phaseLen.sync(); + data.neigh_list.inum = h_ssa_itemLoc(ssa_phaseCt-1,h_ssa_phaseLen(ssa_phaseCt-1)-1) + + h_ssa_itemLen(ssa_phaseCt-1,h_ssa_phaseLen(ssa_phaseCt-1)-1); // loop over AIR ghost atoms, storing their local neighbors Kokkos::parallel_for(ssa_gphaseCt, LAMMPS_LAMBDA (const int workPhase) { data.build_ghosts_onePhase(workPhase); }); - data.neigh_list.gnum = ssa_gitemLoc(ssa_gphaseCt-1,ssa_gphaseLen(ssa_gphaseCt-1)-1) + - ssa_gitemLen(ssa_gphaseCt-1,ssa_gphaseLen(ssa_gphaseCt-1)-1) - data.neigh_list.inum; + k_ssa_gitemLoc.modify(); + k_ssa_gitemLen.modify(); + k_ssa_gphaseLen.modify(); + k_ssa_gitemLoc.sync(); + k_ssa_gitemLen.sync(); + k_ssa_gphaseLen.sync(); + data.neigh_list.gnum = h_ssa_gitemLoc(ssa_gphaseCt-1,h_ssa_gphaseLen(ssa_gphaseCt-1)-1) + + h_ssa_gitemLen(ssa_gphaseCt-1,h_ssa_gphaseLen(ssa_gphaseCt-1)-1) - data.neigh_list.inum; firstTry = false; DeviceType::fence(); @@ -445,12 +493,12 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. 
inu } } - k_ssa_phaseLen.modify(); - k_ssa_itemLoc.modify(); - k_ssa_itemLen.modify(); - k_ssa_gphaseLen.modify(); - k_ssa_gitemLoc.modify(); - k_ssa_gitemLen.modify(); + //k_ssa_phaseLen.modify(); + //k_ssa_itemLoc.modify(); + //k_ssa_itemLen.modify(); + //k_ssa_gphaseLen.modify(); + //k_ssa_gitemLoc.modify(); + //k_ssa_gitemLen.modify(); list->inum = data.neigh_list.inum; //FIXME once the above is in a parallel_for list->gnum = data.neigh_list.gnum; // it will need a deep_copy or something From efe60bf991c69d0cdd0e1f960f060c53abb62457 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 6 Jun 2017 13:10:04 -0600 Subject: [PATCH 239/267] Fixing more CUDA runtime issues --- src/KOKKOS/nbin_ssa_kokkos.cpp | 2 ++ src/KOKKOS/npair_ssa_kokkos.cpp | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/nbin_ssa_kokkos.cpp b/src/KOKKOS/nbin_ssa_kokkos.cpp index 6c9e3a3446..f11d7e18ef 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.cpp +++ b/src/KOKKOS/nbin_ssa_kokkos.cpp @@ -212,6 +212,8 @@ void NBinSSAKokkos::bin_atoms() }); DeviceType::fence(); } + k_bins.modify(); + k_bincount.modify(); c_bins = bins; // bins won't change until the next bin_atoms //now dispose of the k_binID array diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index 0c3a5985ff..368fb1a6ed 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -275,7 +275,11 @@ void NPairSSAKokkos::build(NeighList *list_) auto h_bincount = k_bincount.h_view; const typename ArrayTypes::t_int_2d_const c_bins = k_bins.view(); const typename ArrayTypes::t_int_1d_const_um c_stencil = k_stencil.view(); + k_stencil.sync(); + auto h_stencil = k_stencil.h_view; const typename ArrayTypes::t_int_1d_const c_nstencil_ssa = k_nstencil_ssa.view(); + k_nstencil_ssa.sync(); + auto h_nstencil_ssa = k_nstencil_ssa.h_view; int inum = 0; // loop over bins with local atoms, counting half of the neighbors @@ -302,8 +306,8 @@ void 
NPairSSAKokkos::build(NeighList *list_) int base_n = 0; bool include_same = false; // count all local atoms in the current stencil "subphase" as potential neighbors - for (int k = c_nstencil_ssa(subphase); k < c_nstencil_ssa(subphase+1); k++) { - const int jbin = ibin+c_stencil(k); + for (int k = h_nstencil_ssa(subphase); k < h_nstencil_ssa(subphase+1); k++) { + const int jbin = ibin+h_stencil(k); if (jbin != ibin) base_n += h_bincount(jbin); else include_same = true; } From 520ab26bd966b5fda778b5e30f4cbdeb95d8e842 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 7 Jun 2017 15:07:53 -0600 Subject: [PATCH 240/267] Fixing more CUDA runtime issues --- src/KOKKOS/nbin_ssa_kokkos.cpp | 3 +++ src/KOKKOS/npair_ssa_kokkos.cpp | 9 ++------- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/nbin_ssa_kokkos.cpp b/src/KOKKOS/nbin_ssa_kokkos.cpp index f11d7e18ef..883ba25b24 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.cpp +++ b/src/KOKKOS/nbin_ssa_kokkos.cpp @@ -216,6 +216,9 @@ void NBinSSAKokkos::bin_atoms() k_bincount.modify(); c_bins = bins; // bins won't change until the next bin_atoms + k_gbins.modify(); + k_gbincount.modify(); + //now dispose of the k_binID array k_binID = DAT::tdual_int_1d("NBinSSAKokkos::binID",0); binID = k_binID.view(); diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index 368fb1a6ed..aec482993d 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -260,24 +260,18 @@ void NPairSSAKokkos::build(NeighList *list_) k_ssa_gitemLen.sync(); k_ssa_phaseOff.sync(); k_ssa_phaseLen.sync(); - k_ssa_gphaseLen.sync(); auto h_ssa_itemLoc = k_ssa_itemLoc.h_view; auto h_ssa_itemLen = k_ssa_itemLen.h_view; auto h_ssa_gitemLoc = k_ssa_gitemLoc.h_view; auto h_ssa_gitemLen = k_ssa_gitemLen.h_view; auto h_ssa_phaseOff = k_ssa_phaseOff.h_view; auto h_ssa_phaseLen = k_ssa_phaseLen.h_view; - auto h_ssa_gphaseLen = k_ssa_gphaseLen.h_view; { // Preflight the neighbor list workplan - const 
typename ArrayTypes::t_int_1d_const c_bincount = k_bincount.view(); k_bincount.sync(); auto h_bincount = k_bincount.h_view; - const typename ArrayTypes::t_int_2d_const c_bins = k_bins.view(); - const typename ArrayTypes::t_int_1d_const_um c_stencil = k_stencil.view(); k_stencil.sync(); auto h_stencil = k_stencil.h_view; - const typename ArrayTypes::t_int_1d_const c_nstencil_ssa = k_nstencil_ssa.view(); k_nstencil_ssa.sync(); auto h_nstencil_ssa = k_nstencil_ssa.h_view; int inum = 0; @@ -358,6 +352,7 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu } // count how many ghosts might have neighbors, and increase the work plan storage + k_gbincount.sync(); for (int workPhase = 0; workPhase < ssa_gphaseCt; workPhase++) { int len = k_gbincount.h_view(workPhase + 1); h_ssa_gitemLoc(workPhase,0) = nl_size; // record where workItem starts in ilist @@ -370,7 +365,6 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu k_ssa_itemLen.modify(); k_ssa_gitemLoc.modify(); k_ssa_gitemLen.modify(); - k_ssa_phaseOff.modify(); k_ssa_phaseLen.modify(); k_ssa_itemLoc.sync(); k_ssa_itemLen.sync(); @@ -481,6 +475,7 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. 
inu k_ssa_gitemLoc.sync(); k_ssa_gitemLen.sync(); k_ssa_gphaseLen.sync(); + auto h_ssa_gphaseLen = k_ssa_gphaseLen.h_view; data.neigh_list.gnum = h_ssa_gitemLoc(ssa_gphaseCt-1,h_ssa_gphaseLen(ssa_gphaseCt-1)-1) + h_ssa_gitemLen(ssa_gphaseCt-1,h_ssa_gphaseLen(ssa_gphaseCt-1)-1) - data.neigh_list.inum; firstTry = false; From 611bb6f130355d88c1b89e710cf963b629b2a443 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 8 Jun 2017 09:31:51 -0600 Subject: [PATCH 241/267] Reduce memory churn in pair_table_rx_kokkos --- src/KOKKOS/pair_table_rx_kokkos.cpp | 26 ++++++++++++++------------ src/KOKKOS/pair_table_rx_kokkos.h | 5 +++++ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index eacaf83cf5..2f5a670537 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -534,10 +534,10 @@ static void compute_all_items( typename ArrayTypes::t_int_1d_const d_numneigh, typename ArrayTypes::t_x_array_randomread x, typename ArrayTypes::t_int_1d_randomread type, - Kokkos::View mixWtSite1old, - Kokkos::View mixWtSite2old, - Kokkos::View mixWtSite1, - Kokkos::View mixWtSite2, + Kokkos::View const& mixWtSite1old, + Kokkos::View const& mixWtSite2old, + Kokkos::View const& mixWtSite1, + Kokkos::View const& mixWtSite2, Few special_lj, Few, MAX_TYPES_STACKPARAMS+1> m_cutsq, typename ArrayTypes::t_ffloat_2d d_cutsq, @@ -597,10 +597,10 @@ static void getAllMixingWeights( int nspecies, int isite1, int isite2, bool fractionalWeighting, - Kokkos::View mixWtSite1old, - Kokkos::View mixWtSite2old, - Kokkos::View mixWtSite1, - Kokkos::View mixWtSite2) { + Kokkos::View const& mixWtSite1old, + Kokkos::View const& mixWtSite2old, + Kokkos::View const& mixWtSite1, + Kokkos::View const& mixWtSite2) { Kokkos::parallel_for(ntotal, LAMMPS_LAMBDA(int i) { getMixingWeights(dvector,nspecies,isite1,isite2,fractionalWeighting, @@ -651,10 +651,12 @@ void PairTableRXKokkos::compute_style(int 
eflag_in, int vflag_in) // loop over neighbors of my atoms const int ntotal = atom->nlocal + atom->nghost; - auto mixWtSite1old = Kokkos::View("PairTableRXKokkos::mixWtSite1old", ntotal); - auto mixWtSite2old = Kokkos::View("PairTableRXKokkos::mixWtSite2old", ntotal); - auto mixWtSite1 = Kokkos::View("PairTableRXKokkos::mixWtSite1", ntotal); - auto mixWtSite2 = Kokkos::View("PairTableRXKokkos::mixWtSite2", ntotal); + if (ntotal > mixWtSite1.dimension_0()) { + mixWtSite1old = Kokkos::View("PairTableRXKokkos::mixWtSite1old", ntotal); + mixWtSite2old = Kokkos::View("PairTableRXKokkos::mixWtSite2old", ntotal); + mixWtSite1 = Kokkos::View("PairTableRXKokkos::mixWtSite1", ntotal); + mixWtSite2 = Kokkos::View("PairTableRXKokkos::mixWtSite2", ntotal); + } getAllMixingWeights(ntotal, atomKK->k_dvector.template view(), nspecies, isite1, isite2, fractionalWeighting, diff --git a/src/KOKKOS/pair_table_rx_kokkos.h b/src/KOKKOS/pair_table_rx_kokkos.h index 54c114a433..4230263dc9 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.h +++ b/src/KOKKOS/pair_table_rx_kokkos.h @@ -96,6 +96,11 @@ class PairTableRXKokkos : public PairTable { /* PairTableRX members */ + Kokkos::View mixWtSite1old; + Kokkos::View mixWtSite2old; + Kokkos::View mixWtSite1; + Kokkos::View mixWtSite2; + int nspecies; char *site1, *site2; int isite1, isite2; From 6f24c58c1a31d6f7fe8cac237e22b21c8a159660 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 8 Jun 2017 09:52:00 -0600 Subject: [PATCH 242/267] Reduce memory churn in fix_rx_kokkos --- src/KOKKOS/fix_rx_kokkos.cpp | 52 ++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index d994b2c5d1..92db54d234 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -79,6 +79,17 @@ FixRxKokkos::~FixRxKokkos() { //printf("Inside FixRxKokkos::~FixRxKokkos copymode= %d\n", copymode); if (copymode) return; + + if (localTempFlag) + 
memory->destroy_kokkos(k_dpdThetaLocal, dpdThetaLocal); + + memory->destroy_kokkos(k_sumWeights, sumWeights); + //memory->destroy_kokkos(k_sumWeights); + + //delete [] scratchSpace; + memory->destroy_kokkos(d_scratchSpace); + + memory->destroy_kokkos(k_cutsq); } /* ---------------------------------------------------------------------- */ @@ -1433,9 +1444,12 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF { const int count = nlocal + (newton_pair ? nghost : 0); - memory->create_kokkos (k_dpdThetaLocal, dpdThetaLocal, count, "FixRxKokkos::dpdThetaLocal"); - this->d_dpdThetaLocal = k_dpdThetaLocal.d_view; - this->h_dpdThetaLocal = k_dpdThetaLocal.h_view; + if (count > k_dpdThetaLocal.d_view.dimension_0()) { + memory->destroy_kokkos (k_dpdThetaLocal, dpdThetaLocal); + memory->create_kokkos (k_dpdThetaLocal, dpdThetaLocal, count, "FixRxKokkos::dpdThetaLocal"); + this->d_dpdThetaLocal = k_dpdThetaLocal.d_view; + this->h_dpdThetaLocal = k_dpdThetaLocal.h_view; + } const int neighflag = lmp->kokkos->neighflag; @@ -1527,7 +1541,10 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF //double *scratchSpace = new double[ scratchSpaceSize * nlocal ]; //typename ArrayTypes::t_double_1d d_scratchSpace("d_scratchSpace", scratchSpaceSize * nlocal); - memory->create_kokkos (d_scratchSpace, nlocal*scratchSpaceSize, "FixRxKokkos::d_scratchSpace"); + if (nlocal*scratchSpaceSize > d_scratchSpace.dimension_0()) { + memory->destroy_kokkos (d_scratchSpace); + memory->create_kokkos (d_scratchSpace, nlocal*scratchSpaceSize, "FixRxKokkos::d_scratchSpace"); + } #if 0 Kokkos::parallel_reduce( nlocal, LAMMPS_LAMBDA(int i, CounterType &counter) @@ -1630,9 +1647,6 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF Kokkos::parallel_reduce( Kokkos::RangePolicy >(0,nlocal), *this, TotalCounters); #endif - //delete [] scratchSpace; - memory->destroy_kokkos (d_scratchSpace); - TimerType timer_ODE = getTimeStamp(); // Check the error flag 
for any failures. @@ -1651,9 +1665,6 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF atomKK->modified ( Host, DVECTOR_MASK ); - if (localTempFlag) - memory->destroy_kokkos(k_dpdThetaLocal, dpdThetaLocal); - TimerType timer_stop = getTimeStamp(); double time_ODE = getElapsedTime(timer_localTemperature, timer_ODE); @@ -2012,8 +2023,11 @@ void FixRxKokkos::computeLocalTemperature() const int ntypes = atom->ntypes; //memory->create_kokkos (k_cutsq, h_cutsq, ntypes+1, ntypes+1, "pair:cutsq"); - memory->create_kokkos (k_cutsq, ntypes+1, ntypes+1, "FixRxKokkos::k_cutsq"); - d_cutsq = k_cutsq.template view(); + if (ntypes+1 > k_cutsq.dimension_0()) { + memory->destroy_kokkos (k_cutsq); + memory->create_kokkos (k_cutsq, ntypes+1, ntypes+1, "FixRxKokkos::k_cutsq"); + d_cutsq = k_cutsq.template view(); + } for (int i = 1; i <= ntypes; ++i) for (int j = i; j <= ntypes; ++j) @@ -2030,9 +2044,12 @@ void FixRxKokkos::computeLocalTemperature() int sumWeightsCt = nlocal + (NEWTON_PAIR ? nghost : 0); //memory->create_kokkos (k_sumWeights, sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights"); - memory->create_kokkos (k_sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights"); - d_sumWeights = k_sumWeights.d_view; - h_sumWeights = k_sumWeights.h_view; + if (sumWeightsCt > k_sumWeights.d_view.dimension_0()) { + memory->destroy_kokkos(k_sumWeights, sumWeights); + memory->create_kokkos (k_sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights"); + d_sumWeights = k_sumWeights.d_view; + h_sumWeights = k_sumWeights.h_view; + } // Initialize the accumulator to zero ... //Kokkos::parallel_for (sumWeightsCt, @@ -2165,11 +2182,6 @@ void FixRxKokkos::computeLocalTemperature() Kokkos::parallel_for (Kokkos::RangePolicy >(0, nlocal), *this); #endif - // Clean up the local kokkos data. 
- //memory->destroy_kokkos(k_cutsq, h_cutsq); - memory->destroy_kokkos(k_cutsq); - //memory->destroy_kokkos(k_sumWeights, sumWeights); - memory->destroy_kokkos(k_sumWeights); } /* ---------------------------------------------------------------------- */ From 43cfa10ea48df7323ce4c15996aacddcb66b2228 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 8 Jun 2017 09:58:10 -0600 Subject: [PATCH 243/267] Reduce memory churn in pair_multi_lucy_rx_kokkos --- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index ef30fdc6f6..f7e1bad056 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -180,10 +180,12 @@ void PairMultiLucyRXKokkos::compute_style(int eflag_in, int vflag_in { const int ntotal = nlocal + nghost; - d_mixWtSite1old = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite1old",ntotal); - d_mixWtSite2old = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite2old",ntotal); - d_mixWtSite1 = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite1",ntotal); - d_mixWtSite2 = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite2",ntotal); + if (ntotal > d_mixWtSite1.dimension_0()) { + d_mixWtSite1old = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite1old",ntotal); + d_mixWtSite2old = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite2old",ntotal); + d_mixWtSite1 = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite1",ntotal); + d_mixWtSite2 = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite2",ntotal); + } Kokkos::parallel_for(Kokkos::RangePolicy(0,ntotal),*this); } From b4b7310884382a18f9439983a4c241c24998d88c Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 8 Jun 2017 13:33:23 -0600 Subject: [PATCH 244/267] Fixing CUDA runtime issues in pair_exp6_rx_kokkos --- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 16 ++++++++-------- src/KOKKOS/pair_exp6_rx_kokkos.h | 2 +- 2 
files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 5b84f09fd6..1eb1c6c770 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -426,7 +426,7 @@ KOKKOS_INLINE_FUNCTION void PairExp6rxKokkos::operator()(TagPairExp6rxCompute, const int &ii, EV_FLOAT& ev) const { { - const bool one_type = (atom->ntypes == 1); + const bool one_type = (ntypes == 1); if (isite1 == isite2) if (one_type) this->vectorized_operator(ii, ev); @@ -797,7 +797,7 @@ KOKKOS_INLINE_FUNCTION void PairExp6rxKokkos::operator()(TagPairExp6rxComputeNoAtomics, const int &ii, EV_FLOAT& ev) const { { - const bool one_type = (atom->ntypes == 1); + const bool one_type = (ntypes == 1); if (isite1 == isite2) if (one_type) this->vectorized_operator(ii, ev); @@ -1653,18 +1653,18 @@ template void PairExp6rxKokkos::allocate() { allocated = 1; - int n = atom->ntypes; + ntypes = atom->ntypes; - memory->create(setflag,n+1,n+1,"pair:setflag"); - for (int i = 1; i <= n; i++) - for (int j = i; j <= n; j++) + memory->create(setflag,ntypes+1,ntypes+1,"pair:setflag"); + for (int i = 1; i <= ntypes; i++) + for (int j = i; j <= ntypes; j++) setflag[i][j] = 0; - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memory->create_kokkos(k_cutsq,cutsq,ntypes+1,ntypes+1,"pair:cutsq"); d_cutsq = k_cutsq.template view(); k_cutsq.template modify(); - memory->create(cut,n+1,n+1,"pair:cut_lj"); + memory->create(cut,ntypes+1,ntypes+1,"pair:cut_lj"); } diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.h b/src/KOKKOS/pair_exp6_rx_kokkos.h index 09283662a2..4c35c76851 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.h +++ b/src/KOKKOS/pair_exp6_rx_kokkos.h @@ -145,7 +145,7 @@ class PairExp6rxKokkos : public PairExp6rx { int eflag,vflag; int nlocal,newton_pair,neighflag; double special_lj[4]; - int num_threads; + int num_threads,ntypes; typename AT::t_x_array_randomread x; typename AT::t_f_array f; From 
86497949f20a2a6ae0609172e9aabf4e7221390d Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 8 Jun 2017 13:40:20 -0600 Subject: [PATCH 245/267] Fixing CUDA runtime issues in fix_shardlow_kokkos --- src/KOKKOS/fix_shardlow_kokkos.cpp | 21 ++++++++++++++++----- src/KOKKOS/fix_shardlow_kokkos.h | 2 ++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index 52287d586c..b3d4e86244 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -444,9 +444,6 @@ void FixShardlowKokkos::ssa_update_dpde( rand_type rand_gen = rand_pool.get_state(id); #endif - const double boltz_inv = 1.0/force->boltz; - const double ftm2v = force->ftm2v; - const double dt = update->dt; int ct = count; int ii = start_ii; @@ -639,6 +636,16 @@ void FixShardlowKokkos::initial_integrate(int vflag) ssa_gitemLoc = np_ssa->ssa_gitemLoc; ssa_gitemLen = np_ssa->ssa_gitemLen; + np_ssa->k_ssa_itemLoc.template sync(); + np_ssa->k_ssa_itemLen.template sync(); + np_ssa->k_ssa_gitemLoc.template sync(); + np_ssa->k_ssa_gitemLen.template sync(); + + np_ssa->k_ssa_phaseLen.template sync(); + np_ssa->k_ssa_gphaseLen.template sync(); + auto h_ssa_phaseLen = np_ssa->k_ssa_phaseLen.h_view; + auto h_ssa_gphaseLen = np_ssa->k_ssa_gphaseLen.h_view; + int maxWorkItemCt = (int) ssa_itemLoc.dimension_1(); if (maxWorkItemCt < (int) ssa_gitemLoc.dimension_1()) { maxWorkItemCt = (int) ssa_gitemLoc.dimension_1(); @@ -670,9 +677,13 @@ void FixShardlowKokkos::initial_integrate(int vflag) deep_copy(d_hist, h_hist); #endif + boltz_inv = 1.0/force->boltz; + ftm2v = force->ftm2v; + dt = update->dt; + // process neighbors in the local AIR for (int workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) { - int workItemCt = ssa_phaseLen[workPhase]; + int workItemCt = h_ssa_phaseLen[workPhase]; if(atom->ntypes > MAX_TYPES_STACKPARAMS) { Kokkos::parallel_for(workItemCt, LAMMPS_LAMBDA (const int workItem ) { @@ -692,7 +703,7 
@@ void FixShardlowKokkos::initial_integrate(int vflag) //Loop over all 13 outward directions (7 stages) for (int workPhase = 0; workPhase < ssa_gphaseCt; ++workPhase) { // int airnum = workPhase + 1; - int workItemCt = ssa_gphaseLen[workPhase]; + int workItemCt = h_ssa_gphaseLen[workPhase]; // Communicate the updated velocities to all nodes comm->forward_comm_fix(this); diff --git a/src/KOKKOS/fix_shardlow_kokkos.h b/src/KOKKOS/fix_shardlow_kokkos.h index 4dc47709e1..df8849d80b 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.h +++ b/src/KOKKOS/fix_shardlow_kokkos.h @@ -68,6 +68,8 @@ class FixShardlowKokkos : public FixShardlow { #endif protected: + double boltz_inv,ftm2v,dt; + // class PairDPDfdt *pairDPD; PairDPDfdtEnergyKokkos *k_pairDPDE; From c51cadcc6c38ff2c939fb0bed46dd73c09873c2d Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 9 Jun 2017 09:31:37 -0600 Subject: [PATCH 246/267] Fixing CUDA runtime issues in fix_shardlow_kokkos --- src/KOKKOS/fix_shardlow_kokkos.cpp | 66 ++++++++++++++++-------------- src/KOKKOS/fix_shardlow_kokkos.h | 17 +++++++- 2 files changed, 51 insertions(+), 32 deletions(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index b3d4e86244..d2fb937a57 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -436,7 +436,7 @@ template KOKKOS_INLINE_FUNCTION void FixShardlowKokkos::ssa_update_dpde( int start_ii, int count, int id -) +) const { #ifdef DPD_USE_RAN_MARS class RanMars *pRNG = pp_random[id]; @@ -682,26 +682,18 @@ void FixShardlowKokkos::initial_integrate(int vflag) dt = update->dt; // process neighbors in the local AIR - for (int workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) { + for (workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) { int workItemCt = h_ssa_phaseLen[workPhase]; - if(atom->ntypes > MAX_TYPES_STACKPARAMS) { - Kokkos::parallel_for(workItemCt, LAMMPS_LAMBDA (const int workItem ) { - int ct = ssa_itemLen(workPhase, workItem); - int ii 
= ssa_itemLoc(workPhase, workItem); - ssa_update_dpde(ii, ct, workItem); - }); - } else { - Kokkos::parallel_for(workItemCt, LAMMPS_LAMBDA (const int workItem ) { - int ct = ssa_itemLen(workPhase, workItem); - int ii = ssa_itemLoc(workPhase, workItem); - ssa_update_dpde(ii, ct, workItem); - }); - } + + if(atom->ntypes > MAX_TYPES_STACKPARAMS) + Kokkos::parallel_for(Kokkos::RangePolicy >(0,workItemCt),*this); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,workItemCt),*this); } //Loop over all 13 outward directions (7 stages) - for (int workPhase = 0; workPhase < ssa_gphaseCt; ++workPhase) { + for (workPhase = 0; workPhase < ssa_gphaseCt; ++workPhase) { // int airnum = workPhase + 1; int workItemCt = h_ssa_gphaseLen[workPhase]; @@ -713,27 +705,21 @@ void FixShardlowKokkos::initial_integrate(int vflag) // memset(&(atom->uCond[nlocal]), 0, sizeof(double)*nghost); // memset(&(atom->uMech[nlocal]), 0, sizeof(double)*nghost); + // must capture local variables, not class variables + auto l_uCond = uCond; + auto l_uMech = uMech; Kokkos::parallel_for(Kokkos::RangePolicy(nlocal,nlocal+nghost), LAMMPS_LAMBDA (const int i) { - uCond(i) = 0.0; - uMech(i) = 0.0; + l_uCond(i) = 0.0; + l_uMech(i) = 0.0; }); DeviceType::fence(); } // process neighbors in this AIR - if(atom->ntypes > MAX_TYPES_STACKPARAMS) { - Kokkos::parallel_for(workItemCt, LAMMPS_LAMBDA (const int workItem ) { - int ct = ssa_gitemLen(workPhase, workItem); - int ii = ssa_gitemLoc(workPhase, workItem); - ssa_update_dpde(ii, ct, workItem); - }); - } else { - Kokkos::parallel_for(workItemCt, LAMMPS_LAMBDA (const int workItem ) { - int ct = ssa_gitemLen(workPhase, workItem); - int ii = ssa_gitemLoc(workPhase, workItem); - ssa_update_dpde(ii, ct, workItem); - }); - } + if(atom->ntypes > MAX_TYPES_STACKPARAMS) + Kokkos::parallel_for(Kokkos::RangePolicy >(0,workItemCt),*this); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,workItemCt),*this); // Communicate the ghost deltas to the atom owners 
comm->reverse_comm_fix(this); @@ -755,6 +741,24 @@ fprintf(stdout, "\n%6d %6d,%6d %6d: " copymode = 0; } +template +template +KOKKOS_INLINE_FUNCTION +void FixShardlowKokkos::operator()(TagFixShardlowSSAUpdateDPDE, const int &workItem) const { + const int ct = ssa_itemLen(workPhase, workItem); + const int ii = ssa_itemLoc(workPhase, workItem); + ssa_update_dpde(ii, ct, workItem); +} + +template +template +KOKKOS_INLINE_FUNCTION +void FixShardlowKokkos::operator()(TagFixShardlowSSAUpdateDPDEGhost, const int &workItem) const { + const int ct = ssa_gitemLen(workPhase, workItem); + const int ii = ssa_gitemLoc(workPhase, workItem); + ssa_update_dpde(ii, ct, workItem); +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/fix_shardlow_kokkos.h b/src/KOKKOS/fix_shardlow_kokkos.h index df8849d80b..91a2fdbc97 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.h +++ b/src/KOKKOS/fix_shardlow_kokkos.h @@ -30,6 +30,12 @@ FixStyle(shardlow/kk/host,FixShardlowKokkos) namespace LAMMPS_NS { +template +struct TagFixShardlowSSAUpdateDPDE{}; + +template +struct TagFixShardlowSSAUpdateDPDEGhost{}; + template class FixShardlowKokkos : public FixShardlow { public: @@ -60,6 +66,14 @@ class FixShardlowKokkos : public FixShardlow { F_FLOAT cutinv,halfsigma,kappa,alpha; }; + template + KOKKOS_INLINE_FUNCTION + void operator()(TagFixShardlowSSAUpdateDPDE, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagFixShardlowSSAUpdateDPDEGhost, const int&) const; + #ifdef DEBUG_PAIR_CT typename AT::t_int_2d d_counters; typename HAT::t_int_2d h_counters; @@ -68,6 +82,7 @@ class FixShardlowKokkos : public FixShardlow { #endif protected: + int workPhase; double boltz_inv,ftm2v,dt; // class PairDPDfdt *pairDPD; @@ -127,7 +142,7 @@ class FixShardlowKokkos : public FixShardlow { // void ssa_update_dpd(int, int); // Constant Temperature template KOKKOS_INLINE_FUNCTION - void ssa_update_dpde(int, int, int); // Constant Energy + 
void ssa_update_dpde(int, int, int) const; // Constant Energy }; From 3c8e75ad590ae35be1002ce88281d88d8bbfc6f9 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 9 Jun 2017 10:57:35 -0600 Subject: [PATCH 247/267] Add missing sync/modify to fix_shardlow_kokkos --- src/KOKKOS/fix_shardlow_kokkos.cpp | 27 ++++++++++++++++++--------- src/KOKKOS/fix_shardlow_kokkos.h | 2 +- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index d2fb937a57..0c7c51c821 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -73,11 +73,11 @@ FixShardlowKokkos::FixShardlowKokkos(LAMMPS *lmp, int narg, char **a FixShardlow(lmp, narg, arg), k_pairDPDE(NULL), ghostmax(0), nlocal(0) , nghost(0) { kokkosable = 1; -// atomKK = (AtomKokkos *) atom; -// execution_space = ExecutionSpaceFromDevice::space; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; -// datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK; -// datamask_modify = Q_MASK | X_MASK; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; if (narg != 3) error->all(FLERR,"Illegal fix shardlow command"); @@ -167,6 +167,7 @@ void FixShardlowKokkos::init() //FIXME either create cutsq and fill it in, or just point to pairDPD's... 
// memory->destroy(cutsq); //FIXME // memory->create_kokkos(k_cutsq,cutsq,ntypes+1,ntypes+1,"FixShardlowKokkos:cutsq"); + k_pairDPDE->k_cutsq.template sync(); d_cutsq = k_pairDPDE->k_cutsq.template view(); //FIXME const double boltz2 = 2.0*force->boltz; @@ -288,10 +289,6 @@ void FixShardlowKokkos::ssa_update_dpd( rand_type rand_gen = rand_pool.get_state(id); #endif - const double theta_ij_inv = 1.0/k_pairDPD->temperature; // independent of i,j - const double boltz_inv = 1.0/force->boltz; - const double ftm2v = force->ftm2v; - const double dt = update->dt; int ct = count; int ii = start_ii; @@ -677,20 +674,24 @@ void FixShardlowKokkos::initial_integrate(int vflag) deep_copy(d_hist, h_hist); #endif + //theta_ij_inv = 1.0/k_pairDPD->temperature; // independent of i,j boltz_inv = 1.0/force->boltz; ftm2v = force->ftm2v; dt = update->dt; + k_params.template sync(); + // process neighbors in the local AIR + atomKK->sync(execution_space,X_MASK | V_MASK | TYPE_MASK | RMASS_MASK | UCOND_MASK | UMECH_MASK | DPDTHETA_MASK); for (workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) { int workItemCt = h_ssa_phaseLen[workPhase]; - if(atom->ntypes > MAX_TYPES_STACKPARAMS) Kokkos::parallel_for(Kokkos::RangePolicy >(0,workItemCt),*this); else Kokkos::parallel_for(Kokkos::RangePolicy >(0,workItemCt),*this); } + atomKK->modified(execution_space,V_MASK | UCOND_MASK | UMECH_MASK); //Loop over all 13 outward directions (7 stages) for (workPhase = 0; workPhase < ssa_gphaseCt; ++workPhase) { @@ -698,7 +699,9 @@ void FixShardlowKokkos::initial_integrate(int vflag) int workItemCt = h_ssa_gphaseLen[workPhase]; // Communicate the updated velocities to all nodes + atomKK->sync(Host,V_MASK); comm->forward_comm_fix(this); + atomKK->modified(Host,V_MASK); if(k_pairDPDE){ // Zero out the ghosts' uCond & uMech to be used as delta accumulators @@ -706,6 +709,7 @@ void FixShardlowKokkos::initial_integrate(int vflag) // memset(&(atom->uMech[nlocal]), 0, sizeof(double)*nghost); // must capture local 
variables, not class variables + atomKK->sync(execution_space,UCOND_MASK | UMECH_MASK); auto l_uCond = uCond; auto l_uMech = uMech; Kokkos::parallel_for(Kokkos::RangePolicy(nlocal,nlocal+nghost), LAMMPS_LAMBDA (const int i) { @@ -713,16 +717,21 @@ void FixShardlowKokkos::initial_integrate(int vflag) l_uMech(i) = 0.0; }); DeviceType::fence(); + atomKK->modified(execution_space,UCOND_MASK | UMECH_MASK); } // process neighbors in this AIR + atomKK->sync(execution_space,X_MASK | V_MASK | TYPE_MASK | RMASS_MASK | UCOND_MASK | UMECH_MASK | DPDTHETA_MASK); if(atom->ntypes > MAX_TYPES_STACKPARAMS) Kokkos::parallel_for(Kokkos::RangePolicy >(0,workItemCt),*this); else Kokkos::parallel_for(Kokkos::RangePolicy >(0,workItemCt),*this); + atomKK->modified(execution_space,V_MASK | UCOND_MASK | UMECH_MASK); // Communicate the ghost deltas to the atom owners + atomKK->sync(Host,V_MASK | UCOND_MASK | UMECH_MASK); comm->reverse_comm_fix(this); + atomKK->modified(Host,V_MASK | UCOND_MASK | UMECH_MASK); } //End Loop over all directions For airnum = Top, Top-Right, Right, Bottom-Right, Back diff --git a/src/KOKKOS/fix_shardlow_kokkos.h b/src/KOKKOS/fix_shardlow_kokkos.h index 91a2fdbc97..3dbbaaa61c 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.h +++ b/src/KOKKOS/fix_shardlow_kokkos.h @@ -83,7 +83,7 @@ class FixShardlowKokkos : public FixShardlow { protected: int workPhase; - double boltz_inv,ftm2v,dt; + double theta_ij_inv,boltz_inv,ftm2v,dt; // class PairDPDfdt *pairDPD; PairDPDfdtEnergyKokkos *k_pairDPDE; From b96b6b9cd775b43007777ba2182949c331ca9fb2 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 19 Jun 2017 14:04:16 -0600 Subject: [PATCH 248/267] Fixing error checks --- src/KOKKOS/fix_rx_kokkos.cpp | 2 +- src/USER-DPD/fix_shardlow.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index ac81e5c2a7..6fbdfad289 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -116,7 
+116,7 @@ void FixRxKokkos::init() bool eos_flag = false; for (int i = 0; i < modify->nfix; i++) - if (strcmp(modify->fix[i]->style,"eos/table/rx") == 0) eos_flag = true; + if (strncmp(modify->fix[i]->style,"eos/table/rx",3) == 0) eos_flag = true; if(!eos_flag) error->all(FLERR,"fix rx requires fix eos/table/rx to be specified"); if (update_kinetics_data) diff --git a/src/USER-DPD/fix_shardlow.cpp b/src/USER-DPD/fix_shardlow.cpp index a1059e2fb0..f3057a6563 100644 --- a/src/USER-DPD/fix_shardlow.cpp +++ b/src/USER-DPD/fix_shardlow.cpp @@ -157,7 +157,7 @@ void FixShardlow::setup(int vflag) error->all(FLERR,"Cannot use constant temperature integration routines with DPD."); for (int i = 0; i < modify->nfix; i++){ - if (strcmp(modify->fix[i]->style,"shardlow") == 0) fixShardlow = true; + if (strncmp(modify->fix[i]->style,"shardlow",3) == 0) fixShardlow = true; if (strncmp(modify->fix[i]->style,"nve",3) == 0 || (strncmp(modify->fix[i]->style,"nph",3) == 0)){ if(fixShardlow) break; else error->all(FLERR,"The deterministic integrator must follow fix shardlow in the input file."); From 67a0183b333225a89902aba88c4bdc69160709d6 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 19 Jun 2017 15:23:33 -0600 Subject: [PATCH 249/267] Removing atom2bin change since ssa neighlists aren't being used for occasional lists --- src/USER-DPD/nbin_ssa.cpp | 3 --- src/USER-DPD/npair_half_bin_newton_ssa.cpp | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/USER-DPD/nbin_ssa.cpp b/src/USER-DPD/nbin_ssa.cpp index 5dacf52ee1..4c57a8e70f 100644 --- a/src/USER-DPD/nbin_ssa.cpp +++ b/src/USER-DPD/nbin_ssa.cpp @@ -76,7 +76,6 @@ void NBinSSA::bin_atoms() int nowned = atom->nlocal; // NOTE: nlocal was set to atom->nfirst above for (i = nall-1; i >= nowned; i--) { ibin = coord2ssaAIR(x[i]); - atom2bin[i] = ibin; if (ibin < 1) continue; // skip ghost atoms not in AIR if (mask[i] & bitmask) { bins[i] = gairhead_ssa[ibin]; @@ -86,7 +85,6 @@ void NBinSSA::bin_atoms() } else { for (i 
= nall-1; i >= nlocal; i--) { ibin = coord2ssaAIR(x[i]); - atom2bin[i] = ibin; if (ibin < 1) continue; // skip ghost atoms not in AIR bins[i] = gairhead_ssa[ibin]; gairhead_ssa[ibin] = i; @@ -94,7 +92,6 @@ void NBinSSA::bin_atoms() } for (i = nlocal-1; i >= 0; i--) { ibin = coord2bin(x[i][0], x[i][1], x[i][2], xbin, ybin, zbin); - atom2bin[i] = ibin; // Find the bounding box of the local atoms in the bins if (xbin < lbinxlo) lbinxlo = xbin; if (xbin >= lbinxhi) lbinxhi = xbin + 1; diff --git a/src/USER-DPD/npair_half_bin_newton_ssa.cpp b/src/USER-DPD/npair_half_bin_newton_ssa.cpp index 221aa5b454..a6479d4c4f 100644 --- a/src/USER-DPD/npair_half_bin_newton_ssa.cpp +++ b/src/USER-DPD/npair_half_bin_newton_ssa.cpp @@ -251,7 +251,7 @@ void NPairHalfBinNewtonSSA::build(NeighList *list) ytmp = x[i][1]; ztmp = x[i][2]; - ibin = atom2bin[i]; + ibin = coord2bin(x[i],xbin,ybin,zbin); // loop over AIR ghost atoms in all bins in "full" stencil // Note: the non-AIR ghost atoms have already been filtered out From 270abff2a2a1923fe1cd66de92a64caecf9579b3 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 23 Aug 2017 14:59:19 -0600 Subject: [PATCH 250/267] Fix compile error for CUDA in pair_exp6_rx_kokkos --- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 1eb1c6c770..46e06ca200 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -2112,6 +2112,7 @@ void partition_range( const int begin, const int end, int &thread_begin, int &th /* ---------------------------------------------------------------------- */ +#ifndef KOKKOS_HAVE_CUDA template template void PairExp6rxKokkos::getMixingWeightsVect(const int np_total, int errorFlag, @@ -2460,6 +2461,7 @@ void PairExp6rxKokkos::getMixingWeightsVect(const int np_total, int if (errorFlag2 > 0) errorFlag = 2; } +#endif /* 
---------------------------------------------------------------------- */ From 4784506ba907a8f209e6872e9d5a4e020bb30fcd Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 23 Aug 2017 15:02:26 -0600 Subject: [PATCH 251/267] Remove unused function in rand_pool_wrap_kokkos --- src/KOKKOS/rand_pool_wrap_kokkos.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/KOKKOS/rand_pool_wrap_kokkos.h b/src/KOKKOS/rand_pool_wrap_kokkos.h index ce134e5215..975ce0c89a 100644 --- a/src/KOKKOS/rand_pool_wrap_kokkos.h +++ b/src/KOKKOS/rand_pool_wrap_kokkos.h @@ -69,8 +69,6 @@ class RandPoolWrap : protected Pointers { } - void clean_copy() { random_thr = NULL; } - private: class RanMars **random_thr; int nthreads; From f5a99dece766a3dc35ae1c2b63cf4d7c4f75795d Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 23 Aug 2017 15:08:44 -0600 Subject: [PATCH 252/267] Remove unnecessary thread fences --- src/KOKKOS/fix_shardlow_kokkos.cpp | 1 - src/KOKKOS/fix_wall_lj93_kokkos.cpp | 1 - src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 2 -- 3 files changed, 4 deletions(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index 0c7c51c821..e3d9723c53 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -716,7 +716,6 @@ void FixShardlowKokkos::initial_integrate(int vflag) l_uCond(i) = 0.0; l_uMech(i) = 0.0; }); - DeviceType::fence(); atomKK->modified(execution_space,UCOND_MASK | UMECH_MASK); } diff --git a/src/KOKKOS/fix_wall_lj93_kokkos.cpp b/src/KOKKOS/fix_wall_lj93_kokkos.cpp index 38c7347e97..b0f7e0bda4 100644 --- a/src/KOKKOS/fix_wall_lj93_kokkos.cpp +++ b/src/KOKKOS/fix_wall_lj93_kokkos.cpp @@ -62,7 +62,6 @@ void FixWallLJ93Kokkos::wall_particle(int m_in, int which, double co copymode = 1; FixWallLJ93KokkosFunctor wp_functor(this); Kokkos::parallel_reduce(nlocal,wp_functor,ewall); - DeviceType::fence(); copymode = 0; atomKK->modified(execution_space, F_MASK); diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp 
b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index f7e1bad056..08e0f5096e 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -673,7 +673,6 @@ int PairMultiLucyRXKokkos::pack_forward_comm_kokkos(int n, DAT::tdua iswap = iswap_in; v_buf = buf.view(); Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); - DeviceType::fence(); return n; } @@ -692,7 +691,6 @@ void PairMultiLucyRXKokkos::unpack_forward_comm_kokkos(int n, int fi first = first_in; v_buf = buf.view(); Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); - DeviceType::fence(); atomKK->modified(execution_space,DPDRHO_MASK); } From a641289d5ba30cf60c0c6ff4f3177b0f03836138 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 25 Aug 2017 12:36:53 -0600 Subject: [PATCH 253/267] Must use atomics for GPUs in pair_exp6_rx_kokkos --- src/KOKKOS/kokkos.cpp | 8 -------- src/KOKKOS/kokkos.h | 1 - src/KOKKOS/pair_exp6_rx_kokkos.cpp | 6 +++--- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 10e7bda4e0..072a802b54 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -34,7 +34,6 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) lmp->kokkos = this; auto_sync = 1; - gb_test = 1; int me = 0; MPI_Comm_rank(world,&me); @@ -157,7 +156,6 @@ void KokkosLMP::accelerator(int narg, char **arg) neighflag = FULL; neighflag_qeq = FULL; neighflag_qeq_set = 0; - gb_test = 1; int newtonflag = 0; double binsize = 0.0; exchange_comm_classic = forward_comm_classic = 0; @@ -199,12 +197,6 @@ void KokkosLMP::accelerator(int narg, char **arg) else if (strcmp(arg[iarg+1],"on") == 0) newtonflag = 1; else error->all(FLERR,"Illegal package kokkos command"); iarg += 2; - } else if (strcmp(arg[iarg],"gb/test") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); - if (strcmp(arg[iarg+1],"off") == 0) gb_test = 0; - else if (strcmp(arg[iarg+1],"on") == 0) gb_test = 1; - 
else error->all(FLERR,"Illegal package kokkos command"); - iarg += 2; } else if (strcmp(arg[iarg],"comm") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); if (strcmp(arg[iarg+1],"no") == 0) { diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index 3784d806bf..8e28b38cbf 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -32,7 +32,6 @@ class KokkosLMP : protected Pointers { int num_threads,ngpu; int numa; int auto_sync; - int gb_test; KokkosLMP(class LAMMPS *, int, char **); ~KokkosLMP(); diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 46e06ca200..b3e413428d 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -275,7 +275,7 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) EV_FLOAT ev; - if (!lmp->kokkos->gb_test) { +#ifdef KOKKOS_HAVE_CUDA // Use atomics if (neighflag == HALF) { if (newton_pair) { @@ -303,7 +303,7 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) } } - } else { // No atomics +#else // No atomics num_threads = lmp->kokkos->num_threads; int nmax = f.dimension_0(); @@ -343,7 +343,7 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy(0,nmax),*this); - } +#endif k_error_flag.template modify(); k_error_flag.template sync(); From a062944de95e013abafb4604f0b3e1d830d8a161 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 23 Aug 2017 16:08:01 -0600 Subject: [PATCH 254/267] Fix execution space issues --- src/KOKKOS/atom_vec_dpd_kokkos.h | 2 ++ src/KOKKOS/fix_eos_table_rx_kokkos.cpp | 12 ++++++------ src/KOKKOS/fix_rx_kokkos.cpp | 20 +++++++++---------- src/KOKKOS/fix_rx_kokkos.h | 14 +++++++------ src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 6 +++--- src/KOKKOS/pair_dpd_fdt_energy_kokkos.h | 4 ++-- src/KOKKOS/pair_exp6_rx_kokkos.cpp | 24 +++++++++++------------ src/KOKKOS/pair_exp6_rx_kokkos.h | 4 ++-- src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp | 16 
+++++++-------- src/KOKKOS/pair_multi_lucy_rx_kokkos.h | 4 ++-- 10 files changed, 55 insertions(+), 51 deletions(-) diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.h b/src/KOKKOS/atom_vec_dpd_kokkos.h index d108e58ae7..372404cc7d 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.h +++ b/src/KOKKOS/atom_vec_dpd_kokkos.h @@ -14,6 +14,8 @@ #ifdef ATOM_CLASS AtomStyle(dpd/kk,AtomVecDPDKokkos) +AtomStyle(dpd/kk/device,AtomVecDPDKokkos) +AtomStyle(dpd/kk/host,AtomVecDPDKokkos) #else diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp index 8487fd4c4f..552141ced2 100644 --- a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp @@ -197,7 +197,7 @@ void FixEOStableRXKokkos::operator()(TagFixEOStableRXInit, const int double tmp; if (mask[i] & groupbit) { if(dpdTheta[i] <= 0.0) - k_error_flag.d_view() = 1; + k_error_flag.template view()() = 1; energy_lookup(i,dpdTheta[i],tmp); uCond[i] = 0.0; uMech[i] = tmp; @@ -239,7 +239,7 @@ void FixEOStableRXKokkos::operator()(TagFixEOStableRXTemperatureLook if (mask[i] & groupbit){ temperature_lookup(i,uCond[i]+uMech[i]+uChem[i],dpdTheta[i]); if (dpdTheta[i] <= 0.0) - k_error_flag.d_view() = 1; + k_error_flag.template view()() = 1; } } @@ -387,11 +387,11 @@ void FixEOStableRXKokkos::temperature_lookup(int id, double ui, doub // Apply the Secant Method for(it=0; it()() = 2; temp = t1; temp = MAX(temp,lo); temp = MIN(temp,hi); - k_warning_flag.d_view() = 1; + k_warning_flag.template view()() = 1; break; } temp = t2 - f2*(t2-t1)/(f2-f1); @@ -404,9 +404,9 @@ void FixEOStableRXKokkos::temperature_lookup(int id, double ui, doub } if(it==maxit){ if(isnan(f1) || isnan(f2) || isnan(ui) || isnan(thetai) || isnan(t1) || isnan(t2)) - k_error_flag.d_view() = 2; + k_error_flag.template view()() = 2; else - k_error_flag.d_view() = 3; + k_error_flag.template view()() = 3; } thetai = temp; } diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index f04b1a3a49..b1cfd20be2 
100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -1403,7 +1403,7 @@ void FixRxKokkos::operator()(Tag_FixRxKokkos_solveSystemsone(FLERR,"Computed concentration in RK solver is < -1.0e-10"); - k_error_flag.d_view() = 2; + k_error_flag.template view()() = 2; // This should be an atomic update. } else if (y[ispecies] < MY_EPSILON) @@ -1444,10 +1444,10 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF { const int count = nlocal + (newton_pair ? nghost : 0); - if (count > k_dpdThetaLocal.d_view.dimension_0()) { + if (count > k_dpdThetaLocal.template view().dimension_0()) { memory->destroy_kokkos (k_dpdThetaLocal, dpdThetaLocal); memory->create_kokkos (k_dpdThetaLocal, dpdThetaLocal, count, "FixRxKokkos::dpdThetaLocal"); - this->d_dpdThetaLocal = k_dpdThetaLocal.d_view; + this->d_dpdThetaLocal = k_dpdThetaLocal.template view(); this->h_dpdThetaLocal = k_dpdThetaLocal.h_view; } @@ -1514,8 +1514,8 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF memory->create_kokkos (k_diagnosticCounterPerODEnSteps, diagnosticCounterPerODEnSteps, nlocal, "FixRxKokkos::diagnosticCounterPerODEnSteps"); memory->create_kokkos (k_diagnosticCounterPerODEnFuncs, diagnosticCounterPerODEnFuncs, nlocal, "FixRxKokkos::diagnosticCounterPerODEnFuncs"); - d_diagnosticCounterPerODEnSteps = k_diagnosticCounterPerODEnSteps.d_view; - d_diagnosticCounterPerODEnFuncs = k_diagnosticCounterPerODEnFuncs.d_view; + d_diagnosticCounterPerODEnSteps = k_diagnosticCounterPerODEnSteps.template view(); + d_diagnosticCounterPerODEnFuncs = k_diagnosticCounterPerODEnFuncs.template view(); Kokkos::parallel_for ( Kokkos::RangePolicy(0,nlocal), *this); //Kokkos::parallel_for ( nlocal, @@ -1619,7 +1619,7 @@ void FixRxKokkos::solve_reactions(const int vflag, const bool isPreF if (y[ispecies] < -1.0e-10) { //error->one(FLERR,"Computed concentration in RK solver is < -1.0e-10"); - k_error_flag.d_view() = 2; + k_error_flag.template view()() = 2; // This 
should be an atomic update. } else if (y[ispecies] < MY_EPSILON) @@ -1907,7 +1907,7 @@ void FixRxKokkos::operator()(Tag_FixRxKokkos_firstPairOperator::value> > AtomicViewType; + typedef Kokkos::View< E_FLOAT*, typename DAT::t_efloat_1d::array_layout, DeviceType, Kokkos::MemoryTraits< AtomicF< NEIGHFLAG >::value> > AtomicViewType; AtomicViewType a_dpdThetaLocal = d_dpdThetaLocal; AtomicViewType a_sumWeights = d_sumWeights; @@ -2044,10 +2044,10 @@ void FixRxKokkos::computeLocalTemperature() int sumWeightsCt = nlocal + (NEWTON_PAIR ? nghost : 0); //memory->create_kokkos (k_sumWeights, sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights"); - if (sumWeightsCt > k_sumWeights.d_view.dimension_0()) { + if (sumWeightsCt > k_sumWeights.template view().dimension_0()) { memory->destroy_kokkos(k_sumWeights, sumWeights); memory->create_kokkos (k_sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights"); - d_sumWeights = k_sumWeights.d_view; + d_sumWeights = k_sumWeights.template view(); h_sumWeights = k_sumWeights.h_view; } @@ -2083,7 +2083,7 @@ void FixRxKokkos::computeLocalTemperature() // Create an atomic view of sumWeights and dpdThetaLocal. Only needed // for Half/thread scenarios. 
//typedef Kokkos::View< E_FLOAT*, typename DAT::t_efloat_1d::array_layout, DeviceType, Kokkos::MemoryTraits< AtomicF< NEIGHFLAG >::value> > AtomicViewType; - typedef Kokkos::View< E_FLOAT*, typename DAT::t_efloat_1d::array_layout, typename DAT::t_efloat_1d::device_type, Kokkos::MemoryTraits< AtomicF< NEIGHFLAG >::value> > AtomicViewType; + typedef Kokkos::View< E_FLOAT*, typename DAT::t_efloat_1d::array_layout, DeviceType, Kokkos::MemoryTraits< AtomicF< NEIGHFLAG >::value> > AtomicViewType; AtomicViewType a_dpdThetaLocal = d_dpdThetaLocal; AtomicViewType a_sumWeights = d_sumWeights; diff --git a/src/KOKKOS/fix_rx_kokkos.h b/src/KOKKOS/fix_rx_kokkos.h index 169a87a2f9..92b715f34d 100644 --- a/src/KOKKOS/fix_rx_kokkos.h +++ b/src/KOKKOS/fix_rx_kokkos.h @@ -74,6 +74,8 @@ typedef struct s_CounterType CounterType; template class FixRxKokkos : public FixRX { public: + typedef ArrayTypes AT; + FixRxKokkos(class LAMMPS *, int, char **); virtual ~FixRxKokkos(); virtual void init(); @@ -202,10 +204,10 @@ class FixRxKokkos : public FixRX { DAT::tdual_int_1d k_diagnosticCounterPerODEnFuncs; //typename ArrayTypes::t_int_1d d_diagnosticCounterPerODEnSteps; //typename ArrayTypes::t_int_1d d_diagnosticCounterPerODEnFuncs; - typename DAT::t_int_1d d_diagnosticCounterPerODEnSteps; - typename DAT::t_int_1d d_diagnosticCounterPerODEnFuncs; - typename HAT::t_int_1d h_diagnosticCounterPerODEnSteps; - typename HAT::t_int_1d h_diagnosticCounterPerODEnFuncs; + typename AT::t_int_1d d_diagnosticCounterPerODEnSteps; + typename AT::t_int_1d d_diagnosticCounterPerODEnFuncs; + HAT::t_int_1d h_diagnosticCounterPerODEnSteps; + HAT::t_int_1d h_diagnosticCounterPerODEnFuncs; template struct KineticsType @@ -233,8 +235,8 @@ class FixRxKokkos : public FixRX { // Need a dual-view and device-view for dpdThetaLocal and sumWeights since they're used in several callbacks. 
DAT::tdual_efloat_1d k_dpdThetaLocal, k_sumWeights; //typename ArrayTypes::t_efloat_1d d_dpdThetaLocal, d_sumWeights; - typename DAT::t_efloat_1d d_dpdThetaLocal, d_sumWeights; - typename HAT::t_efloat_1d h_dpdThetaLocal, h_sumWeights; + typename AT::t_efloat_1d d_dpdThetaLocal, d_sumWeights; + HAT::t_efloat_1d h_dpdThetaLocal, h_sumWeights; typename ArrayTypes::t_x_array_randomread d_x ; typename ArrayTypes::t_int_1d_randomread d_type ; diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index 03bf1a8b61..c559ab412f 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -169,12 +169,12 @@ void PairDPDfdtEnergyKokkos::compute(int eflag_in, int vflag_in) if (eflag_atom) { memory->destroy_kokkos(k_eatom,eatom); memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); - d_eatom = k_eatom.d_view; + d_eatom = k_eatom.template view(); } if (vflag_atom) { memory->destroy_kokkos(k_vatom,vatom); memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); - d_vatom = k_vatom.d_view; + d_vatom = k_vatom.template view(); } x = atomKK->k_x.view(); @@ -645,7 +645,7 @@ void PairDPDfdtEnergyKokkos::allocate() d_cutsq = k_cutsq.template view(); k_params = Kokkos::DualView("PairDPDfdtEnergy::params",n+1,n+1); - params = k_params.d_view; + params = k_params.template view(); if (!splitFDT_flag) { memory->destroy(duCond); diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h index fcf4b33a7a..424779f839 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.h @@ -139,8 +139,8 @@ class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy { DAT::tdual_efloat_1d k_eatom; DAT::tdual_virial_array k_vatom; - DAT::t_efloat_1d d_eatom; - DAT::t_virial_array d_vatom; + typename AT::t_efloat_1d d_eatom; + typename AT::t_virial_array d_vatom; typename AT::t_neighbors_2d d_neighbors; typename AT::t_int_1d_randomread 
d_ilist; diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index b3e413428d..8d65be23af 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -153,12 +153,12 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) if (eflag_atom) { memory->destroy_kokkos(k_eatom,eatom); memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); - d_eatom = k_eatom.d_view; + d_eatom = k_eatom.template view(); } if (vflag_atom) { memory->destroy_kokkos(k_vatom,vatom); memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); - d_vatom = k_vatom.d_view; + d_vatom = k_vatom.template view(); } x = atomKK->k_x.view(); @@ -582,7 +582,7 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxCompute()() = 1; // A3. Compute some convenient quantities for evaluating the force rminv = 1.0/rmOld12_ij; @@ -676,7 +676,7 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxCompute()() = 1; // A3. Compute some convenient quantities for evaluating the force rminv = 1.0/rm12_ij; @@ -953,7 +953,7 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxComputeNoAtomics()() = 1; // A3. Compute some convenient quantities for evaluating the force rminv = 1.0/rmOld12_ij; @@ -1047,7 +1047,7 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxComputeNoAtomics()() = 1; // A3. 
Compute some convenient quantities for evaluating the force rminv = 1.0/rm12_ij; @@ -1592,7 +1592,7 @@ void PairExp6rxKokkos::vectorized_operator(const int &ii, EV_FLOAT& } if (hasError) - k_error_flag.d_view() = 1; + k_error_flag.template view()() = 1; if (UseAtomics) { @@ -1887,7 +1887,7 @@ void PairExp6rxKokkos::getMixingWeights(int id,double &epsilon1,doub } } if(nTotal < MY_EPSILON || nTotalold < MY_EPSILON) - k_error_flag.d_view() = 1; + k_error_flag.template view()() = 1; // Compute the mole fraction of molecules within the fluid portion of the particle (One Fluid Approximation) fractionOFAold = nMoleculesOFAold / nTotalold; @@ -2042,28 +2042,28 @@ void PairExp6rxKokkos::getMixingWeights(int id,double &epsilon1,doub // Check that no fractions are less than zero if(fraction1 < 0.0 || nMolecules1 < 0.0){ if(fraction1 < -MY_EPSILON || nMolecules1 < -MY_EPSILON){ - k_error_flag.d_view() = 2; + k_error_flag.template view()() = 2; } nMolecules1 = 0.0; fraction1 = 0.0; } if(fraction2 < 0.0 || nMolecules2 < 0.0){ if(fraction2 < -MY_EPSILON || nMolecules2 < -MY_EPSILON){ - k_error_flag.d_view() = 2; + k_error_flag.template view()() = 2; } nMolecules2 = 0.0; fraction2 = 0.0; } if(fractionOld1 < 0.0 || nMoleculesOld1 < 0.0){ if(fractionOld1 < -MY_EPSILON || nMoleculesOld1 < -MY_EPSILON){ - k_error_flag.d_view() = 2; + k_error_flag.template view()() = 2; } nMoleculesOld1 = 0.0; fractionOld1 = 0.0; } if(fractionOld2 < 0.0 || nMoleculesOld2 < 0.0){ if(fractionOld2 < -MY_EPSILON || nMoleculesOld2 < -MY_EPSILON){ - k_error_flag.d_view() = 2; + k_error_flag.template view()() = 2; } nMoleculesOld2 = 0.0; fractionOld2 = 0.0; diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.h b/src/KOKKOS/pair_exp6_rx_kokkos.h index 4c35c76851..5e44048ae2 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.h +++ b/src/KOKKOS/pair_exp6_rx_kokkos.h @@ -161,8 +161,8 @@ class PairExp6rxKokkos : public PairExp6rx { DAT::tdual_efloat_1d k_eatom; DAT::tdual_virial_array k_vatom; - DAT::t_efloat_1d d_eatom; - 
DAT::t_virial_array d_vatom; + typename AT::t_efloat_1d d_eatom; + typename AT::t_virial_array d_vatom; DAT::tdual_int_scalar k_error_flag; diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index 08e0f5096e..d9a4f1ab83 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -155,12 +155,12 @@ void PairMultiLucyRXKokkos::compute_style(int eflag_in, int vflag_in if (eflag_atom) { memory->destroy_kokkos(k_eatom,eatom); memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); - d_eatom = k_eatom.d_view; + d_eatom = k_eatom.template view(); } if (vflag_atom) { memory->destroy_kokkos(k_vatom,vatom); memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); - d_vatom = k_vatom.d_view; + d_vatom = k_vatom.template view(); } x = atomKK->k_x.view(); @@ -328,7 +328,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXComputeinnersq || rho[j]*rho[j] < tb->innersq){ if (rho[i]*rho[i] < d_table_const.innersq(tidx) || rho[j]*rho[j] < d_table_const.innersq(tidx)){ - k_error_flag.d_view() = 1; + k_error_flag.template view()() = 1; } if (TABSTYLE == LOOKUP) { @@ -337,7 +337,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta); jtable = static_cast (((rho[j]*rho[j]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); if (itable >= tlm1 || jtable >= tlm1){ - k_error_flag.d_view() = 2; + k_error_flag.template view()() = 2; } //A_i = tb->f[itable]; A_i = d_table_const.f(tidx,itable); @@ -355,7 +355,7 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta); jtable = static_cast ((rho[j]*rho[j] - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx)); if (itable >= tlm1 || jtable >= tlm1){ - k_error_flag.d_view() = 2; + k_error_flag.template view()() = 2; } if(itable<0) itable=0; if(itable>=tlm1) itable=tlm1; @@ -380,7 +380,7 @@ void 
PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute()() = 3; if (isite1 == isite2) fpair = sqrt(mixWtSite1old_i*mixWtSite2old_j)*fpair; else fpair = (sqrt(mixWtSite1old_i*mixWtSite2old_j) + sqrt(mixWtSite2old_i*mixWtSite1old_j))*fpair; @@ -411,14 +411,14 @@ void PairMultiLucyRXKokkos::operator()(TagPairMultiLucyRXCompute= tlm1){ - k_error_flag.d_view() = 2; + k_error_flag.template view()() = 2; } if(itable==0) fraction_i=0.0; //else fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta); else fraction_i = (((rho[i]*rho[i]) - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx)); //evdwl = tb->e[itable] + fraction_i*tb->de[itable]; evdwl = d_table_const.e(tidx,itable) + fraction_i*d_table_const.de(tidx,itable); - } else k_error_flag.d_view() = 3; + } else k_error_flag.template view()() = 3; evdwl *=(pi*d_cutsq(itype,itype)*d_cutsq(itype,itype))/84.0; evdwlOld = mixWtSite1old_i*evdwl; diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h index 8556319531..b8ced4c847 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h @@ -167,8 +167,8 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX { DAT::tdual_efloat_1d k_eatom; DAT::tdual_virial_array k_vatom; - DAT::t_efloat_1d d_eatom; - DAT::t_virial_array d_vatom; + typename AT::t_efloat_1d d_eatom; + typename AT::t_virial_array d_vatom; typename AT::t_neighbors_2d d_neighbors; typename AT::t_int_1d_randomread d_ilist; From 1e16fed9ab94435321f81188abbe001f2320e1b8 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 25 Aug 2017 15:16:19 -0600 Subject: [PATCH 255/267] Error out if using pair hybrid with Kokkos, but not pair hybrid/overlay --- src/pair_hybrid.cpp | 3 +++ src/pair_hybrid.h | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/src/pair_hybrid.cpp b/src/pair_hybrid.cpp index 4a98cca614..751560deff 100644 --- a/src/pair_hybrid.cpp +++ b/src/pair_hybrid.cpp @@ -379,6 +379,9 @@ void 
PairHybrid::coeff(int narg, char **arg) if (narg < 3) error->all(FLERR,"Incorrect args for pair coefficients"); if (!allocated) allocate(); + if (lmp->kokkos) + error->all(FLERR,"Cannot yet use pair hybrid with Kokkos"); + int ilo,ihi,jlo,jhi; force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi); force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi); diff --git a/src/pair_hybrid.h b/src/pair_hybrid.h index 2364b16f46..463ae00eca 100644 --- a/src/pair_hybrid.h +++ b/src/pair_hybrid.h @@ -90,6 +90,10 @@ class PairHybrid : public Pair { /* ERROR/WARNING messages: +E: Cannot yet use pair hybrid with Kokkos + +This feature is not yet supported. + E: Illegal ... command Self-explanatory. Check the input script syntax and compare to the From b73999ef215c101c24c54627102f2fc44cfd581b Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 25 Aug 2017 15:25:41 -0600 Subject: [PATCH 256/267] Revert change to read_data.cpp --- src/read_data.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/read_data.cpp b/src/read_data.cpp index 6f0a229ed9..b1a42608c0 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -50,7 +50,7 @@ using namespace LAMMPS_NS; #define MAXLINE 256 #define LB_FACTOR 1.1 -#define CHUNK 4096 +#define CHUNK 1024 #define DELTA 4 // must be 2 or larger #define MAXBODY 32 // max # of lines in one body @@ -1904,12 +1904,8 @@ void ReadData::open(char *file) if (!compressed) fp = fopen(file,"r"); else { #ifdef LAMMPS_GZIP - char gunzip[2048]; - // Use taskset to force the gzip process to NOT run on the 0th "CPU", which should - // keep it from thrashing with the MPI rank zero process (the one reading the pipe). - // This is Linux specific, and the 1023 upper range might also be system specific. - // Use of something like hwloc would be more portable... but more complicated. 
- sprintf(gunzip,"taskset -c 1-1023 gzip -c -d %s",file); + char gunzip[128]; + sprintf(gunzip,"gzip -c -d %s",file); #ifdef _WIN32 fp = _popen(gunzip,"rb"); From 1f8c4f2c62dfaf24340673a99175cf84e478c698 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 25 Aug 2017 15:31:00 -0600 Subject: [PATCH 257/267] Remove hardcoded map variables and debug output --- src/read_restart.cpp | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/src/read_restart.cpp b/src/read_restart.cpp index fcbd8d186d..82583bfe01 100644 --- a/src/read_restart.cpp +++ b/src/read_restart.cpp @@ -212,17 +212,6 @@ void ReadRestart::command(int narg, char **arg) int perAtomSize = avec->size_restart(); // ...so we can get its size atom->nlocal = 0; // restore nlocal to zero atoms int atomCt = (int) (assignedChunkSize / perAtomSize); -//#define DEBUG_PRE_GROW -#ifdef DEBUG_PRE_GROW -fprintf(stdout, "ReadRestart::command %04d: pAS %d, aCt %d, nmax %d, chunckSize %12.0f, %12.0f\n" - ,me - ,perAtomSize - ,atomCt - ,atom->nmax - ,(double) assignedChunkSize - ,((double) perAtomSize) * atomCt -); -#endif if (atomCt > atom->nmax) avec->grow(atomCt); } m = 0; @@ -905,10 +894,8 @@ void ReadRestart::header(int incompatible) atom->tag_enable = read_int(); } else if (flag == ATOM_MAP_STYLE) { atom->map_style = read_int(); - atom->map_style = 0; } else if (flag == ATOM_MAP_USER) { atom->map_user = read_int(); - atom->map_user = 0; } else if (flag == ATOM_SORTFREQ) { atom->sortfreq = read_int(); } else if (flag == ATOM_SORTBIN) { @@ -1068,22 +1055,7 @@ void ReadRestart::file_layout() nproc_chunk_sizes[ndx] = base_ct * perAtomSize; current_ByteOffset += base_ByteOffset; } -//#define DEBUG_FILE_LAYOUT -#ifdef DEBUG_FILE_LAYOUT -fprintf(stdout, "ReadRestart::file_layout: %15.0f/%d = %15.0f totCt, %15.0f natoms, %12.0f baseCt, %12.0f leftover, %d np != %d npf %c%c\n" - ,(double) total_size - ,perAtomSize - ,(double) total_ct - ,(double) atom->natoms - ,(double) base_ct - 
,(double) leftover_ct - ,nprocs - ,nprocs_file - ,(total_size == (total_ct * perAtomSize)) ? ' ' : 'E' - ,(total_ct == (base_ct * nprocs + leftover_ct)) ? ' ' : 'F' -); -#endif - } else { // Bummer, we have to read in based on how it was written + } else { // we have to read in based on how it was written int init_chunk_number = nprocs_file/nprocs; int num_extra_chunks = nprocs_file - (nprocs*init_chunk_number); From e52a28f8afaf404ae84f3b377a6a060a9d43fe17 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 25 Aug 2017 16:20:42 -0600 Subject: [PATCH 258/267] Update docs for Kokkos version of USER-DPD package --- doc/src/fix_dpd_energy.txt | 24 ++++++++++++++++++++++++ doc/src/fix_eos_table_rx.txt | 24 ++++++++++++++++++++++++ doc/src/fix_rx.txt | 24 ++++++++++++++++++++++++ doc/src/fix_shardlow.txt | 24 ++++++++++++++++++++++++ doc/src/fix_wall.txt | 26 ++++++++++++++++++++++++++ doc/src/pair_dpd_fdt.txt | 24 ++++++++++++++++++++++++ doc/src/pair_exp6_rx.txt | 26 ++++++++++++++++++++++++++ doc/src/pair_hybrid.txt | 1 + doc/src/pair_multi_lucy_rx.txt | 24 ++++++++++++++++++++++++ doc/src/pair_table_rx.txt | 24 ++++++++++++++++++++++++ 10 files changed, 221 insertions(+) diff --git a/doc/src/fix_dpd_energy.txt b/doc/src/fix_dpd_energy.txt index ed49e5a671..1c10d954d6 100644 --- a/doc/src/fix_dpd_energy.txt +++ b/doc/src/fix_dpd_energy.txt @@ -7,6 +7,7 @@ :line fix dpd/energy command :h3 +fix dpd/energy/kk command :h3 [Syntax:] @@ -46,6 +47,29 @@ examples/USER/dpd directory. :line +Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are +functionally the same as the corresponding style without the suffix. +They have been optimized to run faster, depending on your available +hardware, as discussed in "Section 5"_Section_accelerate.html +of the manual. The accelerated styles take the same arguments and +should produce the same results, except for round-off and precision +issues. 
+ +These accelerated styles are part of the GPU, USER-INTEL, KOKKOS, +USER-OMP and OPT packages, respectively. They are only enabled if +LAMMPS was built with those packages. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info. + +You can specify the accelerated styles explicitly in your input script +by including their suffix, or you can use the "-suffix command-line +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can +use the "suffix"_suffix.html command in your input script. + +See "Section 5"_Section_accelerate.html of the manual for +more instructions on how to use the accelerated styles effectively. + +:line + [Restrictions:] This command is part of the USER-DPD package. It is only enabled if diff --git a/doc/src/fix_eos_table_rx.txt b/doc/src/fix_eos_table_rx.txt index e5e4f772f6..0c87874347 100644 --- a/doc/src/fix_eos_table_rx.txt +++ b/doc/src/fix_eos_table_rx.txt @@ -7,6 +7,7 @@ :line fix eos/table/rx command :h3 +fix eos/table/rx/kk command :h3 [Syntax:] @@ -152,6 +153,29 @@ no 0.93 0.00 0.000 -1.76 :pre :line +Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are +functionally the same as the corresponding style without the suffix. +They have been optimized to run faster, depending on your available +hardware, as discussed in "Section 5"_Section_accelerate.html +of the manual. The accelerated styles take the same arguments and +should produce the same results, except for round-off and precision +issues. + +These accelerated styles are part of the GPU, USER-INTEL, KOKKOS, +USER-OMP and OPT packages, respectively. They are only enabled if +LAMMPS was built with those packages. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info. 
+ +You can specify the accelerated styles explicitly in your input script +by including their suffix, or you can use the "-suffix command-line +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can +use the "suffix"_suffix.html command in your input script. + +See "Section 5"_Section_accelerate.html of the manual for +more instructions on how to use the accelerated styles effectively. + +:line + [Restrictions:] This command is part of the USER-DPD package. It is only enabled if diff --git a/doc/src/fix_rx.txt b/doc/src/fix_rx.txt index 6a800f3865..0810a34740 100644 --- a/doc/src/fix_rx.txt +++ b/doc/src/fix_rx.txt @@ -7,6 +7,7 @@ :line fix rx command :h3 +fix rx/kk command :h3 [Syntax:] @@ -182,6 +183,29 @@ read_data data.dpd fix foo_SPECIES NULL Species :line +Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are +functionally the same as the corresponding style without the suffix. +They have been optimized to run faster, depending on your available +hardware, as discussed in "Section 5"_Section_accelerate.html +of the manual. The accelerated styles take the same arguments and +should produce the same results, except for round-off and precision +issues. + +These accelerated styles are part of the GPU, USER-INTEL, KOKKOS, +USER-OMP and OPT packages, respectively. They are only enabled if +LAMMPS was built with those packages. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info. + +You can specify the accelerated styles explicitly in your input script +by including their suffix, or you can use the "-suffix command-line +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can +use the "suffix"_suffix.html command in your input script. + +See "Section 5"_Section_accelerate.html of the manual for +more instructions on how to use the accelerated styles effectively. + +:line + [Restrictions:] This command is part of the USER-DPD package. 
It is only enabled if diff --git a/doc/src/fix_shardlow.txt b/doc/src/fix_shardlow.txt index 8354b4c41c..24726d8610 100644 --- a/doc/src/fix_shardlow.txt +++ b/doc/src/fix_shardlow.txt @@ -7,6 +7,7 @@ :line fix shardlow command :h3 +fix shardlow/kk command :h3 [Syntax:] @@ -52,6 +53,29 @@ examples/USER/dpd directory. :line +Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are +functionally the same as the corresponding style without the suffix. +They have been optimized to run faster, depending on your available +hardware, as discussed in "Section 5"_Section_accelerate.html +of the manual. The accelerated styles take the same arguments and +should produce the same results, except for round-off and precision +issues. + +These accelerated styles are part of the GPU, USER-INTEL, KOKKOS, +USER-OMP and OPT packages, respectively. They are only enabled if +LAMMPS was built with those packages. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info. + +You can specify the accelerated styles explicitly in your input script +by including their suffix, or you can use the "-suffix command-line +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can +use the "suffix"_suffix.html command in your input script. + +See "Section 5"_Section_accelerate.html of the manual for +more instructions on how to use the accelerated styles effectively. + +:line + [Restrictions:] This command is part of the USER-DPD package. It is only enabled if diff --git a/doc/src/fix_wall.txt b/doc/src/fix_wall.txt index 6d76956620..6bbfccf9db 100644 --- a/doc/src/fix_wall.txt +++ b/doc/src/fix_wall.txt @@ -7,6 +7,7 @@ :line fix wall/lj93 command :h3 +fix wall/lj93/kk command :h3 fix wall/lj126 command :h3 fix wall/lj1043 command :h3 fix wall/colloid command :h3 @@ -277,6 +278,31 @@ the total potential energy of the system (the quantity being minimized), you MUST enable the "fix_modify"_fix_modify.html {energy} option for this fix. 
+:line + +Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are +functionally the same as the corresponding style without the suffix. +They have been optimized to run faster, depending on your available +hardware, as discussed in "Section 5"_Section_accelerate.html +of the manual. The accelerated styles take the same arguments and +should produce the same results, except for round-off and precision +issues. + +These accelerated styles are part of the GPU, USER-INTEL, KOKKOS, +USER-OMP and OPT packages, respectively. They are only enabled if +LAMMPS was built with those packages. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info. + +You can specify the accelerated styles explicitly in your input script +by including their suffix, or you can use the "-suffix command-line +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can +use the "suffix"_suffix.html command in your input script. + +See "Section 5"_Section_accelerate.html of the manual for +more instructions on how to use the accelerated styles effectively. + +:line + [Restrictions:] none [Related commands:] diff --git a/doc/src/pair_dpd_fdt.txt b/doc/src/pair_dpd_fdt.txt index b75e7c323c..867f3f2315 100644 --- a/doc/src/pair_dpd_fdt.txt +++ b/doc/src/pair_dpd_fdt.txt @@ -8,6 +8,7 @@ pair_style dpd/fdt command :h3 pair_style dpd/fdt/energy command :h3 +pair_style dpd/fdt/energy/kk command :h3 [Syntax:] @@ -125,6 +126,29 @@ significantly larger timesteps to be taken. :line +Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are +functionally the same as the corresponding style without the suffix. +They have been optimized to run faster, depending on your available +hardware, as discussed in "Section 5"_Section_accelerate.html +of the manual. The accelerated styles take the same arguments and +should produce the same results, except for round-off and precision +issues. 
+ +These accelerated styles are part of the GPU, USER-INTEL, KOKKOS, +USER-OMP and OPT packages, respectively. They are only enabled if +LAMMPS was built with those packages. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info. + +You can specify the accelerated styles explicitly in your input script +by including their suffix, or you can use the "-suffix command-line +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can +use the "suffix"_suffix.html command in your input script. + +See "Section 5"_Section_accelerate.html of the manual for +more instructions on how to use the accelerated styles effectively. + +:line + [Restrictions:] These commands are part of the USER-DPD package. They are only diff --git a/doc/src/pair_exp6_rx.txt b/doc/src/pair_exp6_rx.txt index cbc17d357d..7eafa23543 100644 --- a/doc/src/pair_exp6_rx.txt +++ b/doc/src/pair_exp6_rx.txt @@ -7,6 +7,7 @@ :line pair_style exp6/rx command :h3 +pair_style exp6/rx/kk command :h3 [Syntax:] @@ -147,6 +148,31 @@ This style does not support the pair_modify tail option for adding long-range tail corrections to energy and pressure for the A,C terms in the pair interaction. +:line + +Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are +functionally the same as the corresponding style without the suffix. +They have been optimized to run faster, depending on your available +hardware, as discussed in "Section 5"_Section_accelerate.html +of the manual. The accelerated styles take the same arguments and +should produce the same results, except for round-off and precision +issues. + +These accelerated styles are part of the GPU, USER-INTEL, KOKKOS, +USER-OMP and OPT packages, respectively. They are only enabled if +LAMMPS was built with those packages. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info. 
+ +You can specify the accelerated styles explicitly in your input script +by including their suffix, or you can use the "-suffix command-line +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can +use the "suffix"_suffix.html command in your input script. + +See "Section 5"_Section_accelerate.html of the manual for +more instructions on how to use the accelerated styles effectively. + +:line + [Restrictions:] This command is part of the USER-DPD package. It is only enabled if diff --git a/doc/src/pair_hybrid.txt b/doc/src/pair_hybrid.txt index fc1824cf62..d37dedc709 100644 --- a/doc/src/pair_hybrid.txt +++ b/doc/src/pair_hybrid.txt @@ -10,6 +10,7 @@ pair_style hybrid command :h3 pair_style hybrid/omp command :h3 pair_style hybrid/overlay command :h3 pair_style hybrid/overlay/omp command :h3 +pair_style hybrid/overlay/kk command :h3 [Syntax:] diff --git a/doc/src/pair_multi_lucy_rx.txt b/doc/src/pair_multi_lucy_rx.txt index 77ed223e2a..57abcf4a4c 100644 --- a/doc/src/pair_multi_lucy_rx.txt +++ b/doc/src/pair_multi_lucy_rx.txt @@ -7,6 +7,7 @@ :line pair_style multi/lucy/rx command :h3 +pair_style multi/lucy/rx/kk command :h3 [Syntax:] @@ -200,6 +201,29 @@ This pair style can only be used via the {pair} keyword of the :line +Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are +functionally the same as the corresponding style without the suffix. +They have been optimized to run faster, depending on your available +hardware, as discussed in "Section 5"_Section_accelerate.html +of the manual. The accelerated styles take the same arguments and +should produce the same results, except for round-off and precision +issues. + +These accelerated styles are part of the GPU, USER-INTEL, KOKKOS, +USER-OMP and OPT packages, respectively. They are only enabled if +LAMMPS was built with those packages. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info. 
+ +You can specify the accelerated styles explicitly in your input script +by including their suffix, or you can use the "-suffix command-line +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can +use the "suffix"_suffix.html command in your input script. + +See "Section 5"_Section_accelerate.html of the manual for +more instructions on how to use the accelerated styles effectively. + +:line + [Restrictions:] This command is part of the USER-DPD package. It is only enabled if diff --git a/doc/src/pair_table_rx.txt b/doc/src/pair_table_rx.txt index f93af21da4..cd3a7ef31b 100644 --- a/doc/src/pair_table_rx.txt +++ b/doc/src/pair_table_rx.txt @@ -7,6 +7,7 @@ :line pair_style table/rx command :h3 +pair_style table/rx/kk command :h3 [Syntax:] @@ -223,6 +224,29 @@ This pair style can only be used via the {pair} keyword of the :line +Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are +functionally the same as the corresponding style without the suffix. +They have been optimized to run faster, depending on your available +hardware, as discussed in "Section 5"_Section_accelerate.html +of the manual. The accelerated styles take the same arguments and +should produce the same results, except for round-off and precision +issues. + +These accelerated styles are part of the GPU, USER-INTEL, KOKKOS, +USER-OMP and OPT packages, respectively. They are only enabled if +LAMMPS was built with those packages. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info. + +You can specify the accelerated styles explicitly in your input script +by including their suffix, or you can use the "-suffix command-line +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can +use the "suffix"_suffix.html command in your input script. + +See "Section 5"_Section_accelerate.html of the manual for +more instructions on how to use the accelerated styles effectively. + +:line + [Restrictions:] This command is part of the USER-DPD package. 
It is only enabled if From a4a45f1d9cfbd52f41b69dc160b73a316dad2229 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 29 Aug 2017 17:25:13 -0600 Subject: [PATCH 259/267] Remove unnecessary check in npair_kokkos --- src/KOKKOS/npair_kokkos.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 462a4b8424..d5ea8376f6 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -173,12 +173,6 @@ void NPairKokkos::build(NeighList *list_) data.special_flag[2] = special_flag[2]; data.special_flag[3] = special_flag[3]; - if(list->d_neighbors.dimension_0()d_neighbors = typename ArrayTypes::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs); - list->d_numneigh = typename ArrayTypes::t_int_1d("numneigh", nall*1.1); - data.neigh_list.d_neighbors = list->d_neighbors; - data.neigh_list.d_numneigh = list->d_numneigh; - } data.h_resize()=1; while(data.h_resize()) { data.h_new_maxneighs() = list->maxneighs; From f2d8c37f27b142606741780f436f250da74bab09 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Wed, 30 Aug 2017 10:24:29 -0500 Subject: [PATCH 260/267] Rename SSA specific debug #ifdef to DEBUG_SSA_PAIR_CT --- src/KOKKOS/fix_shardlow_kokkos.cpp | 14 +++++++------- src/KOKKOS/fix_shardlow_kokkos.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index e3d9723c53..a09c8bc1ba 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -102,7 +102,7 @@ FixShardlowKokkos::FixShardlowKokkos(LAMMPS *lmp, int narg, char **a if(/* k_pairDPD == NULL &&*/ k_pairDPDE == NULL) error->all(FLERR,"Must use pair_style "/*"dpd/fdt/kk or "*/"dpd/fdt/energy/kk with fix shardlow/kk"); -#ifdef DEBUG_PAIR_CT +#ifdef DEBUG_SSA_PAIR_CT d_counters = typename AT::t_int_2d("FixShardlowKokkos::d_counters", 2, 3); d_hist = typename AT::t_int_1d("FixShardlowKokkos::d_hist", 32); #ifndef 
KOKKOS_USE_CUDA_UVM @@ -319,7 +319,7 @@ void FixShardlowKokkos::ssa_update_dpd( const X_FLOAT dely = ytmp - x(j, 1); const X_FLOAT delz = ztmp - x(j, 2); const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; -#ifdef DEBUG_PAIR_CT +#ifdef DEBUG_SSA_PAIR_CT if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(0, 0))); else Kokkos::atomic_increment(&(d_counters(0, 1))); Kokkos::atomic_increment(&(d_counters(0, 2))); @@ -332,7 +332,7 @@ void FixShardlowKokkos::ssa_update_dpd( // NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test if ((rsq < (STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype))) && (rsq >= EPSILON_SQUARED)) { -#ifdef DEBUG_PAIR_CT +#ifdef DEBUG_SSA_PAIR_CT if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(1, 0))); else Kokkos::atomic_increment(&(d_counters(1, 1))); Kokkos::atomic_increment(&(d_counters(1, 2))); @@ -475,7 +475,7 @@ void FixShardlowKokkos::ssa_update_dpde( const X_FLOAT dely = ytmp - x(j, 1); const X_FLOAT delz = ztmp - x(j, 2); const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; -#ifdef DEBUG_PAIR_CT +#ifdef DEBUG_SSA_PAIR_CT if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(0, 0))); else Kokkos::atomic_increment(&(d_counters(0, 1))); Kokkos::atomic_increment(&(d_counters(0, 2))); @@ -488,7 +488,7 @@ void FixShardlowKokkos::ssa_update_dpde( // NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test if ((rsq < (STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype))) && (rsq >= EPSILON_SQUARED)) { -#ifdef DEBUG_PAIR_CT +#ifdef DEBUG_SSA_PAIR_CT if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(1, 0))); else Kokkos::atomic_increment(&(d_counters(1, 1))); Kokkos::atomic_increment(&(d_counters(1, 2))); @@ -665,7 +665,7 @@ void FixShardlowKokkos::initial_integrate(int vflag) maxRNG = maxWorkItemCt; } -#ifdef DEBUG_PAIR_CT +#ifdef DEBUG_SSA_PAIR_CT for (int i = 0; i < 2; ++i) for (int j = 0; j < 3; ++j) h_counters(i,j) = 0; @@ -734,7 
+734,7 @@ void FixShardlowKokkos::initial_integrate(int vflag) } //End Loop over all directions For airnum = Top, Top-Right, Right, Bottom-Right, Back -#ifdef DEBUG_PAIR_CT +#ifdef DEBUG_SSA_PAIR_CT deep_copy(h_counters, d_counters); deep_copy(h_hist, d_hist); for (int i = 0; i < 32; ++i) fprintf(stdout, "%8d", h_hist[i]); diff --git a/src/KOKKOS/fix_shardlow_kokkos.h b/src/KOKKOS/fix_shardlow_kokkos.h index 3dbbaaa61c..1ff94d5eec 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.h +++ b/src/KOKKOS/fix_shardlow_kokkos.h @@ -74,7 +74,7 @@ class FixShardlowKokkos : public FixShardlow { KOKKOS_INLINE_FUNCTION void operator()(TagFixShardlowSSAUpdateDPDEGhost, const int&) const; -#ifdef DEBUG_PAIR_CT +#ifdef DEBUG_SSA_PAIR_CT typename AT::t_int_2d d_counters; typename HAT::t_int_2d h_counters; typename AT::t_int_1d d_hist; From 2dd202cc761de7afb9419294a52652edc8cf278d Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Wed, 30 Aug 2017 10:34:09 -0500 Subject: [PATCH 261/267] USER-DPD: remove some out-of-date FIXME comments in fix_shardlow_kokkos.cpp --- src/KOKKOS/fix_shardlow_kokkos.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index a09c8bc1ba..ea7cc21fff 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -164,17 +164,13 @@ void FixShardlowKokkos::init() k_params = Kokkos::DualView ("FixShardlowKokkos::params",ntypes+1,ntypes+1); params = k_params.template view(); -//FIXME either create cutsq and fill it in, or just point to pairDPD's... 
-// memory->destroy(cutsq); //FIXME -// memory->create_kokkos(k_cutsq,cutsq,ntypes+1,ntypes+1,"FixShardlowKokkos:cutsq"); k_pairDPDE->k_cutsq.template sync(); - d_cutsq = k_pairDPDE->k_cutsq.template view(); //FIXME + d_cutsq = k_pairDPDE->k_cutsq.template view(); const double boltz2 = 2.0*force->boltz; for (int i = 1; i <= ntypes; i++) { for (int j = i; j <= ntypes; j++) { F_FLOAT cutone = k_pairDPDE->cut[i][j]; -// k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone; //FIXME if (cutone > EPSILON) k_params.h_view(i,j).cutinv = 1.0/cutone; else k_params.h_view(i,j).cutinv = FLT_MAX; k_params.h_view(i,j).halfsigma = 0.5*k_pairDPDE->sigma[i][j]; @@ -190,7 +186,6 @@ void FixShardlowKokkos::init() } } - // k_cutsq.template modify(); k_params.template modify(); } From dc7f1281b83214d360cac981b33fdca2d559f935 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Wed, 30 Aug 2017 11:01:17 -0500 Subject: [PATCH 262/267] USER-DPD: Kokkos version of Const Temperature DPD isn't implemented yet The Constant Energy DPD (DPDE) was our primary usage case, so only stubs for the Constant Temperature case were included in Kokkos code so far. The non-Kokkos version works fine for Constant Temperature DPD. 
--- src/KOKKOS/fix_shardlow_kokkos.cpp | 3 ++- src/KOKKOS/fix_shardlow_kokkos.h | 14 +++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index ea7cc21fff..98bbb02714 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -265,7 +265,8 @@ void FixShardlowKokkos::setup_pre_neighbor() /* ---------------------------------------------------------------------- */ -#ifdef NOTNOW +#ifdef ENABLE_KOKKOS_DPD_CONSTANT_TEMPERATURE +#error "FixShardlowKokkos::ssa_update_dpd() is not functional yet - TIM 20170830" /* ---------------------------------------------------------------------- Perform the stochastic integration and Shardlow update for constant temperature Allow for both per-type and per-atom mass diff --git a/src/KOKKOS/fix_shardlow_kokkos.h b/src/KOKKOS/fix_shardlow_kokkos.h index 1ff94d5eec..70dccf2e2d 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.h +++ b/src/KOKKOS/fix_shardlow_kokkos.h @@ -26,6 +26,9 @@ FixStyle(shardlow/kk/host,FixShardlowKokkos) #include "fix_shardlow.h" #include "kokkos_type.h" #include "neigh_list_kokkos.h" +#ifdef ENABLE_KOKKOS_DPD_CONSTANT_TEMPERATURE +#include "pair_dpd_fdt_kokkos.h" +#endif #include "pair_dpd_fdt_energy_kokkos.h" namespace LAMMPS_NS { @@ -85,7 +88,9 @@ class FixShardlowKokkos : public FixShardlow { int workPhase; double theta_ij_inv,boltz_inv,ftm2v,dt; -// class PairDPDfdt *pairDPD; +#ifdef ENABLE_KOKKOS_DPD_CONSTANT_TEMPERATURE +// class PairDPDfdt *pairDPD; FIXME as per k_pairDPDE below +#endif PairDPDfdtEnergyKokkos *k_pairDPDE; int maxRNG; @@ -138,8 +143,11 @@ class FixShardlowKokkos : public FixShardlow { typename AT::t_int_2d ssa_gitemLoc, ssa_gitemLen; -// template -// void ssa_update_dpd(int, int); // Constant Temperature +#ifdef ENABLE_KOKKOS_DPD_CONSTANT_TEMPERATURE + template + KOKKOS_INLINE_FUNCTION + void ssa_update_dpd(int, int, int) const; // Constant Temperature +#endif template 
KOKKOS_INLINE_FUNCTION void ssa_update_dpde(int, int, int) const; // Constant Energy From 3e6cdd1400117c1a99008f9649e05fdcd10fc6e1 Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Wed, 30 Aug 2017 14:58:38 -0500 Subject: [PATCH 263/267] USER-DPD: finish renaming #ifdef DEBUG_PAIR_CT to DEBUG_SSA_PAIR_CT --- src/USER-DPD/fix_shardlow.cpp | 16 ++++++++-------- src/USER-DPD/fix_shardlow.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/USER-DPD/fix_shardlow.cpp b/src/USER-DPD/fix_shardlow.cpp index f3057a6563..cec53ab15f 100644 --- a/src/USER-DPD/fix_shardlow.cpp +++ b/src/USER-DPD/fix_shardlow.cpp @@ -212,7 +212,7 @@ void FixShardlow::ssa_update_dpd( const double mass_i = (rmass) ? rmass[i] : mass[itype]; const double massinv_i = 1.0 / mass_i; -#ifdef DEBUG_PAIR_CT +#ifdef DEBUG_SSA_PAIR_CT const int nlocal = atom->nlocal; #endif @@ -225,7 +225,7 @@ void FixShardlow::ssa_update_dpd( double dely = ytmp - x[j][1]; double delz = ztmp - x[j][2]; double rsq = delx*delx + dely*dely + delz*delz; -#ifdef DEBUG_PAIR_CT +#ifdef DEBUG_SSA_PAIR_CT if ((i < nlocal) && (j < nlocal)) ++(counters[0][0]); else ++(counters[0][1]); ++(counters[0][2]); @@ -237,7 +237,7 @@ void FixShardlow::ssa_update_dpd( // NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test if ((rsq < cut2_i[jtype]) && (rsq >= EPSILON_SQUARED)) { -#ifdef DEBUG_PAIR_CT +#ifdef DEBUG_SSA_PAIR_CT if ((i < nlocal) && (j < nlocal)) ++(counters[1][0]); else ++(counters[1][1]); ++(counters[1][2]); @@ -369,7 +369,7 @@ void FixShardlow::ssa_update_dpde( const double massinv_i = 1.0 / mass_i; const double mass_i_div_neg4_ftm2v = mass_i*(-0.25)/ftm2v; -#ifdef DEBUG_PAIR_CT +#ifdef DEBUG_SSA_PAIR_CT const int nlocal = atom->nlocal; #endif @@ -382,7 +382,7 @@ void FixShardlow::ssa_update_dpde( double dely = ytmp - x[j][1]; double delz = ztmp - x[j][2]; double rsq = delx*delx + dely*dely + delz*delz; -#ifdef DEBUG_PAIR_CT +#ifdef DEBUG_SSA_PAIR_CT if ((i < nlocal) && (j < nlocal)) 
++(counters[0][0]); else ++(counters[0][1]); ++(counters[0][2]); @@ -394,7 +394,7 @@ void FixShardlow::ssa_update_dpde( // NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test if ((rsq < cut2_i[jtype]) && (rsq >= EPSILON_SQUARED)) { -#ifdef DEBUG_PAIR_CT +#ifdef DEBUG_SSA_PAIR_CT if ((i < nlocal) && (j < nlocal)) ++(counters[1][0]); else ++(counters[1][1]); ++(counters[1][2]); @@ -530,7 +530,7 @@ void FixShardlow::initial_integrate(int vflag) error->one(FLERR, msg); } -#ifdef DEBUG_PAIR_CT +#ifdef DEBUG_SSA_PAIR_CT for (int i = 0; i < 2; ++i) for (int j = 0; j < 3; ++j) counters[i][j] = 0; @@ -598,7 +598,7 @@ void FixShardlow::initial_integrate(int vflag) } //End Loop over all directions For airnum = Top, Top-Right, Right, Bottom-Right, Back -#ifdef DEBUG_PAIR_CT +#ifdef DEBUG_SSA_PAIR_CT for (int i = 0; i < 32; ++i) fprintf(stdout, "%8d", hist[i]); fprintf(stdout, "\n%6d %6d,%6d %6d: " ,counters[0][2] diff --git a/src/USER-DPD/fix_shardlow.h b/src/USER-DPD/fix_shardlow.h index e87ae3c9cf..e8e5f484a0 100644 --- a/src/USER-DPD/fix_shardlow.h +++ b/src/USER-DPD/fix_shardlow.h @@ -38,7 +38,7 @@ class FixShardlow : public Fix { double memory_usage(); -#ifdef DEBUG_PAIR_CT +#ifdef DEBUG_SSA_PAIR_CT int counters[2][3]; int hist[32]; #endif From d95a5f219e69c41461971912130a20c4c5b11efd Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 31 Aug 2017 10:38:36 -0600 Subject: [PATCH 264/267] Remove all thread fences except one in verlet_kokkos --- src/KOKKOS/atom_vec_angle_kokkos.cpp | 16 ---------------- src/KOKKOS/atom_vec_atomic_kokkos.cpp | 16 ---------------- src/KOKKOS/atom_vec_bond_kokkos.cpp | 16 ---------------- src/KOKKOS/atom_vec_charge_kokkos.cpp | 16 ---------------- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 16 ---------------- src/KOKKOS/atom_vec_full_kokkos.cpp | 16 ---------------- src/KOKKOS/atom_vec_molecular_kokkos.cpp | 16 ---------------- src/KOKKOS/comm_kokkos.cpp | 7 ------- src/KOKKOS/domain_kokkos.cpp | 6 ------ src/KOKKOS/nbin_kokkos.cpp | 2 -- 
src/KOKKOS/nbin_ssa_kokkos.cpp | 4 ---- src/KOKKOS/neigh_bond_kokkos.cpp | 11 ----------- src/KOKKOS/neighbor_kokkos.cpp | 2 -- src/KOKKOS/npair_kokkos.cpp | 1 - src/KOKKOS/npair_ssa_kokkos.cpp | 1 - src/KOKKOS/region_block_kokkos.cpp | 1 - 16 files changed, 147 deletions(-) diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index 34b868aadc..05414cf2e4 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -308,7 +308,6 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n, Kokkos::parallel_for(n,f); } } - LMPHostType::fence(); } else { sync(Device,X_MASK); if(pbc_flag) { @@ -336,7 +335,6 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n, Kokkos::parallel_for(n,f); } } - LMPDeviceType::fence(); } return n*size_forward; @@ -430,7 +428,6 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li Kokkos::parallel_for(n,f); } } - LMPHostType::fence(); } else { sync(Device,X_MASK); modified(Device,X_MASK); @@ -463,7 +460,6 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li Kokkos::parallel_for(n,f); } } - LMPDeviceType::fence(); } return n*3; } @@ -501,13 +497,11 @@ void AtomVecAngleKokkos::unpack_comm_kokkos(const int &n, const int &first, modified(Host,X_MASK); struct AtomVecAngleKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } else { sync(Device,X_MASK); modified(Device,X_MASK); struct AtomVecAngleKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } @@ -753,13 +747,11 @@ int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, buf.view(), k_sendlist.view(), iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { AtomVecAngleKokkos_PackBorder f( buf.view(), k_sendlist.view(), iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); 
Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } else { @@ -769,13 +761,11 @@ int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, buf.view(), k_sendlist.view(), iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { AtomVecAngleKokkos_PackBorder f( buf.view(), k_sendlist.view(), iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } return n*size_border; @@ -977,12 +967,10 @@ void AtomVecAngleKokkos::unpack_border_kokkos(const int &n, const int &first, struct AtomVecAngleKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,h_molecule,first); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { struct AtomVecAngleKokkos_UnpackBorder f(buf.view(),d_x,d_tag,d_type,d_mask,d_molecule,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } @@ -1241,13 +1229,11 @@ int AtomVecAngleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_ AtomVecAngleKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); Kokkos::parallel_for(nsend,f); - LMPHostType::fence(); return nsend*elements; } else { AtomVecAngleKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); Kokkos::parallel_for(nsend,f); - LMPDeviceType::fence(); return nsend*elements; } } @@ -1405,7 +1391,6 @@ int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int n AtomVecAngleKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); Kokkos::parallel_for(nrecv/elements,f); - LMPHostType::fence(); return k_count.h_view(0); } else { k_count.h_view(0) = nlocal; @@ -1414,7 +1399,6 @@ int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int n AtomVecAngleKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); Kokkos::parallel_for(nrecv/elements,f); - LMPDeviceType::fence(); k_count.modify(); k_count.sync(); diff --git 
a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index d040bd3553..b63dc5fb8c 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -224,7 +224,6 @@ int AtomVecAtomicKokkos::pack_comm_kokkos(const int &n, Kokkos::parallel_for(n,f); } } - LMPHostType::fence(); } else { sync(Device,X_MASK); if(pbc_flag) { @@ -252,7 +251,6 @@ int AtomVecAtomicKokkos::pack_comm_kokkos(const int &n, Kokkos::parallel_for(n,f); } } - LMPDeviceType::fence(); } return n*size_forward; @@ -340,7 +338,6 @@ int AtomVecAtomicKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &l Kokkos::parallel_for(n,f); } } - LMPHostType::fence(); } else { sync(Device,X_MASK); modified(Device,X_MASK); @@ -369,7 +366,6 @@ int AtomVecAtomicKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &l Kokkos::parallel_for(n,f); } } - LMPDeviceType::fence(); } return n*3; } @@ -407,13 +403,11 @@ void AtomVecAtomicKokkos::unpack_comm_kokkos(const int &n, const int &first, modified(Host,X_MASK); struct AtomVecAtomicKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } else { sync(Device,X_MASK); modified(Device,X_MASK); struct AtomVecAtomicKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } @@ -655,13 +649,11 @@ int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, buf.view(), k_sendlist.view(), iswap,h_x,h_tag,h_type,h_mask,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { AtomVecAtomicKokkos_PackBorder f( buf.view(), k_sendlist.view(), iswap,d_x,d_tag,d_type,d_mask,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } else { @@ -671,13 +663,11 @@ int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, buf.view(), k_sendlist.view(), iswap,h_x,h_tag,h_type,h_mask,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { 
AtomVecAtomicKokkos_PackBorder f( buf.view(), k_sendlist.view(), iswap,d_x,d_tag,d_type,d_mask,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } return n*6; @@ -853,11 +843,9 @@ void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first, if(space==Host) { struct AtomVecAtomicKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,first); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { struct AtomVecAtomicKokkos_UnpackBorder f(buf.view(),d_x,d_tag,d_type,d_mask,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } @@ -1009,12 +997,10 @@ int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat if(space == Host) { AtomVecAtomicKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); Kokkos::parallel_for(nsend,f); - LMPHostType::fence(); return nsend*11; } else { AtomVecAtomicKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); Kokkos::parallel_for(nsend,f); - LMPDeviceType::fence(); return nsend*11; } } @@ -1106,7 +1092,6 @@ int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int k_count.h_view(0) = nlocal; AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); Kokkos::parallel_for(nrecv/11,f); - LMPHostType::fence(); return k_count.h_view(0); } else { k_count.h_view(0) = nlocal; @@ -1114,7 +1099,6 @@ int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int k_count.sync(); AtomVecAtomicKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); Kokkos::parallel_for(nrecv/11,f); - LMPDeviceType::fence(); k_count.modify(); k_count.sync(); diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index c46c49cb29..e0f29a27bb 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -266,7 +266,6 @@ int AtomVecBondKokkos::pack_comm_kokkos(const int &n, Kokkos::parallel_for(n,f); } } - 
LMPHostType::fence(); } else { sync(Device,X_MASK); if(pbc_flag) { @@ -294,7 +293,6 @@ int AtomVecBondKokkos::pack_comm_kokkos(const int &n, Kokkos::parallel_for(n,f); } } - LMPDeviceType::fence(); } return n*size_forward; @@ -382,7 +380,6 @@ int AtomVecBondKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &lis Kokkos::parallel_for(n,f); } } - LMPHostType::fence(); } else { sync(Device,X_MASK); modified(Device,X_MASK); @@ -411,7 +408,6 @@ int AtomVecBondKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &lis Kokkos::parallel_for(n,f); } } - LMPDeviceType::fence(); } return n*3; } @@ -449,13 +445,11 @@ void AtomVecBondKokkos::unpack_comm_kokkos(const int &n, const int &first, modified(Host,X_MASK); struct AtomVecBondKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } else { sync(Device,X_MASK); modified(Device,X_MASK); struct AtomVecBondKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } @@ -701,13 +695,11 @@ int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, buf.view(), k_sendlist.view(), iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { AtomVecBondKokkos_PackBorder f( buf.view(), k_sendlist.view(), iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } else { @@ -717,13 +709,11 @@ int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, buf.view(), k_sendlist.view(), iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { AtomVecBondKokkos_PackBorder f( buf.view(), k_sendlist.view(), iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } return n*size_border; @@ -925,12 +915,10 @@ void AtomVecBondKokkos::unpack_border_kokkos(const int &n, const int &first, struct 
AtomVecBondKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,h_molecule,first); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { struct AtomVecBondKokkos_UnpackBorder f(buf.view(),d_x,d_tag,d_type,d_mask,d_molecule,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } @@ -1157,13 +1145,11 @@ int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2 AtomVecBondKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); Kokkos::parallel_for(nsend,f); - LMPHostType::fence(); return nsend*elements; } else { AtomVecBondKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); Kokkos::parallel_for(nsend,f); - LMPDeviceType::fence(); return nsend*elements; } } @@ -1299,7 +1285,6 @@ int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr AtomVecBondKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); Kokkos::parallel_for(nrecv/elements,f); - LMPHostType::fence(); return k_count.h_view(0); } else { k_count.h_view(0) = nlocal; @@ -1308,7 +1293,6 @@ int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr AtomVecBondKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); Kokkos::parallel_for(nrecv/elements,f); - LMPDeviceType::fence(); k_count.modify(); k_count.sync(); diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index 856660d1e9..89f7e91c2b 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -236,7 +236,6 @@ int AtomVecChargeKokkos::pack_comm_kokkos(const int &n, Kokkos::parallel_for(n,f); } } - LMPHostType::fence(); } else { sync(Device,X_MASK); if(pbc_flag) { @@ -264,7 +263,6 @@ int AtomVecChargeKokkos::pack_comm_kokkos(const int &n, Kokkos::parallel_for(n,f); } } - LMPDeviceType::fence(); } return n*size_forward; @@ -352,7 +350,6 @@ int AtomVecChargeKokkos::pack_comm_self(const int &n, const 
DAT::tdual_int_2d &l Kokkos::parallel_for(n,f); } } - LMPHostType::fence(); } else { sync(Device,X_MASK); modified(Device,X_MASK); @@ -381,7 +378,6 @@ int AtomVecChargeKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &l Kokkos::parallel_for(n,f); } } - LMPDeviceType::fence(); } return n*3; } @@ -419,13 +415,11 @@ void AtomVecChargeKokkos::unpack_comm_kokkos(const int &n, const int &first, modified(Host,X_MASK); struct AtomVecChargeKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } else { sync(Device,X_MASK); modified(Device,X_MASK); struct AtomVecChargeKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } @@ -669,13 +663,11 @@ int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, buf.view(), k_sendlist.view(), iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { AtomVecChargeKokkos_PackBorder f( buf.view(), k_sendlist.view(), iswap,d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } else { @@ -685,13 +677,11 @@ int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, buf.view(), k_sendlist.view(), iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { AtomVecChargeKokkos_PackBorder f( buf.view(), k_sendlist.view(), iswap,d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } return n*size_border; @@ -890,12 +880,10 @@ void AtomVecChargeKokkos::unpack_border_kokkos(const int &n, const int &first, struct AtomVecChargeKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,h_q,first); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { struct AtomVecChargeKokkos_UnpackBorder f(buf.view(),d_x,d_tag,d_type,d_mask,d_q,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } 
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); } @@ -1078,13 +1066,11 @@ int AtomVecChargeKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat AtomVecChargeKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); Kokkos::parallel_for(nsend,f); - LMPHostType::fence(); return nsend*12; } else { AtomVecChargeKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); Kokkos::parallel_for(nsend,f); - LMPDeviceType::fence(); return nsend*12; } } @@ -1181,7 +1167,6 @@ int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int k_count.h_view(0) = nlocal; AtomVecChargeKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); Kokkos::parallel_for(nrecv/12,f); - LMPHostType::fence(); return k_count.h_view(0); } else { k_count.h_view(0) = nlocal; @@ -1190,7 +1175,6 @@ int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int AtomVecChargeKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); Kokkos::parallel_for(nrecv/12,f); - LMPDeviceType::fence(); k_count.modify(); k_count.sync(); diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 2090e924ec..c4e493bd85 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -298,7 +298,6 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n, Kokkos::parallel_for(n,f); } } - LMPHostType::fence(); } else { sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); if(pbc_flag) { @@ -334,7 +333,6 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n, Kokkos::parallel_for(n,f); } } - LMPDeviceType::fence(); } return n*size_forward; @@ -443,7 +441,6 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list Kokkos::parallel_for(n,f); } } - LMPHostType::fence(); } else { sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); 
modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); @@ -480,7 +477,6 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list Kokkos::parallel_for(n,f); } } - LMPDeviceType::fence(); } return n*3; } @@ -534,7 +530,6 @@ void AtomVecDPDKokkos::unpack_comm_kokkos(const int &n, const int &first, atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, buf,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } else { sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); @@ -542,7 +537,6 @@ void AtomVecDPDKokkos::unpack_comm_kokkos(const int &n, const int &first, atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, buf,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } @@ -840,7 +834,6 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, dx,dy,dz); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { AtomVecDPDKokkos_PackBorder f( buf.view(), k_sendlist.view(), @@ -848,7 +841,6 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, dx,dy,dz); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } else { @@ -860,7 +852,6 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, dx,dy,dz); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { AtomVecDPDKokkos_PackBorder f( buf.view(), k_sendlist.view(), @@ -868,7 +859,6 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, dx,dy,dz); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } return n*6; @@ -1146,14 +1136,12 @@ void AtomVecDPDKokkos::unpack_border_kokkos(const int &n, const int &first, 
h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew, first); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { struct AtomVecDPDKokkos_UnpackBorder f(buf.view(), d_x,d_tag,d_type,d_mask, d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew, first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } @@ -1399,11 +1387,9 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d if(space == Host) { AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); Kokkos::parallel_for(nsend,f); - LMPHostType::fence(); } else { AtomVecDPDKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); Kokkos::parallel_for(nsend,f); - LMPDeviceType::fence(); } return nsend*17; } @@ -1518,14 +1504,12 @@ int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nre k_count.h_view(0) = nlocal; AtomVecDPDKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); Kokkos::parallel_for(nrecv/17,f); - LMPHostType::fence(); } else { k_count.h_view(0) = nlocal; k_count.modify(); k_count.sync(); AtomVecDPDKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); Kokkos::parallel_for(nrecv/17,f); - LMPDeviceType::fence(); k_count.modify(); k_count.sync(); } diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index fa4cf18ae3..fd7eaf7c81 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -396,7 +396,6 @@ int AtomVecFullKokkos::pack_comm_kokkos(const int &n, Kokkos::parallel_for(n,f); } } - LMPHostType::fence(); } else { sync(Device,X_MASK); if(pbc_flag) { @@ -424,7 +423,6 @@ int AtomVecFullKokkos::pack_comm_kokkos(const int &n, Kokkos::parallel_for(n,f); } } - LMPDeviceType::fence(); } return n*size_forward; @@ -515,7 +513,6 @@ int AtomVecFullKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &lis Kokkos::parallel_for(n,f); } } - LMPHostType::fence(); } else { 
sync(Device,X_MASK); modified(Device,X_MASK); @@ -544,7 +541,6 @@ int AtomVecFullKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &lis Kokkos::parallel_for(n,f); } } - LMPDeviceType::fence(); } return n*3; } @@ -582,13 +578,11 @@ void AtomVecFullKokkos::unpack_comm_kokkos(const int &n, const int &first, modified(Host,X_MASK); struct AtomVecFullKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } else { sync(Device,X_MASK); modified(Device,X_MASK); struct AtomVecFullKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } @@ -838,13 +832,11 @@ int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, buf.view(), k_sendlist.view(), iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { AtomVecFullKokkos_PackBorder f( buf.view(), k_sendlist.view(), iswap,d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } else { @@ -854,13 +846,11 @@ int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, buf.view(), k_sendlist.view(), iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { AtomVecFullKokkos_PackBorder f( buf.view(), k_sendlist.view(), iswap,d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } return n*size_border; @@ -1071,12 +1061,10 @@ void AtomVecFullKokkos::unpack_border_kokkos(const int &n, const int &first, struct AtomVecFullKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,h_q,h_molecule,first); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { struct AtomVecFullKokkos_UnpackBorder f(buf.view(),d_x,d_tag,d_type,d_mask,d_q,d_molecule,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } @@ -1422,13 +1410,11 @@ int AtomVecFullKokkos::pack_exchange_kokkos(const int 
&nsend,DAT::tdual_xfloat_2 AtomVecFullKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); Kokkos::parallel_for(nsend,f); - LMPHostType::fence(); return nsend*elements; } else { AtomVecFullKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); Kokkos::parallel_for(nsend,f); - LMPDeviceType::fence(); return nsend*elements; } } @@ -1643,7 +1629,6 @@ int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr AtomVecFullKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); Kokkos::parallel_for(nrecv/elements,f); - LMPHostType::fence(); return k_count.h_view(0); } else { k_count.h_view(0) = nlocal; @@ -1652,7 +1637,6 @@ int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr AtomVecFullKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); Kokkos::parallel_for(nrecv/elements,f); - LMPDeviceType::fence(); k_count.modify(); k_count.sync(); diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index 5c16ac1513..dbf6a857b2 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -387,7 +387,6 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n, Kokkos::parallel_for(n,f); } } - LMPHostType::fence(); } else { sync(Device,X_MASK); if(pbc_flag) { @@ -415,7 +414,6 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n, Kokkos::parallel_for(n,f); } } - LMPDeviceType::fence(); } return n*size_forward; @@ -506,7 +504,6 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d Kokkos::parallel_for(n,f); } } - LMPHostType::fence(); } else { sync(Device,X_MASK); modified(Device,X_MASK); @@ -535,7 +532,6 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d Kokkos::parallel_for(n,f); } } - LMPDeviceType::fence(); } return n*3; } @@ -573,13 +569,11 @@ void 
AtomVecMolecularKokkos::unpack_comm_kokkos(const int &n, const int &first, modified(Host,X_MASK); struct AtomVecMolecularKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } else { sync(Device,X_MASK); modified(Device,X_MASK); struct AtomVecMolecularKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } @@ -825,13 +819,11 @@ int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendli buf.view(), k_sendlist.view(), iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { AtomVecMolecularKokkos_PackBorder f( buf.view(), k_sendlist.view(), iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } else { @@ -841,13 +833,11 @@ int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendli buf.view(), k_sendlist.view(), iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { AtomVecMolecularKokkos_PackBorder f( buf.view(), k_sendlist.view(), iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } return n*size_border; @@ -1049,12 +1039,10 @@ void AtomVecMolecularKokkos::unpack_border_kokkos(const int &n, const int &first struct AtomVecMolecularKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,h_molecule,first); Kokkos::parallel_for(n,f); - LMPHostType::fence(); } else { struct AtomVecMolecularKokkos_UnpackBorder f(buf.view(),d_x,d_tag,d_type,d_mask,d_molecule,first); Kokkos::parallel_for(n,f); - LMPDeviceType::fence(); } } @@ -1389,13 +1377,11 @@ int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfl AtomVecMolecularKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); Kokkos::parallel_for(nsend,f); - LMPHostType::fence(); return nsend*elements; } else { 
AtomVecMolecularKokkos_PackExchangeFunctor f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi); Kokkos::parallel_for(nsend,f); - LMPDeviceType::fence(); return nsend*elements; } } @@ -1608,7 +1594,6 @@ int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,i AtomVecMolecularKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); Kokkos::parallel_for(nrecv/elements,f); - LMPHostType::fence(); return k_count.h_view(0); } else { k_count.h_view(0) = nlocal; @@ -1617,7 +1602,6 @@ int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,i AtomVecMolecularKokkos_UnpackExchangeFunctor f(atomKK,k_buf,k_count,dim,lo,hi); Kokkos::parallel_for(nrecv/elements,f); - LMPDeviceType::fence(); k_count.modify(); k_count.sync(); diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 2b19908396..da1f4a89fe 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -499,7 +499,6 @@ void CommKokkos::exchange_device() f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag, nlocal,dim,lo,hi); Kokkos::parallel_for(nlocal,f); - DeviceType::fence(); k_exchange_sendlist.modify(); k_sendflag.modify(); k_count.modify(); @@ -535,7 +534,6 @@ void CommKokkos::exchange_device() k_exchange_sendlist,k_exchange_copylist, ExecutionSpaceFromDevice:: space,dim,lo,hi); - DeviceType::fence(); } else { while (i < nlocal) { @@ -560,7 +558,6 @@ void CommKokkos::exchange_device() atom->nlocal=avec-> unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi, ExecutionSpaceFromDevice::space); - DeviceType::fence(); } } else { MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0, @@ -593,7 +590,6 @@ void CommKokkos::exchange_device() atom->nlocal = avec-> unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, ExecutionSpaceFromDevice::space); - DeviceType::fence(); } } @@ -765,7 +761,6 @@ void CommKokkos::borders_device() { total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); Kokkos::TeamPolicy 
config((nlast-nfirst+127)/128,128); Kokkos::parallel_for(config,f); - DeviceType::fence(); total_send.template modify(); total_send.template sync(); @@ -782,7 +777,6 @@ void CommKokkos::borders_device() { total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); Kokkos::TeamPolicy config((nlast-nfirst+127)/128,128); Kokkos::parallel_for(config,f); - DeviceType::fence(); total_send.template modify(); total_send.template sync(); } @@ -911,7 +905,6 @@ void CommKokkos::borders_device() { if (exec_space == Host) k_sendlist.sync(); atomKK->modified(exec_space,ALL_MASK); - DeviceType::fence(); atomKK->sync(Host,TAG_MASK); if (map_style) atom->map_set(); } diff --git a/src/KOKKOS/domain_kokkos.cpp b/src/KOKKOS/domain_kokkos.cpp index 4bf8dc9841..d9c1332778 100644 --- a/src/KOKKOS/domain_kokkos.cpp +++ b/src/KOKKOS/domain_kokkos.cpp @@ -99,7 +99,6 @@ void DomainKokkos::reset_box() DomainResetBoxFunctor f(atomKK->k_x); Kokkos::parallel_reduce(nlocal,f,result); - LMPDeviceType::fence(); double (*extent)[2] = result.value; double all[3][2]; @@ -384,7 +383,6 @@ void DomainKokkos::pbc() Kokkos::parallel_for(nlocal,f); } } - LMPDeviceType::fence(); atomKK->modified(Device,X_MASK|V_MASK|IMAGE_MASK); } @@ -424,7 +422,6 @@ void DomainKokkos::remap_all() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); - LMPDeviceType::fence(); copymode = 0; atomKK->modified(Device,X_MASK | IMAGE_MASK); @@ -528,7 +525,6 @@ void DomainKokkos::image_flip(int m_in, int n_in, int p_in) copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); - LMPDeviceType::fence(); copymode = 0; atomKK->modified(Device,IMAGE_MASK); @@ -561,7 +557,6 @@ void DomainKokkos::lamda2x(int n) copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); - LMPDeviceType::fence(); copymode = 0; atomKK->modified(Device,X_MASK); @@ -587,7 +582,6 @@ void DomainKokkos::x2lamda(int n) copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); - LMPDeviceType::fence(); copymode 
= 0; atomKK->modified(Device,X_MASK); diff --git a/src/KOKKOS/nbin_kokkos.cpp b/src/KOKKOS/nbin_kokkos.cpp index 5e41787247..c7e815928a 100644 --- a/src/KOKKOS/nbin_kokkos.cpp +++ b/src/KOKKOS/nbin_kokkos.cpp @@ -95,7 +95,6 @@ void NBinKokkos::bin_atoms() MemsetZeroFunctor f_zero; f_zero.ptr = (void*) k_bincount.view().ptr_on_device(); Kokkos::parallel_for(mbins, f_zero); - DeviceType::fence(); atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); x = atomKK->k_x.view(); @@ -106,7 +105,6 @@ void NBinKokkos::bin_atoms() NPairKokkosBinAtomsFunctor f(*this); Kokkos::parallel_for(atom->nlocal+atom->nghost, f); - DeviceType::fence(); deep_copy(h_resize, d_resize); if(h_resize()) { diff --git a/src/KOKKOS/nbin_ssa_kokkos.cpp b/src/KOKKOS/nbin_ssa_kokkos.cpp index 883ba25b24..ab97cb5848 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.cpp +++ b/src/KOKKOS/nbin_ssa_kokkos.cpp @@ -152,7 +152,6 @@ void NBinSSAKokkos::bin_atoms() for (int i = 0; i < 8; i++) k_gbincount.h_view(i) = 0; k_gbincount.modify(); k_gbincount.sync(); - DeviceType::fence(); // FIXME? ghosts_per_gbin = 0; NPairSSAKokkosBinIDGhostsFunctor f(*this); Kokkos::parallel_reduce(Kokkos::RangePolicy(nlocal,nall), f, ghosts_per_gbin); @@ -167,7 +166,6 @@ void NBinSSAKokkos::bin_atoms() for (int i = 0; i < 8; i++) k_gbincount.h_view(i) = 0; k_gbincount.modify(); k_gbincount.sync(); - DeviceType::fence(); // FIXME? 
auto binID_ = binID; auto gbincount_ = gbincount; @@ -198,7 +196,6 @@ void NBinSSAKokkos::bin_atoms() MemsetZeroFunctor f_zero; f_zero.ptr = (void*) k_bincount.view().ptr_on_device(); Kokkos::parallel_for(mbins, f_zero); - DeviceType::fence(); auto bincount_ = bincount; auto bins_ = bins; @@ -210,7 +207,6 @@ void NBinSSAKokkos::bin_atoms() LAMMPS_LAMBDA (const int i) { sortBin(bincount_, bins_, i); }); - DeviceType::fence(); } k_bins.modify(); k_bincount.modify(); diff --git a/src/KOKKOS/neigh_bond_kokkos.cpp b/src/KOKKOS/neigh_bond_kokkos.cpp index a8c230fa59..a674e7cec4 100644 --- a/src/KOKKOS/neigh_bond_kokkos.cpp +++ b/src/KOKKOS/neigh_bond_kokkos.cpp @@ -274,7 +274,6 @@ void NeighBondKokkos::bond_all() k_fail_flag.template sync(); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,nmissing); - DeviceType::fence(); k_nlist.template modify(); k_nlist.template sync(); @@ -370,7 +369,6 @@ void NeighBondKokkos::bond_partial() k_fail_flag.template sync(); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,nmissing); - DeviceType::fence(); k_nlist.template modify(); k_nlist.template sync(); @@ -443,7 +441,6 @@ void NeighBondKokkos::bond_check() k_bondlist.sync(); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,neighbor->nbondlist),*this,flag); - DeviceType::fence(); int flag_all; MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); @@ -494,7 +491,6 @@ void NeighBondKokkos::angle_all() k_fail_flag.template sync(); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,nmissing); - DeviceType::fence(); k_nlist.template modify(); k_nlist.template sync(); @@ -597,7 +593,6 @@ void NeighBondKokkos::angle_partial() k_fail_flag.template sync(); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,nmissing); - DeviceType::fence(); k_nlist.template modify(); k_nlist.template sync(); @@ -678,7 +673,6 @@ void NeighBondKokkos::angle_check() k_anglelist.sync(); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,neighbor->nanglelist),*this,flag); - 
DeviceType::fence(); int flag_all; MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); @@ -741,7 +735,6 @@ void NeighBondKokkos::dihedral_all() k_fail_flag.template sync(); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,nmissing); - DeviceType::fence(); k_nlist.template modify(); k_nlist.template sync(); @@ -849,7 +842,6 @@ void NeighBondKokkos::dihedral_partial() k_fail_flag.template sync(); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,nmissing); - DeviceType::fence(); k_nlist.template modify(); k_nlist.template sync(); @@ -935,7 +927,6 @@ void NeighBondKokkos::dihedral_check(int nlist, typename AT::t_int_2 k_dihedrallist.sync(); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlist),*this,flag); - DeviceType::fence(); int flag_all; MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); @@ -1015,7 +1006,6 @@ void NeighBondKokkos::improper_all() k_fail_flag.template sync(); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,nmissing); - DeviceType::fence(); k_nlist.template modify(); k_nlist.template sync(); @@ -1123,7 +1113,6 @@ void NeighBondKokkos::improper_partial() k_fail_flag.template sync(); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,nmissing); - DeviceType::fence(); k_nlist.template modify(); k_nlist.template sync(); diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp index 8eda7ee55c..9a40808052 100644 --- a/src/KOKKOS/neighbor_kokkos.cpp +++ b/src/KOKKOS/neighbor_kokkos.cpp @@ -206,7 +206,6 @@ int NeighborKokkos::check_distance_kokkos() int flag = 0; copymode = 1; Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,nlocal),*this,flag); - DeviceType::fence(); copymode = 0; int flagall; @@ -273,7 +272,6 @@ void NeighborKokkos::build_kokkos(int topoflag) } copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy >(0,nlocal),*this); - DeviceType::fence(); copymode = 0; xhold.modify(); if (boxcheck) { diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 
d5ea8376f6..b568bd5c93 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -214,7 +214,6 @@ void NPairKokkos::build(NeighList *list_) #endif } } - DeviceType::fence(); deep_copy(data.h_resize, data.resize); if(data.h_resize()) { diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index aec482993d..b73e54e33f 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -480,7 +480,6 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu h_ssa_gitemLen(ssa_gphaseCt-1,h_ssa_gphaseLen(ssa_gphaseCt-1)-1) - data.neigh_list.inum; firstTry = false; - DeviceType::fence(); deep_copy(data.h_resize, data.resize); if(data.h_resize()) { diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp index 90fd47ab06..eed4272f23 100644 --- a/src/KOKKOS/region_block_kokkos.cpp +++ b/src/KOKKOS/region_block_kokkos.cpp @@ -67,7 +67,6 @@ void RegBlockKokkos::match_all_kokkos(int groupbit_in, DAT::tdual_in copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); - DeviceType::fence(); copymode = 0; k_match_in.template modify(); From c8f92c1a617a16dafb010458bcb8d7711a1d7b73 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 5 Sep 2017 16:42:58 -0400 Subject: [PATCH 265/267] add a couple deleted files from USER-DPD to Purge.list --- src/Purge.list | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Purge.list b/src/Purge.list index 15d20fc71a..d6f5010d49 100644 --- a/src/Purge.list +++ b/src/Purge.list @@ -16,6 +16,9 @@ style_region.h style_neigh_bin.h style_neigh_pair.h style_neigh_stencil.h +# deleted on 5 September 2017 +npair_halffull_newton_ssa.cpp +npair_halffull_newton_ssa.f # deleted on 6 June 2017 pair_lj_sf.cpp pair_lj_sf.h From 4c5d901e2b0489ae8cb23a5d320ead5803c120ba Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 5 Sep 2017 16:45:03 -0400 Subject: [PATCH 266/267] fix stupid typo (too much compiling of fortran 
codes...) --- src/Purge.list | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Purge.list b/src/Purge.list index d6f5010d49..4ccde5f4b5 100644 --- a/src/Purge.list +++ b/src/Purge.list @@ -18,7 +18,7 @@ style_neigh_pair.h style_neigh_stencil.h # deleted on 5 September 2017 npair_halffull_newton_ssa.cpp -npair_halffull_newton_ssa.f +npair_halffull_newton_ssa.h # deleted on 6 June 2017 pair_lj_sf.cpp pair_lj_sf.h From 0248a7b98e9eac8ef560b70141da7a50cf5f779a Mon Sep 17 00:00:00 2001 From: Tim Mattox Date: Wed, 6 Sep 2017 09:24:05 -0500 Subject: [PATCH 267/267] remove duplicate listing of deleted USER-DPD files from Purge.list --- src/Purge.list | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Purge.list b/src/Purge.list index 4ccde5f4b5..315e5e3424 100644 --- a/src/Purge.list +++ b/src/Purge.list @@ -45,9 +45,6 @@ fix_reax_c_bonds_kokkos.cpp fix_reax_c_bonds_kokkos.h fix_reax_c_species_kokkos.cpp fix_reax_c_species_kokkos.h -# deleted on 01 Mar 2017 -npair_halffull_newton_ssa.cpp -npair_halffull_newton_ssa.h # deleted on 19 April 2017 vmdplugin.h molfile_plugin.h