From 58905525bfa26e76ef85d87dadff706cb751d098 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 6 Feb 2019 14:42:37 -0700 Subject: [PATCH 01/34] Add team-based calcs to some KOKKOS package pair_styles --- src/KOKKOS/kokkos.cpp | 8 + src/KOKKOS/kokkos.h | 1 + src/KOKKOS/kokkos_type.h | 46 +++++ src/KOKKOS/pair_kokkos.h | 393 ++++++++++++++++++++++++++++++++++++++- 4 files changed, 439 insertions(+), 9 deletions(-) diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 9973b5a688..5d041bcbb0 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -192,6 +192,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) forward_comm_on_host = 0; reverse_comm_on_host = 0; gpu_direct_flag = 1; + team_flag = 0; #if KOKKOS_USE_CUDA // only if we can safely detect, that GPU-direct is not available, change default @@ -228,6 +229,7 @@ void KokkosLMP::accelerator(int narg, char **arg) exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0; exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; gpu_direct_flag = 1; + team_flag = 0; int iarg = 0; while (iarg < narg) { @@ -317,6 +319,12 @@ void KokkosLMP::accelerator(int narg, char **arg) else if (strcmp(arg[iarg+1],"on") == 0) gpu_direct_flag = 1; else error->all(FLERR,"Illegal package kokkos command"); iarg += 2; + } else if (strcmp(arg[iarg],"team") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); + if (strcmp(arg[iarg+1],"off") == 0) team_flag = 0; + else if (strcmp(arg[iarg+1],"on") == 0) team_flag = 1; + else error->all(FLERR,"Illegal package kokkos command"); + iarg += 2; } else error->all(FLERR,"Illegal package kokkos command"); } diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index cd429d5c1c..a665329d70 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -36,6 +36,7 @@ class KokkosLMP : protected Pointers { int numa; int auto_sync; int gpu_direct_flag; + int team_flag; KokkosLMP(class LAMMPS *, 
int, char **); ~KokkosLMP(); diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index b88c92ff73..16d7c3cbd2 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -448,6 +448,52 @@ struct s_EV_FLOAT_REAX { }; typedef struct s_EV_FLOAT_REAX EV_FLOAT_REAX; +struct s_FEV_FLOAT { + F_FLOAT f[3]; + E_FLOAT evdwl; + E_FLOAT ecoul; + E_FLOAT v[6]; + KOKKOS_INLINE_FUNCTION + s_FEV_FLOAT() { + f[0] = 0; f[1] = 0; f[2] = 0; + evdwl = 0; + ecoul = 0; + v[0] = 0; v[1] = 0; v[2] = 0; + v[3] = 0; v[4] = 0; v[5] = 0; + } + + KOKKOS_INLINE_FUNCTION + void operator+=(const s_FEV_FLOAT &rhs) { + f[0] += rhs.f[0]; + f[1] += rhs.f[1]; + f[2] += rhs.f[2]; + evdwl += rhs.evdwl; + ecoul += rhs.ecoul; + v[0] += rhs.v[0]; + v[1] += rhs.v[1]; + v[2] += rhs.v[2]; + v[3] += rhs.v[3]; + v[4] += rhs.v[4]; + v[5] += rhs.v[5]; + } + + KOKKOS_INLINE_FUNCTION + void operator+=(const volatile s_FEV_FLOAT &rhs) volatile { + f[0] += rhs.f[0]; + f[1] += rhs.f[1]; + f[2] += rhs.f[2]; + evdwl += rhs.evdwl; + ecoul += rhs.ecoul; + v[0] += rhs.v[0]; + v[1] += rhs.v[1]; + v[2] += rhs.v[2]; + v[3] += rhs.v[3]; + v[4] += rhs.v[4]; + v[5] += rhs.v[5]; + } +}; +typedef struct s_FEV_FLOAT FEV_FLOAT; + #ifndef PREC_POS #define PREC_POS PRECISION #endif diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index ab616d2c07..8758b2f03c 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -86,6 +86,7 @@ struct PairComputeFunctor { NeighListKokkos* list_ptr): c(*c_ptr),list(*list_ptr) { // allocate duplicated memory + f = c.f; dup_f = Kokkos::Experimental::create_scatter_view::value >(c.f); dup_eatom = Kokkos::Experimental::create_scatter_view::value >(c.d_eatom); dup_vatom = Kokkos::Experimental::create_scatter_view::value >(c.d_vatom); @@ -255,6 +256,329 @@ struct PairComputeFunctor { return ev; } + // Use TeamPolicy, assume Newton off, Full Neighborlist, and no energy/virial + // Loop over neighbors of one atom without coulomb interaction + // This 
function is called in parallel + KOKKOS_FUNCTION + void compute_item_team(Kokkos::TeamPolicy<>::member_type team, + const NeighListKokkos &list, const NoCoulTag&) const { + + const int inum = team.league_size(); + const int atoms_per_team = team.team_size(); + const int firstatom = team.league_rank()*atoms_per_team; + const int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum; + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) { + + const int i = list.d_ilist[ii]; + const X_FLOAT xtmp = c.x(i,0); + const X_FLOAT ytmp = c.x(i,1); + const X_FLOAT ztmp = c.x(i,2); + const int itype = c.type(i); + + const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); + const int jnum = list.d_numneigh[i]; + + t_scalar3 fsum; + + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,jnum), + [&] (const int jj, t_scalar3& ftmp) { + + int j = neighbors_i(jj); + const F_FLOAT factor_lj = c.special_lj[sbmask(j)]; + j &= NEIGHMASK; + const X_FLOAT delx = xtmp - c.x(j,0); + const X_FLOAT dely = ytmp - c.x(j,1); + const X_FLOAT delz = ztmp - c.x(j,2); + const int jtype = c.type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) { + + const F_FLOAT fpair = factor_lj*c.template compute_fpair(rsq,i,j,itype,jtype); + + ftmp.x += delx*fpair; + ftmp.y += dely*fpair; + ftmp.z += delz*fpair; + } + + },fsum); + + Kokkos::single(Kokkos::PerThread(team), [&] (){ + f(i,0) += fsum.x; + f(i,1) += fsum.y; + f(i,2) += fsum.z; + }); + + }); + } + + // Use TeamPolicy, assume Newton off, Full Neighborlist, and no energy/virial + // Loop over neighbors of one atom with coulomb interaction + // This function is called in parallel + KOKKOS_FUNCTION + void compute_item_team(Kokkos::TeamPolicy<>::member_type team, + const NeighListKokkos &list, const CoulTag& ) const { + + const int inum = team.league_size(); + const int atoms_per_team = 
team.team_size(); + int firstatom = team.league_rank()*atoms_per_team; + int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum; + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) { + + const int i = list.d_ilist[ii]; + const X_FLOAT xtmp = c.x(i,0); + const X_FLOAT ytmp = c.x(i,1); + const X_FLOAT ztmp = c.x(i,2); + const int itype = c.type(i); + const F_FLOAT qtmp = c.q(i); + + const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); + const int jnum = list.d_numneigh[i]; + + t_scalar3 fsum; + + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,jnum), + [&] (const int jj, t_scalar3& ftmp) { + int j = neighbors_i(jj); + const F_FLOAT factor_lj = c.special_lj[sbmask(j)]; + const F_FLOAT factor_coul = c.special_coul[sbmask(j)]; + j &= NEIGHMASK; + const X_FLOAT delx = xtmp - c.x(j,0); + const X_FLOAT dely = ytmp - c.x(j,1); + const X_FLOAT delz = ztmp - c.x(j,2); + const int jtype = c.type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) { + + F_FLOAT fpair = F_FLOAT(); + + if(rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype))) + fpair+=factor_lj*c.template compute_fpair(rsq,i,j,itype,jtype); + if(rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) + fpair+=c.template compute_fcoul(rsq,i,j,itype,jtype,factor_coul,qtmp); + + ftmp.x += delx*fpair; + ftmp.y += dely*fpair; + ftmp.z += delz*fpair; + } + },fsum); + + Kokkos::single(Kokkos::PerThread(team), [&] (){ + f(i,0) += fsum.x; + f(i,1) += fsum.y; + f(i,2) += fsum.z; + }); + }); + } + + + // Use TeamPolicy, assume Newton off, Full Neighborlist, and energy/virial + // Loop over neighbors of one atom without coulomb interaction + // This function is called in parallel + KOKKOS_FUNCTION + EV_FLOAT compute_item_team_ev(Kokkos::TeamPolicy<>::member_type team, + const NeighListKokkos &list, const 
NoCoulTag&) const { + + EV_FLOAT ev; + + const int inum = team.league_size(); + const int atoms_per_team = team.team_size(); + const int firstatom = team.league_rank()*atoms_per_team; + const int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum; + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) { + + + const int i = list.d_ilist[ii]; + const X_FLOAT xtmp = c.x(i,0); + const X_FLOAT ytmp = c.x(i,1); + const X_FLOAT ztmp = c.x(i,2); + const int itype = c.type(i); + + const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); + const int jnum = list.d_numneigh[i]; + + FEV_FLOAT fev; + + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,jnum), + [&] (const int jj, FEV_FLOAT& fev_tmp) { + + int j = neighbors_i(jj); + const F_FLOAT factor_lj = c.special_lj[sbmask(j)]; + j &= NEIGHMASK; + const X_FLOAT delx = xtmp - c.x(j,0); + const X_FLOAT dely = ytmp - c.x(j,1); + const X_FLOAT delz = ztmp - c.x(j,2); + const int jtype = c.type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) { + + const F_FLOAT fpair = factor_lj*c.template compute_fpair(rsq,i,j,itype,jtype); + + fev_tmp.f[0] += delx*fpair; + fev_tmp.f[1] += dely*fpair; + fev_tmp.f[2] += delz*fpair; + + F_FLOAT evdwl = 0.0; + if (c.eflag) { + evdwl = factor_lj * c.template compute_evdwl(rsq,i,j,itype,jtype); + fev.evdwl += 0.5*evdwl; + } + if (c.vflag_either) { + fev.v[0] += 0.5*delx*delx*fpair; + fev.v[1] += 0.5*dely*dely*fpair; + fev.v[2] += 0.5*delz*delz*fpair; + fev.v[3] += 0.5*delx*dely*fpair; + fev.v[4] += 0.5*delx*delz*fpair; + fev.v[5] += 0.5*dely*delz*fpair; + } + } + },fev); + + Kokkos::single(Kokkos::PerThread(team), [&] (){ + f(i,0) += fev.f[0]; + f(i,1) += fev.f[1]; + f(i,2) += fev.f[2]; + + if (c.eflag_global) + ev.evdwl += fev.evdwl; + + if (c.eflag_atom) + d_eatom(i,0) += fev.evdwl; + + if (c.vflag_global) { + ev.v[0] += 
fev.v[0]; + ev.v[1] += fev.v[1]; + ev.v[2] += fev.v[2]; + ev.v[3] += fev.v[3]; + ev.v[4] += fev.v[4]; + ev.v[5] += fev.v[5]; + } + + if (c.vflag_atom) { + d_vatom(i,0) += fev.v[0]; + d_vatom(i,1) += fev.v[1]; + d_vatom(i,2) += fev.v[2]; + d_vatom(i,3) += fev.v[3]; + d_vatom(i,4) += fev.v[4]; + d_vatom(i,5) += fev.v[5]; + } + }); + }); + return ev; + } + + // Use TeamPolicy, assume Newton off, Full Neighborlist, and energy/virial + // Loop over neighbors of one atom with coulomb interaction + // This function is called in parallel + KOKKOS_FUNCTION + EV_FLOAT compute_item_team_ev(Kokkos::TeamPolicy<>::member_type team, + const NeighListKokkos &list, const CoulTag& ) const { + + EV_FLOAT ev; + + const int inum = team.league_size(); + const int atoms_per_team = team.team_size(); + int firstatom = team.league_rank()*atoms_per_team; + int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum; + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) { + + const int i = list.d_ilist[ii]; + const X_FLOAT xtmp = c.x(i,0); + const X_FLOAT ytmp = c.x(i,1); + const X_FLOAT ztmp = c.x(i,2); + const int itype = c.type(i); + const F_FLOAT qtmp = c.q(i); + + const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); + const int jnum = list.d_numneigh[i]; + + FEV_FLOAT fev; + + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,jnum), + [&] (const int jj, FEV_FLOAT& fev_tmp) { + int j = neighbors_i(jj); + const F_FLOAT factor_lj = c.special_lj[sbmask(j)]; + const F_FLOAT factor_coul = c.special_coul[sbmask(j)]; + j &= NEIGHMASK; + const X_FLOAT delx = xtmp - c.x(j,0); + const X_FLOAT dely = ytmp - c.x(j,1); + const X_FLOAT delz = ztmp - c.x(j,2); + const int jtype = c.type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) { + + F_FLOAT fpair = F_FLOAT(); + + if(rsq < 
(STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype))) + fpair+=factor_lj*c.template compute_fpair(rsq,i,j,itype,jtype); + if(rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) + fpair+=c.template compute_fcoul(rsq,i,j,itype,jtype,factor_coul,qtmp); + + fev.f[0] += delx*fpair; + fev.f[1] += dely*fpair; + fev.f[2] += delz*fpair; + + F_FLOAT evdwl = 0.0; + F_FLOAT ecoul = 0.0; + if (c.eflag) { + if(rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype))) { + evdwl = factor_lj * c.template compute_evdwl(rsq,i,j,itype,jtype); + ev.evdwl += 0.5*evdwl; + } + if(rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) { + ecoul = c.template compute_ecoul(rsq,i,j,itype,jtype,factor_coul,qtmp); + ev.ecoul += 0.5*ecoul; + } + } + if (c.vflag) { + fev.v[0] += 0.5*delx*delx*fpair; + fev.v[1] += 0.5*dely*dely*fpair; + fev.v[2] += 0.5*delz*delz*fpair; + fev.v[3] += 0.5*delx*dely*fpair; + fev.v[4] += 0.5*delx*delz*fpair; + fev.v[5] += 0.5*dely*delz*fpair; + } + } + },fev); + + Kokkos::single(Kokkos::PerThread(team), [&] (){ + f(i,0) += fev.f[0]; + f(i,1) += fev.f[1]; + f(i,2) += fev.f[2]; + + if (c.eflag_global) { + ev.evdwl += fev.evdwl; + ev.ecoul += fev.ecoul; + } + + if (c.eflag_atom) + d_eatom(i,0) += fev.evdwl + fev.ecoul; + + if (c.vflag_global) { + ev.v[0] += fev.v[0]; + ev.v[1] += fev.v[1]; + ev.v[2] += fev.v[2]; + ev.v[3] += fev.v[3]; + ev.v[4] += fev.v[4]; + ev.v[5] += fev.v[5]; + } + + if (c.vflag_atom) { + d_vatom(i,0) += fev.v[0]; + d_vatom(i,1) += fev.v[1]; + d_vatom(i,2) += fev.v[2]; + d_vatom(i,3) += fev.v[3]; + d_vatom(i,4) += fev.v[4]; + d_vatom(i,5) += fev.v[5]; + } + }); + }); + return ev; + } + KOKKOS_INLINE_FUNCTION void ev_tally(EV_FLOAT &ev, const int &i, const int &j, const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, @@ -355,6 +679,16 @@ struct PairComputeFunctor { else energy_virial += compute_item<1,0>(i,list,typename DoCoul::type()); } + + 
KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy<>::member_type& team) const { + compute_item_team(team,list,typename DoCoul::type()); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy<>::member_type& team, value_type &energy_virial) const { + energy_virial += compute_item_team_ev(team,list,typename DoCoul::type()); + } }; template @@ -489,6 +823,15 @@ struct PairComputeFunctor { void operator()(const int i, value_type &energy_virial) const { energy_virial += compute_item<1,0>(i,list,typename DoCoul::type()); } + + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy<>::member_type& team) const + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy<>::member_type& team, value_type &energy_virial) const + {} + }; // Filter out Neighflags which are not supported for PairStyle @@ -507,20 +850,52 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable return ev; } +template +int GetTeamSize(FunctorStyle& functor, int team_size, int vector_length) { + int team_size_max = Kokkos::TeamPolicy<>::team_size_max(functor); + +#ifdef KOKKOS_ENABLE_CUDA + if(team_size*vector_length > team_size_max) + team_size = team_size_max/vector_length; +#else + team_size = 1; +#endif + return team_size; +} + // Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL,N2 template EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos*>::type list) { EV_FLOAT ev; - if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) { - PairComputeFunctor ff(fpair,list); - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); - else Kokkos::parallel_for(list->inum,ff); - ff.contribute(); + if (fpair->lmp->kokkos->team_flag) { + int vector_length = 8; + int atoms_per_team = 32; + + if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) { + PairComputeFunctor ff(fpair,list); + 
atoms_per_team = GetTeamSize(ff, atoms_per_team, vector_length); + Kokkos::TeamPolicy > policy(list->inum,atoms_per_team,vector_length); + if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev); + else Kokkos::parallel_for(policy,ff); + } else { + PairComputeFunctor ff(fpair,list); + atoms_per_team = GetTeamSize(ff, atoms_per_team, vector_length); + Kokkos::TeamPolicy > policy(list->inum,atoms_per_team,vector_length); + if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev); + else Kokkos::parallel_for(policy,ff); + } } else { - PairComputeFunctor ff(fpair,list); - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); - else Kokkos::parallel_for(list->inum,ff); - ff.contribute(); + if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) { + PairComputeFunctor ff(fpair,list); + if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); + else Kokkos::parallel_for(list->inum,ff); + ff.contribute(); + } else { + PairComputeFunctor ff(fpair,list); + if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); + else Kokkos::parallel_for(list->inum,ff); + ff.contribute(); + } } return ev; } From 0a02097e20354223d2f0a0c52618d125c22f314d Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 25 Feb 2019 08:39:54 -0700 Subject: [PATCH 02/34] Add squashed comm forward for Kokkos --- src/KOKKOS/atom_vec_kokkos.cpp | 100 +++++++++++++++++++++++++++++++++ src/KOKKOS/atom_vec_kokkos.h | 7 +++ src/KOKKOS/comm_kokkos.cpp | 43 ++++++++++++-- src/KOKKOS/comm_kokkos.h | 6 ++ 4 files changed, 152 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 83af437eba..6fda61bc31 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -267,6 +267,106 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, c return n*3; } + +/* ---------------------------------------------------------------------- */ + 
+template +struct AtomVecKokkos_PackCommSelfSquash { + typedef DeviceType device_type; + + typename ArrayTypes::t_x_array_randomread _x; + typename ArrayTypes::t_x_array _xw; + typename ArrayTypes::t_int_2d_const _list; + typename ArrayTypes::t_int_2d_const _pbc; + typename ArrayTypes::t_int_1d_const _pbc_flag; + typename ArrayTypes::t_int_1d_const _firstrecv; + typename ArrayTypes::t_int_1d_const _sendnum_scan; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + + AtomVecKokkos_PackCommSelfSquash( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_int_2d &list, + const typename DAT::tdual_int_2d &pbc, + const typename DAT::tdual_int_1d &pbc_flag, + const typename DAT::tdual_int_1d &firstrecv, + const typename DAT::tdual_int_1d &sendnum_scan, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz): + _x(x.view()),_xw(x.view()), + _list(list.view()), + _pbc(pbc.view()), + _pbc_flag(pbc_flag.view()), + _firstrecv(firstrecv.view()), + _sendnum_scan(sendnum_scan.view()), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& ii) const { + + int iswap = 0; + while (ii >= _sendnum_scan[iswap]) iswap++; + int i = ii; + if (iswap > 1) + i = ii - _sendnum_scan[iswap-1]; + const int _nfirst = _firstrecv[iswap]; + + const int j = _list(iswap,i); + if (_pbc_flag(iswap) == 0) { + _xw(i+_nfirst,0) = _x(j,0); + _xw(i+_nfirst,1) = _x(j,1); + _xw(i+_nfirst,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _xw(i+_nfirst,0) = _x(j,0) + _pbc(iswap,0)*_xprd; + _xw(i+_nfirst,1) = _x(j,1) + _pbc(iswap,1)*_yprd; + _xw(i+_nfirst,2) = _x(j,2) + _pbc(iswap,2)*_zprd; + } else { + _xw(i+_nfirst,0) = _x(j,0) + _pbc(iswap,0)*_xprd + _pbc(iswap,5)*_xy + _pbc(iswap,4)*_xz; + _xw(i+_nfirst,1) = _x(j,1) + _pbc(iswap,1)*_yprd + _pbc(iswap,3)*_yz; + _xw(i+_nfirst,2) = _x(j,2) + _pbc(iswap,2)*_zprd; + } + } + + } +}; + +/* 
---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_comm_self_squash(const int &n, const DAT::tdual_int_2d &list, const DAT::tdual_int_1d &sendnum_scan, + const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, const DAT::tdual_int_2d &pbc) { + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + modified(Host,X_MASK); + if(domain->triclinic) { + struct AtomVecKokkos_PackCommSelfSquash f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelfSquash f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz); + Kokkos::parallel_for(n,f); + } + } else { + sync(Device,X_MASK); + modified(Device,X_MASK); + if(domain->triclinic) { + struct AtomVecKokkos_PackCommSelfSquash f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelfSquash f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz); + Kokkos::parallel_for(n,f); + } + } + return n*3; +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index efe55c47ad..d8541ceb86 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -51,6 +51,13 @@ class AtomVecKokkos : public AtomVec { const int & iswap, const int nfirst, const int &pbc_flag, const int pbc[]); + virtual int + pack_comm_self_squash(const int &n, const DAT::tdual_int_2d &list, + const DAT::tdual_int_1d &sendnum_scan, + const DAT::tdual_int_1d &firstrecv, + const DAT::tdual_int_1d &pbc_flag, + const 
DAT::tdual_int_2d &pbc); + virtual int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list, const int & iswap, const DAT::tdual_xfloat_2d &buf, diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 1d31c07180..587f20a76d 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -242,10 +242,15 @@ void CommKokkos::forward_comm_device(int dummy) } } else { if (!ghost_velocity) { - if (sendnum[iswap]) - n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap, - firstrecv[iswap],pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); + if (1) { + n = avec->pack_comm_self_squash(totalsend,k_sendlist,k_sendnum_scan, + k_firstrecv,k_pbc_flag,k_pbc); + } else { + if (sendnum[iswap]) + n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap, + firstrecv[iswap],pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); + } } else { n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); @@ -1036,6 +1041,36 @@ void CommKokkos::borders_device() { atomKK->sync(Host,TAG_MASK); atom->map_set(); } + + if (1) { + k_pbc = DAT::tdual_int_2d("comm:pbc",nswap,6); + k_pbc_flag = DAT::tdual_int_1d("comm:pbc_flag",nswap); + k_firstrecv = DAT::tdual_int_1d("comm:firstrecv",nswap); + k_sendnum_scan = DAT::tdual_int_1d("comm:sendnum_scan",nswap); + int scan = 0; + for (int iswap = 0; iswap < nswap; iswap++) { + scan += sendnum[iswap]; + k_sendnum_scan.h_view[iswap] = scan; + k_firstrecv.h_view[iswap] = firstrecv[iswap]; + k_pbc_flag.h_view[iswap] = pbc_flag[iswap]; + k_pbc.h_view(iswap,0) = pbc[iswap][0]; + k_pbc.h_view(iswap,1) = pbc[iswap][1]; + k_pbc.h_view(iswap,2) = pbc[iswap][2]; + k_pbc.h_view(iswap,3) = pbc[iswap][3]; + k_pbc.h_view(iswap,4) = pbc[iswap][4]; + k_pbc.h_view(iswap,5) = pbc[iswap][5]; + } + totalsend = scan; + k_pbc .modify(); + k_pbc_flag .modify(); + k_firstrecv .modify(); + k_sendnum_scan.modify(); + + k_pbc .sync(); + k_pbc_flag .sync(); + k_firstrecv .sync(); + 
k_sendnum_scan.sync(); + } } /* ---------------------------------------------------------------------- realloc the size of the send buffer as needed with BUFFACTOR and bufextra diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h index f137655cb8..bf2ee8822f 100644 --- a/src/KOKKOS/comm_kokkos.h +++ b/src/KOKKOS/comm_kokkos.h @@ -63,6 +63,12 @@ class CommKokkos : public CommBrick { //double *buf_send; // send buffer for all comm //double *buf_recv; // recv buffer for all comm + DAT::tdual_int_2d k_pbc; + DAT::tdual_int_1d k_pbc_flag; + DAT::tdual_int_1d k_firstrecv; + DAT::tdual_int_1d k_sendnum_scan; + int totalsend; + int max_buf_pair; DAT::tdual_xfloat_1d k_buf_send_pair; DAT::tdual_xfloat_1d k_buf_recv_pair; From 85a14ebcb8fde53d5398671cb4f38990710d0346 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 25 Feb 2019 09:17:34 -0700 Subject: [PATCH 03/34] Fix issue with comm squash --- src/KOKKOS/atom_vec_kokkos.cpp | 3 ++- src/KOKKOS/comm_kokkos.cpp | 21 ++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 6fda61bc31..5a1b1bde22 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -307,10 +307,11 @@ struct AtomVecKokkos_PackCommSelfSquash { int iswap = 0; while (ii >= _sendnum_scan[iswap]) iswap++; int i = ii; - if (iswap > 1) + if (iswap > 0) i = ii - _sendnum_scan[iswap-1]; const int _nfirst = _firstrecv[iswap]; + const int j = _list(iswap,i); if (_pbc_flag(iswap) == 0) { _xw(i+_nfirst,0) = _x(j,0); diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 587f20a76d..c288a0e4a1 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -187,6 +187,13 @@ void CommKokkos::forward_comm_device(int dummy) k_sendlist.sync(); atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); + int comm_squash = 1; + if (comm_squash) { + n = 
avec->pack_comm_self_squash(totalsend,k_sendlist,k_sendnum_scan, + k_firstrecv,k_pbc_flag,k_pbc); + DeviceType::fence(); + } else { + for (int iswap = 0; iswap < nswap; iswap++) { if (sendproc[iswap] != me) { if (comm_x_only) { @@ -242,15 +249,10 @@ void CommKokkos::forward_comm_device(int dummy) } } else { if (!ghost_velocity) { - if (1) { - n = avec->pack_comm_self_squash(totalsend,k_sendlist,k_sendnum_scan, - k_firstrecv,k_pbc_flag,k_pbc); - } else { - if (sendnum[iswap]) - n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap, - firstrecv[iswap],pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); - } + if (sendnum[iswap]) + n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap, + firstrecv[iswap],pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); } else { n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); @@ -260,6 +262,7 @@ void CommKokkos::forward_comm_device(int dummy) } } } + } } /* ---------------------------------------------------------------------- From 5d8e3c6cb4d054f963d9319373ab2b43116b7d82 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 28 Feb 2019 10:14:35 -0700 Subject: [PATCH 04/34] Optimize reneighbor for small systems --- src/KOKKOS/comm_kokkos.cpp | 27 +++++++++++++++------------ src/KOKKOS/comm_kokkos.h | 1 + src/KOKKOS/npair_kokkos.h | 4 ++-- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index c288a0e4a1..c782305ef5 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -1046,10 +1046,16 @@ void CommKokkos::borders_device() { } if (1) { - k_pbc = DAT::tdual_int_2d("comm:pbc",nswap,6); - k_pbc_flag = DAT::tdual_int_1d("comm:pbc_flag",nswap); - k_firstrecv = DAT::tdual_int_1d("comm:firstrecv",nswap); - k_sendnum_scan = DAT::tdual_int_1d("comm:sendnum_scan",nswap); + if (nswap > k_pbc.extent(0)) { + k_pbc = DAT::tdual_int_2d("comm:pbc",nswap,6); + k_swap = 
DAT::tdual_int_2d("comm:swap",3,nswap); + k_pbc_flag .d_view = Kokkos::subview(k_swap.d_view,0,Kokkos::ALL); + k_firstrecv .d_view = Kokkos::subview(k_swap.d_view,1,Kokkos::ALL); + k_sendnum_scan.d_view = Kokkos::subview(k_swap.d_view,2,Kokkos::ALL); + k_pbc_flag .h_view = Kokkos::subview(k_swap.h_view,0,Kokkos::ALL); + k_firstrecv .h_view = Kokkos::subview(k_swap.h_view,1,Kokkos::ALL); + k_sendnum_scan.h_view = Kokkos::subview(k_swap.h_view,2,Kokkos::ALL); + } int scan = 0; for (int iswap = 0; iswap < nswap; iswap++) { scan += sendnum[iswap]; @@ -1064,15 +1070,12 @@ void CommKokkos::borders_device() { k_pbc.h_view(iswap,5) = pbc[iswap][5]; } totalsend = scan; - k_pbc .modify(); - k_pbc_flag .modify(); - k_firstrecv .modify(); - k_sendnum_scan.modify(); - k_pbc .sync(); - k_pbc_flag .sync(); - k_firstrecv .sync(); - k_sendnum_scan.sync(); + k_swap.modify(); + k_pbc.modify(); + + k_swap.sync(); + k_pbc.sync(); } } /* ---------------------------------------------------------------------- diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h index bf2ee8822f..cab8124231 100644 --- a/src/KOKKOS/comm_kokkos.h +++ b/src/KOKKOS/comm_kokkos.h @@ -63,6 +63,7 @@ class CommKokkos : public CommBrick { //double *buf_send; // send buffer for all comm //double *buf_recv; // recv buffer for all comm + DAT::tdual_int_2d k_swap; DAT::tdual_int_2d k_pbc; DAT::tdual_int_1d k_pbc_flag; DAT::tdual_int_1d k_firstrecv; diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index 970e40c9fc..373ddf799e 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -280,7 +280,7 @@ class NeighborKokkosExecute bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2]; bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2]; - resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize"); + resize = typename AT::t_int_scalar(Kokkos::view_alloc("NeighborKokkosFunctor::resize",Kokkos::WithoutInitializing)); #ifndef 
KOKKOS_USE_CUDA_UVM h_resize = Kokkos::create_mirror_view(resize); #else @@ -288,7 +288,7 @@ class NeighborKokkosExecute #endif h_resize() = 1; new_maxneighs = typename AT:: - t_int_scalar("NeighborKokkosFunctor::new_maxneighs"); + t_int_scalar(Kokkos::view_alloc("NeighborKokkosFunctor::new_maxneighs",Kokkos::WithoutInitializing)); #ifndef KOKKOS_USE_CUDA_UVM h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs); #else From ff7276e494082d9fca6f766469f826b378970836 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 7 Mar 2019 08:56:13 -0700 Subject: [PATCH 05/34] Clean up the fused comm --- src/KOKKOS/atom_vec_kokkos.cpp | 14 +++++++------- src/KOKKOS/atom_vec_kokkos.h | 10 +++++----- src/KOKKOS/comm_kokkos.cpp | 8 +++----- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 5a1b1bde22..076e3e52fa 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -271,7 +271,7 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, c /* ---------------------------------------------------------------------- */ template -struct AtomVecKokkos_PackCommSelfSquash { +struct AtomVecKokkos_PackCommSelfFused { typedef DeviceType device_type; typename ArrayTypes::t_x_array_randomread _x; @@ -283,7 +283,7 @@ struct AtomVecKokkos_PackCommSelfSquash { typename ArrayTypes::t_int_1d_const _sendnum_scan; X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; - AtomVecKokkos_PackCommSelfSquash( + AtomVecKokkos_PackCommSelfFused( const typename DAT::tdual_x_array &x, const typename DAT::tdual_int_2d &list, const typename DAT::tdual_int_2d &pbc, @@ -334,18 +334,18 @@ struct AtomVecKokkos_PackCommSelfSquash { /* ---------------------------------------------------------------------- */ -int AtomVecKokkos::pack_comm_self_squash(const int &n, const DAT::tdual_int_2d &list, const DAT::tdual_int_1d &sendnum_scan, +int AtomVecKokkos::pack_comm_self_fused(const int &n, const 
DAT::tdual_int_2d &list, const DAT::tdual_int_1d &sendnum_scan, const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, const DAT::tdual_int_2d &pbc) { if(commKK->forward_comm_on_host) { sync(Host,X_MASK); modified(Host,X_MASK); if(domain->triclinic) { - struct AtomVecKokkos_PackCommSelfSquash f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan, + struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan, domain->xprd,domain->yprd,domain->zprd, domain->xy,domain->xz,domain->yz); Kokkos::parallel_for(n,f); } else { - struct AtomVecKokkos_PackCommSelfSquash f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan, + struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan, domain->xprd,domain->yprd,domain->zprd, domain->xy,domain->xz,domain->yz); Kokkos::parallel_for(n,f); @@ -354,12 +354,12 @@ int AtomVecKokkos::pack_comm_self_squash(const int &n, const DAT::tdual_int_2d & sync(Device,X_MASK); modified(Device,X_MASK); if(domain->triclinic) { - struct AtomVecKokkos_PackCommSelfSquash f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan, + struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan, domain->xprd,domain->yprd,domain->zprd, domain->xy,domain->xz,domain->yz); Kokkos::parallel_for(n,f); } else { - struct AtomVecKokkos_PackCommSelfSquash f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan, + struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan, domain->xprd,domain->yprd,domain->zprd, domain->xy,domain->xz,domain->yz); Kokkos::parallel_for(n,f); diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index d8541ceb86..64fd238fc0 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -52,11 +52,11 @@ class AtomVecKokkos : public AtomVec { const int &pbc_flag, const int pbc[]); virtual int - pack_comm_self_squash(const int &n, const DAT::tdual_int_2d 
&list, - const DAT::tdual_int_1d &sendnum_scan, - const DAT::tdual_int_1d &firstrecv, - const DAT::tdual_int_1d &pbc_flag, - const DAT::tdual_int_2d &pbc); + pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &list, + const DAT::tdual_int_1d &sendnum_scan, + const DAT::tdual_int_1d &firstrecv, + const DAT::tdual_int_1d &pbc_flag, + const DAT::tdual_int_2d &pbc); virtual int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list, diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index c782305ef5..5aa2cbdfbe 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -187,11 +187,9 @@ void CommKokkos::forward_comm_device(int dummy) k_sendlist.sync(); atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); - int comm_squash = 1; - if (comm_squash) { - n = avec->pack_comm_self_squash(totalsend,k_sendlist,k_sendnum_scan, + if (comm->nprocs == 1) { + n = avec->pack_comm_self_fused(totalsend,k_sendlist,k_sendnum_scan, k_firstrecv,k_pbc_flag,k_pbc); - DeviceType::fence(); } else { for (int iswap = 0; iswap < nswap; iswap++) { @@ -1045,7 +1043,7 @@ void CommKokkos::borders_device() { atom->map_set(); } - if (1) { + if (comm->nprocs == 1) { if (nswap > k_pbc.extent(0)) { k_pbc = DAT::tdual_int_2d("comm:pbc",nswap,6); k_swap = DAT::tdual_int_2d("comm:swap",3,nswap); From e422e886de5a719a49c9974b92ce9193bcd1de1e Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 8 Mar 2019 11:33:29 -0700 Subject: [PATCH 06/34] Add error check for team on and full neighborlist --- src/KOKKOS/kokkos.cpp | 3 +++ src/KOKKOS/kokkos.h | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 6c87835195..efd3a75042 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -344,6 +344,9 @@ void KokkosLMP::accelerator(int narg, char **arg) force->newton = force->newton_pair = force->newton_bond = newtonflag; + if (team_flag && neighflag != FULL) + error->all(FLERR,"Must use KOKKOS package option 
'neigh full' with 'team on'"); + neighbor->binsize_user = binsize; if (binsize <= 0.0) neighbor->binsizeflag = 0; else neighbor->binsizeflag = 1; diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index a665329d70..c70d7a31f5 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -86,4 +86,8 @@ U: Must use Kokkos half/thread or full neighbor list with threads or GPUs Using Kokkos half-neighbor lists with threading is not allowed. +E: Must use KOKKOS package option 'neigh full' with 'team on' + +The 'team on' option requires a full neighbor list + */ From aecef752e8beee76465feb03c05276235dd41952 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 11 Mar 2019 13:41:20 -0600 Subject: [PATCH 07/34] Remove unnecessary data movement in fix_nve_kokkos --- src/KOKKOS/fix_nve_kokkos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_nve_kokkos.cpp b/src/KOKKOS/fix_nve_kokkos.cpp index 052bf411d6..6db8ff8c0f 100644 --- a/src/KOKKOS/fix_nve_kokkos.cpp +++ b/src/KOKKOS/fix_nve_kokkos.cpp @@ -113,8 +113,8 @@ void FixNVEKokkos::initial_integrate_rmass_item(int i) const template void FixNVEKokkos::final_integrate() { - atomKK->sync(execution_space,datamask_read); - atomKK->modified(execution_space,datamask_modify); + atomKK->sync(execution_space,V_MASK | F_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK); + atomKK->modified(execution_space,V_MASK); v = atomKK->k_v.view(); f = atomKK->k_f.view(); From 71a622724042151cb2c861a69ce1209cc0de58ff Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 14 Mar 2019 15:43:50 -0600 Subject: [PATCH 08/34] Optimize KOKKOS package for small system sizes --- src/KOKKOS/atom_vec_atomic_kokkos.cpp | 5 ++- src/KOKKOS/domain_kokkos.cpp | 12 ++++++ src/KOKKOS/npair_kokkos.cpp | 56 ++++++++++++++++----------- src/KOKKOS/npair_kokkos.h | 30 +++++++------- src/KOKKOS/pair_kokkos.h | 13 +++---- src/neighbor.h | 4 +- 6 files changed, 72 insertions(+), 48 deletions(-) diff --git 
a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 6aba49e5f3..e3c1bee956 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -24,7 +24,7 @@ using namespace LAMMPS_NS; -#define DELTA 10000 +#define DELTA 10 /* ---------------------------------------------------------------------- */ @@ -55,7 +55,8 @@ AtomVecAtomicKokkos::AtomVecAtomicKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) void AtomVecAtomicKokkos::grow(int n) { - if (n == 0) nmax += DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; else nmax = n; atomKK->nmax = nmax; if (nmax < 0 || nmax > MAXSMALLINT) diff --git a/src/KOKKOS/domain_kokkos.cpp b/src/KOKKOS/domain_kokkos.cpp index d9c1332778..4cf3e6ab52 100644 --- a/src/KOKKOS/domain_kokkos.cpp +++ b/src/KOKKOS/domain_kokkos.cpp @@ -17,6 +17,7 @@ #include "error.h" #include "force.h" #include "kspace.h" +#include "kokkos.h" using namespace LAMMPS_NS; @@ -339,6 +340,17 @@ struct DomainPBCFunctor { void DomainKokkos::pbc() { + + if (lmp->kokkos->exchange_comm_classic) { + + // reduce GPU data movement + + atomKK->sync(Host,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK); + Domain::pbc(); + atomKK->modified(Host,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK); + return; + } + double *lo,*hi,*period; int nlocal = atomKK->nlocal; diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 5e1b7b0414..f2e73ac6e6 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -15,6 +15,7 @@ #include "atom_kokkos.h" #include "atom_masks.h" #include "domain_kokkos.h" +#include "update.h" #include "neighbor_kokkos.h" #include "nbin_kokkos.h" #include "nstencil.h" @@ -27,6 +28,16 @@ namespace LAMMPS_NS { template NPairKokkos::NPairKokkos(LAMMPS *lmp) : NPair(lmp) { + // use 1D view for scalars to reduce GPU memory operations + + d_scalars = typename AT::t_int_1d("neighbor:scalars",2); + h_scalars = HAT::t_int_1d("neighbor:scalars_mirror",2); + + d_resize = 
Kokkos::subview(d_scalars,0); + d_new_maxneighs = Kokkos::subview(d_scalars,1); + + h_resize = Kokkos::subview(h_scalars,0); + h_new_maxneighs = Kokkos::subview(h_scalars,1); } /* ---------------------------------------------------------------------- @@ -84,27 +95,30 @@ template void NPairKokkos::copy_stencil_info() { NPair::copy_stencil_info(); - nstencil = ns->nstencil; - int maxstencil = ns->get_maxstencil(); + if (neighbor->last_setup_bins == update->ntimestep) { + // copy stencil to device as it may have changed - if (maxstencil > k_stencil.extent(0)) - k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil); - for (int k = 0; k < maxstencil; k++) - k_stencil.h_view(k) = ns->stencil[k]; - k_stencil.modify(); - k_stencil.sync(); - if (GHOST) { - if (maxstencil > k_stencilxyz.extent(0)) - k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil); - for (int k = 0; k < maxstencil; k++) { - k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0]; - k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1]; - k_stencilxyz.h_view(k,2) = ns->stencilxyz[k][2]; + int maxstencil = ns->get_maxstencil(); + + if (maxstencil > k_stencil.extent(0)) + k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil); + for (int k = 0; k < maxstencil; k++) + k_stencil.h_view(k) = ns->stencil[k]; + k_stencil.modify(); + k_stencil.sync(); + if (GHOST) { + if (maxstencil > k_stencilxyz.extent(0)) + k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil); + for (int k = 0; k < maxstencil; k++) { + k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0]; + k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1]; + k_stencilxyz.h_view(k,2) = ns->stencilxyz[k][2]; + } + k_stencilxyz.modify(); + k_stencilxyz.sync(); } - k_stencilxyz.modify(); - k_stencilxyz.sync(); } } @@ -157,7 +171,7 @@ void NPairKokkos::build(NeighList *list_) bboxhi,bboxlo, domain->xperiodic,domain->yperiodic,domain->zperiodic, domain->xprd_half,domain->yprd_half,domain->zprd_half, - skin); + 
skin,d_resize,h_resize,d_new_maxneighs,h_new_maxneighs); k_cutneighsq.sync(); k_ex1_type.sync(); @@ -185,8 +199,7 @@ void NPairKokkos::build(NeighList *list_) data.h_new_maxneighs() = list->maxneighs; data.h_resize() = 0; - Kokkos::deep_copy(data.resize, data.h_resize); - Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs); + Kokkos::deep_copy(d_scalars, h_scalars); #ifdef KOKKOS_ENABLE_CUDA #define BINS_PER_BLOCK 2 const int factor = atoms_per_bin<64?2:1; @@ -245,10 +258,9 @@ void NPairKokkos::build(NeighList *list_) } } } - deep_copy(data.h_resize, data.resize); + Kokkos::deep_copy(h_scalars, d_scalars); if(data.h_resize()) { - deep_copy(data.h_new_maxneighs, data.new_maxneighs); list->maxneighs = data.h_new_maxneighs() * 1.2; list->d_neighbors = typename ArrayTypes::t_neighbors_2d("neighbors", list->d_neighbors.extent(0), list->maxneighs); data.neigh_list.d_neighbors = list->d_neighbors; diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index 6986fc5849..edf3d2a59f 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -95,6 +95,8 @@ namespace LAMMPS_NS { template class NPairKokkos : public NPair { + typedef ArrayTypes AT; + public: NPairKokkos(class LAMMPS *); ~NPairKokkos() {} @@ -105,6 +107,12 @@ class NPairKokkos : public NPair { private: int newton_pair; + typename AT::t_int_1d d_scalars; + HAT::t_int_1d h_scalars; + typename AT::t_int_scalar d_resize; + typename AT::t_int_scalar d_new_maxneighs; + HAT::t_int_scalar h_resize; + HAT::t_int_scalar h_new_maxneighs; // data from Neighbor class @@ -251,7 +259,11 @@ class NeighborKokkosExecute const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo, const int & _xperiodic, const int & _yperiodic, const int & _zperiodic, const int & _xprd_half, const int & _yprd_half, const int & _zprd_half, - const X_FLOAT _skin): + const X_FLOAT _skin, + const typename AT::t_int_scalar _resize, + const typename ArrayTypes::t_int_scalar _h_resize, + const typename AT::t_int_scalar 
_new_maxneighs, + const typename ArrayTypes::t_int_scalar _h_new_maxneighs): neigh_list(_neigh_list), cutneighsq(_cutneighsq), bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins), atom2bin(_atom2bin),c_atom2bin(_atom2bin), @@ -272,7 +284,8 @@ class NeighborKokkosExecute ex_mol_intra(_ex_mol_intra), xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic), xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half), - skin(_skin) { + skin(_skin),resize(_resize),h_resize(_h_resize), + new_maxneighs(_new_maxneighs),h_new_maxneighs(_h_new_maxneighs) { if (molecular == 2) moltemplate = 1; else moltemplate = 0; @@ -280,20 +293,7 @@ class NeighborKokkosExecute bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2]; bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2]; - resize = typename AT::t_int_scalar(Kokkos::view_alloc("NeighborKokkosFunctor::resize",Kokkos::WithoutInitializing)); -#ifndef KOKKOS_USE_CUDA_UVM - h_resize = Kokkos::create_mirror_view(resize); -#else - h_resize = resize; -#endif h_resize() = 1; - new_maxneighs = typename AT:: - t_int_scalar(Kokkos::view_alloc("NeighborKokkosFunctor::new_maxneighs",Kokkos::WithoutInitializing)); -#ifndef KOKKOS_USE_CUDA_UVM - h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs); -#else - h_new_maxneighs = new_maxneighs; -#endif h_new_maxneighs() = neigh_list.maxneighs; }; diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index 8758b2f03c..63502a1e27 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -274,7 +274,7 @@ struct PairComputeFunctor { const X_FLOAT ytmp = c.x(i,1); const X_FLOAT ztmp = c.x(i,2); const int itype = c.type(i); - + const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); const int jnum = list.d_numneigh[i]; @@ -388,13 +388,12 @@ struct PairComputeFunctor { const int lastatom = firstatom + atoms_per_team < inum ? 
firstatom + atoms_per_team : inum; Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) { - const int i = list.d_ilist[ii]; const X_FLOAT xtmp = c.x(i,0); const X_FLOAT ytmp = c.x(i,1); const X_FLOAT ztmp = c.x(i,2); const int itype = c.type(i); - + const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); const int jnum = list.d_numneigh[i]; @@ -875,14 +874,14 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable PairComputeFunctor ff(fpair,list); atoms_per_team = GetTeamSize(ff, atoms_per_team, vector_length); Kokkos::TeamPolicy > policy(list->inum,atoms_per_team,vector_length); - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev); - else Kokkos::parallel_for(policy,ff); + if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(Kokkos::Experimental::require(policy,Kokkos::Experimental::WorkItemProperty::HintLightWeight),ff,ev); + else Kokkos::parallel_for(Kokkos::Experimental::require(policy,Kokkos::Experimental::WorkItemProperty::HintLightWeight),ff); } else { PairComputeFunctor ff(fpair,list); atoms_per_team = GetTeamSize(ff, atoms_per_team, vector_length); Kokkos::TeamPolicy > policy(list->inum,atoms_per_team,vector_length); - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev); - else Kokkos::parallel_for(policy,ff); + if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(Kokkos::Experimental::require(policy,Kokkos::Experimental::WorkItemProperty::HintLightWeight),ff,ev); + else Kokkos::parallel_for(Kokkos::Experimental::require(policy,Kokkos::Experimental::WorkItemProperty::HintLightWeight),ff); } } else { if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) { diff --git a/src/neighbor.h b/src/neighbor.h index 751beeae4b..6184731b61 100644 --- a/src/neighbor.h +++ b/src/neighbor.h @@ -126,6 +126,8 @@ class Neighbor : protected Pointers { bigint memory_usage(); + bigint last_setup_bins; // step of last neighbor::setup_bins() call + 
protected: int me,nprocs; int firsttime; // flag for calling init_styles() only once @@ -139,8 +141,6 @@ class Neighbor : protected Pointers { int fix_check; // # of fixes that induce reneigh int *fixchecklist; // which fixes to check - bigint last_setup_bins; // step of last neighbor::setup_bins() call - double triggersq; // trigger = build when atom moves this dist double **xhold; // atom coords at last neighbor build From 179026dd44108f78b600ecb957be5039bdcb7c82 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 14 Mar 2019 17:13:12 -0600 Subject: [PATCH 09/34] Reduce GPU data movement in npair_kokkos --- src/KOKKOS/npair_kokkos.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index f2e73ac6e6..ecf4b2d5a5 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -187,7 +187,18 @@ void NPairKokkos::build(NeighList *list_) k_bincount.sync(); k_bins.sync(); k_atom2bin.sync(); - atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK); + + if (atom->molecular) { + if (exclude) + atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK); + else + atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK|TAG_MASK|SPECIAL_MASK); + } else { + if (exclude) + atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK|MASK_MASK); + else + atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK); + } data.special_flag[0] = special_flag[0]; data.special_flag[1] = special_flag[1]; From 8c4baac3f10b7d2c505a9ffe73c22bdd7e8656ee Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 15 Mar 2019 14:25:24 -0600 Subject: [PATCH 10/34] Only copy force on ghost atoms if newton on --- src/KOKKOS/atom_vec_atomic_kokkos.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index e3c1bee956..80321fd2ea 100644 --- 
a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -21,6 +21,7 @@ #include "atom_masks.h" #include "memory_kokkos.h" #include "error.h" +#include "force.h" using namespace LAMMPS_NS; @@ -901,7 +902,14 @@ void AtomVecAtomicKokkos::sync(ExecutionSpace space, unsigned int mask) if (space == Device) { if (mask & X_MASK) atomKK->k_x.sync(); if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & F_MASK) { + if (!force || force->newton) { + atomKK->k_f.sync(); + } else { + auto k_f_nlocal = Kokkos::subview(atomKK->k_f,std::make_pair(0,atom->nlocal),Kokkos::ALL); + k_f_nlocal.sync(); + } + } if (mask & TAG_MASK) atomKK->k_tag.sync(); if (mask & TYPE_MASK) atomKK->k_type.sync(); if (mask & MASK_MASK) atomKK->k_mask.sync(); @@ -909,7 +917,14 @@ void AtomVecAtomicKokkos::sync(ExecutionSpace space, unsigned int mask) } else { if (mask & X_MASK) atomKK->k_x.sync(); if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & F_MASK) { + if (!force || force->newton) { + atomKK->k_f.sync(); + } else { + auto k_f_nlocal = Kokkos::subview(atomKK->k_f,std::make_pair(0,atom->nlocal),Kokkos::ALL); + k_f_nlocal.sync(); + } + } if (mask & TAG_MASK) atomKK->k_tag.sync(); if (mask & TYPE_MASK) atomKK->k_type.sync(); if (mask & MASK_MASK) atomKK->k_mask.sync(); From 36836598b1a4bbe76fac874274ae9d5f1dc635b9 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 18 Mar 2019 10:45:14 -0600 Subject: [PATCH 11/34] Reduce data transfer in exchange --- src/KOKKOS/comm_kokkos.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 5aa2cbdfbe..06807b08b9 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -57,10 +57,10 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp) memory->destroy(buf_recv); buf_recv = NULL; - k_exchange_sendlist = DAT:: - 
tdual_int_1d("comm:k_exchange_sendlist",100); - k_exchange_copylist = DAT:: - tdual_int_1d("comm:k_exchange_copylist",100); + k_exchange_lists = DAT:: + tdual_int_1d("comm:k_exchange_lists",2,100); + k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,KOKKOS::ALL); + k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,KOKKOS::ALL); k_count = DAT::tdual_int_scalar("comm:k_count"); k_sendflag = DAT::tdual_int_1d("comm:k_sendflag",100); @@ -619,8 +619,9 @@ void CommKokkos::exchange_device() k_count.h_view()=k_exchange_sendlist.h_view.extent(0); } } - k_exchange_copylist.sync(); - k_exchange_sendlist.sync(); + + auto k_exchange_lists_short = Kokkos::subview(k_exchange_lists,KOKKOS::ALL,k_count.h_view()); + k_exchange_lists_short.template sync(); k_sendflag.sync(); int sendpos = nlocal-1; @@ -634,8 +635,8 @@ void CommKokkos::exchange_device() k_exchange_copylist.h_view(i) = -1; } - k_exchange_copylist.modify(); - k_exchange_copylist.sync(); + k_exchange_copylist_short.modify(); + k_exchange_copylist_short.sync(); nsend = k_count.h_view(); if (nsend > maxsend) grow_send_kokkos(nsend,1); nsend = From b50ef59a199346141ae06bea6e474fb5ade762bd Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 18 Mar 2019 13:17:32 -0600 Subject: [PATCH 12/34] Optimize Kokkos comm for small systems --- src/KOKKOS/comm_kokkos.cpp | 96 ++++++++++++++++++++------------------ src/KOKKOS/comm_kokkos.h | 2 + 2 files changed, 53 insertions(+), 45 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 06807b08b9..814824bc5b 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -57,10 +57,9 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp) memory->destroy(buf_recv); buf_recv = NULL; - k_exchange_lists = DAT:: - tdual_int_1d("comm:k_exchange_lists",2,100); - k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,KOKKOS::ALL); - k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,KOKKOS::ALL); + k_exchange_lists = 
DAT::tdual_int_2d("comm:k_exchange_lists",2,100); + k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL); + k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL); k_count = DAT::tdual_int_scalar("comm:k_count"); k_sendflag = DAT::tdual_int_1d("comm:k_sendflag",100); @@ -188,6 +187,8 @@ void CommKokkos::forward_comm_device(int dummy) atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); if (comm->nprocs == 1) { + k_swap.sync(); + k_pbc.sync(); n = avec->pack_comm_self_fused(totalsend,k_sendlist,k_sendnum_scan, k_firstrecv,k_pbc_flag,k_pbc); } else { @@ -620,8 +621,7 @@ void CommKokkos::exchange_device() } } - auto k_exchange_lists_short = Kokkos::subview(k_exchange_lists,KOKKOS::ALL,k_count.h_view()); - k_exchange_lists_short.template sync(); + k_exchange_lists.sync(); k_sendflag.sync(); int sendpos = nlocal-1; @@ -635,6 +635,7 @@ void CommKokkos::exchange_device() k_exchange_copylist.h_view(i) = -1; } + auto k_exchange_copylist_short = Kokkos::subview(k_exchange_copylist,k_count.h_view()); k_exchange_copylist_short.modify(); k_exchange_copylist_short.sync(); nsend = k_count.h_view(); @@ -749,14 +750,16 @@ void CommKokkos::borders() if (!exchange_comm_classic) { if (exchange_comm_on_host) borders_device(); else borders_device(); - return; + } else { + atomKK->sync(Host,ALL_MASK); + k_sendlist.sync(); + CommBrick::borders(); + k_sendlist.modify(); + atomKK->modified(Host,ALL_MASK); } - atomKK->sync(Host,ALL_MASK); - k_sendlist.sync(); - CommBrick::borders(); - k_sendlist.modify(); - atomKK->modified(Host,ALL_MASK); + if (comm->nprocs == 1 && !forward_comm_classic) + copy_pbc_info(); } /* ---------------------------------------------------------------------- */ @@ -1043,40 +1046,43 @@ void CommKokkos::borders_device() { atomKK->sync(Host,TAG_MASK); atom->map_set(); } - - if (comm->nprocs == 1) { - if (nswap > k_pbc.extent(0)) { - k_pbc = DAT::tdual_int_2d("comm:pbc",nswap,6); - k_swap = DAT::tdual_int_2d("comm:swap",3,nswap); - 
k_pbc_flag .d_view = Kokkos::subview(k_swap.d_view,0,Kokkos::ALL); - k_firstrecv .d_view = Kokkos::subview(k_swap.d_view,1,Kokkos::ALL); - k_sendnum_scan.d_view = Kokkos::subview(k_swap.d_view,2,Kokkos::ALL); - k_pbc_flag .h_view = Kokkos::subview(k_swap.h_view,0,Kokkos::ALL); - k_firstrecv .h_view = Kokkos::subview(k_swap.h_view,1,Kokkos::ALL); - k_sendnum_scan.h_view = Kokkos::subview(k_swap.h_view,2,Kokkos::ALL); - } - int scan = 0; - for (int iswap = 0; iswap < nswap; iswap++) { - scan += sendnum[iswap]; - k_sendnum_scan.h_view[iswap] = scan; - k_firstrecv.h_view[iswap] = firstrecv[iswap]; - k_pbc_flag.h_view[iswap] = pbc_flag[iswap]; - k_pbc.h_view(iswap,0) = pbc[iswap][0]; - k_pbc.h_view(iswap,1) = pbc[iswap][1]; - k_pbc.h_view(iswap,2) = pbc[iswap][2]; - k_pbc.h_view(iswap,3) = pbc[iswap][3]; - k_pbc.h_view(iswap,4) = pbc[iswap][4]; - k_pbc.h_view(iswap,5) = pbc[iswap][5]; - } - totalsend = scan; - - k_swap.modify(); - k_pbc.modify(); - - k_swap.sync(); - k_pbc.sync(); - } } + +/* ---------------------------------------------------------------------- + copy pbc info +------------------------------------------------------------------------- */ + +void CommKokkos::copy_pbc_info() +{ + if (nswap > k_pbc.extent(0)) { + k_pbc = DAT::tdual_int_2d("comm:pbc",nswap,6); + k_swap = DAT::tdual_int_2d("comm:swap",3,nswap); + k_pbc_flag .d_view = Kokkos::subview(k_swap.d_view,0,Kokkos::ALL); + k_firstrecv .d_view = Kokkos::subview(k_swap.d_view,1,Kokkos::ALL); + k_sendnum_scan.d_view = Kokkos::subview(k_swap.d_view,2,Kokkos::ALL); + k_pbc_flag .h_view = Kokkos::subview(k_swap.h_view,0,Kokkos::ALL); + k_firstrecv .h_view = Kokkos::subview(k_swap.h_view,1,Kokkos::ALL); + k_sendnum_scan.h_view = Kokkos::subview(k_swap.h_view,2,Kokkos::ALL); + } + int scan = 0; + for (int iswap = 0; iswap < nswap; iswap++) { + scan += sendnum[iswap]; + k_sendnum_scan.h_view[iswap] = scan; + k_firstrecv.h_view[iswap] = firstrecv[iswap]; + k_pbc_flag.h_view[iswap] = pbc_flag[iswap]; + 
k_pbc.h_view(iswap,0) = pbc[iswap][0]; + k_pbc.h_view(iswap,1) = pbc[iswap][1]; + k_pbc.h_view(iswap,2) = pbc[iswap][2]; + k_pbc.h_view(iswap,3) = pbc[iswap][3]; + k_pbc.h_view(iswap,4) = pbc[iswap][4]; + k_pbc.h_view(iswap,5) = pbc[iswap][5]; + } + totalsend = scan; + + k_swap.modify(); + k_pbc.modify(); +} + /* ---------------------------------------------------------------------- realloc the size of the send buffer as needed with BUFFACTOR and bufextra if flag = 1, realloc diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h index cab8124231..194826f9df 100644 --- a/src/KOKKOS/comm_kokkos.h +++ b/src/KOKKOS/comm_kokkos.h @@ -58,6 +58,7 @@ class CommKokkos : public CommBrick { DAT::tdual_int_2d k_sendlist; DAT::tdual_int_scalar k_total_send; DAT::tdual_xfloat_2d k_buf_send,k_buf_recv; + DAT::tdual_int_2d k_exchange_lists; DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag; DAT::tdual_int_scalar k_count; //double *buf_send; // send buffer for all comm @@ -81,6 +82,7 @@ class CommKokkos : public CommBrick { void grow_recv_kokkos(int, ExecutionSpace space = Host); void grow_list(int, int); void grow_swap(int); + void copy_pbc_info(); }; } From e2d28f5160d9f58c1519ee40fd289c8918f1383a Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 18 Mar 2019 15:27:35 -0600 Subject: [PATCH 13/34] Only copy pbc info in comm setup --- src/KOKKOS/comm_kokkos.cpp | 52 +++++++++++++++++++++++--------------- src/KOKKOS/comm_kokkos.h | 3 ++- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 814824bc5b..c6d8424c27 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -140,6 +140,31 @@ void CommKokkos::init() forward_comm_classic = true; } +/* ---------------------------------------------------------------------- */ + +void CommKokkos::setup() +{ + CommBrick::setup(); + + k_pbc_flag = DAT::tdual_int_1d("comm:pbc_flag",nswap); + k_pbc = 
DAT::tdual_int_2d("comm:pbc",nswap,6); + + for (int iswap = 0; iswap < nswap; iswap++) { + k_pbc_flag.h_view[iswap] = pbc_flag[iswap]; + k_pbc.h_view(iswap,0) = pbc[iswap][0]; + k_pbc.h_view(iswap,1) = pbc[iswap][1]; + k_pbc.h_view(iswap,2) = pbc[iswap][2]; + k_pbc.h_view(iswap,3) = pbc[iswap][3]; + k_pbc.h_view(iswap,4) = pbc[iswap][4]; + k_pbc.h_view(iswap,5) = pbc[iswap][5]; + } + k_pbc_flag.modify(); + k_pbc.modify(); + + k_pbc_flag.sync(); + k_pbc.sync(); +} + /* ---------------------------------------------------------------------- forward communication of atom coords every timestep other per-atom attributes may also be sent via pack/unpack routines @@ -759,7 +784,7 @@ void CommKokkos::borders() } if (comm->nprocs == 1 && !forward_comm_classic) - copy_pbc_info(); + copy_swap_info(); } /* ---------------------------------------------------------------------- */ @@ -1049,38 +1074,25 @@ void CommKokkos::borders_device() { } /* ---------------------------------------------------------------------- - copy pbc info + copy swap info ------------------------------------------------------------------------- */ -void CommKokkos::copy_pbc_info() +void CommKokkos::copy_swap_info() { - if (nswap > k_pbc.extent(0)) { - k_pbc = DAT::tdual_int_2d("comm:pbc",nswap,6); - k_swap = DAT::tdual_int_2d("comm:swap",3,nswap); - k_pbc_flag .d_view = Kokkos::subview(k_swap.d_view,0,Kokkos::ALL); - k_firstrecv .d_view = Kokkos::subview(k_swap.d_view,1,Kokkos::ALL); - k_sendnum_scan.d_view = Kokkos::subview(k_swap.d_view,2,Kokkos::ALL); - k_pbc_flag .h_view = Kokkos::subview(k_swap.h_view,0,Kokkos::ALL); - k_firstrecv .h_view = Kokkos::subview(k_swap.h_view,1,Kokkos::ALL); - k_sendnum_scan.h_view = Kokkos::subview(k_swap.h_view,2,Kokkos::ALL); + if (nswap > k_swap.extent(1)) { + k_swap = DAT::tdual_int_2d("comm:swap",2,nswap); + k_firstrecv = Kokkos::subview(k_swap,0,Kokkos::ALL); + k_sendnum_scan = Kokkos::subview(k_swap,1,Kokkos::ALL); } int scan = 0; for (int iswap = 0; iswap < nswap; 
iswap++) { scan += sendnum[iswap]; k_sendnum_scan.h_view[iswap] = scan; k_firstrecv.h_view[iswap] = firstrecv[iswap]; - k_pbc_flag.h_view[iswap] = pbc_flag[iswap]; - k_pbc.h_view(iswap,0) = pbc[iswap][0]; - k_pbc.h_view(iswap,1) = pbc[iswap][1]; - k_pbc.h_view(iswap,2) = pbc[iswap][2]; - k_pbc.h_view(iswap,3) = pbc[iswap][3]; - k_pbc.h_view(iswap,4) = pbc[iswap][4]; - k_pbc.h_view(iswap,5) = pbc[iswap][5]; } totalsend = scan; k_swap.modify(); - k_pbc.modify(); } /* ---------------------------------------------------------------------- diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h index 194826f9df..d5428c8b0c 100644 --- a/src/KOKKOS/comm_kokkos.h +++ b/src/KOKKOS/comm_kokkos.h @@ -33,6 +33,7 @@ class CommKokkos : public CommBrick { CommKokkos(class LAMMPS *); ~CommKokkos(); void init(); + void setup(); void forward_comm(int dummy = 0); // forward comm of atom coords void reverse_comm(); // reverse comm of atom coords @@ -82,7 +83,7 @@ class CommKokkos : public CommBrick { void grow_recv_kokkos(int, ExecutionSpace space = Host); void grow_list(int, int); void grow_swap(int); - void copy_pbc_info(); + void copy_swap_info(); }; } From 08273c40d7489c7015b600ff7b9b5f4a7d029313 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 19 Mar 2019 14:29:45 -0600 Subject: [PATCH 14/34] Fix compile issue in comm_kokkos --- src/KOKKOS/comm_kokkos.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index c6d8424c27..4396637153 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -661,8 +661,8 @@ void CommKokkos::exchange_device() } auto k_exchange_copylist_short = Kokkos::subview(k_exchange_copylist,k_count.h_view()); - k_exchange_copylist_short.modify(); - k_exchange_copylist_short.sync(); + k_exchange_copylist_short.template modify(); + k_exchange_copylist_short.template sync(); nsend = k_count.h_view(); if (nsend > maxsend) grow_send_kokkos(nsend,1); nsend = 
From d1e751d717752927b33b53fd2f613b8bd02288b7 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 20 Mar 2019 14:32:03 -0600 Subject: [PATCH 15/34] Fix thread safety issue in fused forward comm --- src/KOKKOS/atom_vec_kokkos.cpp | 38 +++++++++-------- src/KOKKOS/atom_vec_kokkos.h | 3 +- src/KOKKOS/comm_kokkos.cpp | 74 +++++++++++++++++++++------------- src/KOKKOS/comm_kokkos.h | 3 +- 4 files changed, 72 insertions(+), 46 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 076e3e52fa..9e7de1785b 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -281,6 +281,7 @@ struct AtomVecKokkos_PackCommSelfFused { typename ArrayTypes::t_int_1d_const _pbc_flag; typename ArrayTypes::t_int_1d_const _firstrecv; typename ArrayTypes::t_int_1d_const _sendnum_scan; + typename ArrayTypes::t_int_1d_const _g2l; X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; AtomVecKokkos_PackCommSelfFused( @@ -290,6 +291,7 @@ struct AtomVecKokkos_PackCommSelfFused { const typename DAT::tdual_int_1d &pbc_flag, const typename DAT::tdual_int_1d &firstrecv, const typename DAT::tdual_int_1d &sendnum_scan, + const typename DAT::tdual_int_1d &g2l, const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz): _x(x.view()),_xw(x.view()), @@ -298,6 +300,7 @@ struct AtomVecKokkos_PackCommSelfFused { _pbc_flag(pbc_flag.view()), _firstrecv(firstrecv.view()), _sendnum_scan(sendnum_scan.view()), + _g2l(g2l.view()), _xprd(xprd),_yprd(yprd),_zprd(zprd), _xy(xy),_xz(xz),_yz(yz) {}; @@ -309,43 +312,46 @@ struct AtomVecKokkos_PackCommSelfFused { int i = ii; if (iswap > 0) i = ii - _sendnum_scan[iswap-1]; - const int _nfirst = _firstrecv[iswap]; + const int _nfirst = _firstrecv[iswap]; + const int nlocal = _firstrecv[0]; + int j = _list(iswap,i); + if (j >= nlocal) + j = _g2l(j-nlocal); - const int j = _list(iswap,i); - if (_pbc_flag(iswap) == 0) { + if (_pbc_flag(ii) == 0) { _xw(i+_nfirst,0) = 
_x(j,0); _xw(i+_nfirst,1) = _x(j,1); _xw(i+_nfirst,2) = _x(j,2); } else { if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc(iswap,0)*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc(iswap,1)*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc(iswap,2)*_zprd; + _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd; + _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd; + _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc(iswap,0)*_xprd + _pbc(iswap,5)*_xy + _pbc(iswap,4)*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc(iswap,1)*_yprd + _pbc(iswap,3)*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc(iswap,2)*_zprd; + _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz; + _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz; + _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; } } - } }; /* ---------------------------------------------------------------------- */ int AtomVecKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &list, const DAT::tdual_int_1d &sendnum_scan, - const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, const DAT::tdual_int_2d &pbc) { + const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, const DAT::tdual_int_2d &pbc, + const DAT::tdual_int_1d &g2l) { if(commKK->forward_comm_on_host) { sync(Host,X_MASK); modified(Host,X_MASK); if(domain->triclinic) { - struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan, + struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, domain->xprd,domain->yprd,domain->zprd, domain->xy,domain->xz,domain->yz); Kokkos::parallel_for(n,f); } else { - struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan, + struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, domain->xprd,domain->yprd,domain->zprd, domain->xy,domain->xz,domain->yz); Kokkos::parallel_for(n,f); @@ -354,18 
+360,18 @@ int AtomVecKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &l sync(Device,X_MASK); modified(Device,X_MASK); if(domain->triclinic) { - struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan, + struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, domain->xprd,domain->yprd,domain->zprd, domain->xy,domain->xz,domain->yz); Kokkos::parallel_for(n,f); } else { - struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan, + struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, domain->xprd,domain->yprd,domain->zprd, domain->xy,domain->xz,domain->yz); Kokkos::parallel_for(n,f); } } - return n*3; + return n*3; } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index ea83ef1c8f..0474a2380a 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -56,7 +56,8 @@ class AtomVecKokkos : public AtomVec { const DAT::tdual_int_1d &sendnum_scan, const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, - const DAT::tdual_int_2d &pbc); + const DAT::tdual_int_2d &pbc, + const DAT::tdual_int_1d &g2l); virtual int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list, diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 4396637153..cd6ade1c2f 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -140,31 +140,6 @@ void CommKokkos::init() forward_comm_classic = true; } -/* ---------------------------------------------------------------------- */ - -void CommKokkos::setup() -{ - CommBrick::setup(); - - k_pbc_flag = DAT::tdual_int_1d("comm:pbc_flag",nswap); - k_pbc = DAT::tdual_int_2d("comm:pbc",nswap,6); - - for (int iswap = 0; iswap < nswap; iswap++) { - k_pbc_flag.h_view[iswap] = pbc_flag[iswap]; - 
k_pbc.h_view(iswap,0) = pbc[iswap][0]; - k_pbc.h_view(iswap,1) = pbc[iswap][1]; - k_pbc.h_view(iswap,2) = pbc[iswap][2]; - k_pbc.h_view(iswap,3) = pbc[iswap][3]; - k_pbc.h_view(iswap,4) = pbc[iswap][4]; - k_pbc.h_view(iswap,5) = pbc[iswap][5]; - } - k_pbc_flag.modify(); - k_pbc.modify(); - - k_pbc_flag.sync(); - k_pbc.sync(); -} - /* ---------------------------------------------------------------------- forward communication of atom coords every timestep other per-atom attributes may also be sent via pack/unpack routines @@ -211,11 +186,12 @@ void CommKokkos::forward_comm_device(int dummy) k_sendlist.sync(); atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); - if (comm->nprocs == 1) { + if (comm->nprocs == 1 && !ghost_velocity) { k_swap.sync(); + k_swap2.sync(); k_pbc.sync(); n = avec->pack_comm_self_fused(totalsend,k_sendlist,k_sendnum_scan, - k_firstrecv,k_pbc_flag,k_pbc); + k_firstrecv,k_pbc_flag,k_pbc,k_g2l); } else { for (int iswap = 0; iswap < nswap; iswap++) { @@ -783,7 +759,7 @@ void CommKokkos::borders() atomKK->modified(Host,ALL_MASK); } - if (comm->nprocs == 1 && !forward_comm_classic) + if (comm->nprocs == 1 && !ghost_velocity && !forward_comm_classic) copy_swap_info(); } @@ -1092,7 +1068,49 @@ void CommKokkos::copy_swap_info() } totalsend = scan; + int* list = NULL; + memory->create(list,totalsend,"comm:list"); + if (totalsend > k_pbc.extent(0)) { + k_pbc = DAT::tdual_int_2d("comm:pbc",totalsend,6); + k_swap2 = DAT::tdual_int_2d("comm:swap2",2,totalsend); + k_pbc_flag = Kokkos::subview(k_swap2,0,Kokkos::ALL); + k_g2l = Kokkos::subview(k_swap2,1,Kokkos::ALL); + } + + // create map of ghost atoms to local atoms + // store periodic boundary transform from local to ghost + + for (int iswap = 0; iswap < nswap; iswap++) { + for (int i = 0; i < sendnum[iswap]; i++) { + int source = sendlist[iswap][i] - atom->nlocal; + int dest = firstrecv[iswap] + i - atom->nlocal; + k_pbc_flag.h_view(dest) = pbc_flag[iswap]; + k_pbc.h_view(dest,0) = pbc[iswap][0]; + 
k_pbc.h_view(dest,1) = pbc[iswap][1]; + k_pbc.h_view(dest,2) = pbc[iswap][2]; + k_pbc.h_view(dest,3) = pbc[iswap][3]; + k_pbc.h_view(dest,4) = pbc[iswap][4]; + k_pbc.h_view(dest,5) = pbc[iswap][5]; + k_g2l.h_view(dest) = atom->nlocal + source; + + if (source >= 0) { + k_pbc_flag.h_view(dest) = k_pbc_flag.h_view(dest) || k_pbc_flag.h_view(source); + k_pbc.h_view(dest,0) += k_pbc.h_view(source,0); + k_pbc.h_view(dest,1) += k_pbc.h_view(source,1); + k_pbc.h_view(dest,2) += k_pbc.h_view(source,2); + k_pbc.h_view(dest,3) += k_pbc.h_view(source,3); + k_pbc.h_view(dest,4) += k_pbc.h_view(source,4); + k_pbc.h_view(dest,5) += k_pbc.h_view(source,5); + k_g2l.h_view(dest) = k_g2l.h_view(source); + } + } + } + k_swap.modify(); + k_swap2.modify(); + k_pbc.modify(); + + memory->destroy(list); } /* ---------------------------------------------------------------------- diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h index d5428c8b0c..9d8766e309 100644 --- a/src/KOKKOS/comm_kokkos.h +++ b/src/KOKKOS/comm_kokkos.h @@ -33,7 +33,6 @@ class CommKokkos : public CommBrick { CommKokkos(class LAMMPS *); ~CommKokkos(); void init(); - void setup(); void forward_comm(int dummy = 0); // forward comm of atom coords void reverse_comm(); // reverse comm of atom coords @@ -66,8 +65,10 @@ class CommKokkos : public CommBrick { //double *buf_recv; // recv buffer for all comm DAT::tdual_int_2d k_swap; + DAT::tdual_int_2d k_swap2; DAT::tdual_int_2d k_pbc; DAT::tdual_int_1d k_pbc_flag; + DAT::tdual_int_1d k_g2l; DAT::tdual_int_1d k_firstrecv; DAT::tdual_int_1d k_sendnum_scan; int totalsend; From 1f44dc2498366b80c0e0b0f36424728a2c98cf0f Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 20 Mar 2019 15:01:47 -0600 Subject: [PATCH 16/34] Remove unused array in comm_kokkos --- src/KOKKOS/comm_kokkos.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index cd6ade1c2f..a89889bd28 100644 --- 
a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -1068,7 +1068,9 @@ void CommKokkos::copy_swap_info() } totalsend = scan; - int* list = NULL; + // create map of ghost to local atom id + // store periodic boundary transform from local to ghost + memory->create(list,totalsend,"comm:list"); if (totalsend > k_pbc.extent(0)) { k_pbc = DAT::tdual_int_2d("comm:pbc",totalsend,6); @@ -1077,9 +1079,6 @@ void CommKokkos::copy_swap_info() k_g2l = Kokkos::subview(k_swap2,1,Kokkos::ALL); } - // create map of ghost atoms to local atoms - // store periodic boundary transform from local to ghost - for (int iswap = 0; iswap < nswap; iswap++) { for (int i = 0; i < sendnum[iswap]; i++) { int source = sendlist[iswap][i] - atom->nlocal; @@ -1109,8 +1108,6 @@ void CommKokkos::copy_swap_info() k_swap.modify(); k_swap2.modify(); k_pbc.modify(); - - memory->destroy(list); } /* ---------------------------------------------------------------------- From 744a8215dd4c2bd29c6b056a142f88e756213675 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 20 Mar 2019 15:08:08 -0600 Subject: [PATCH 17/34] Fix compile error in comm_kokkos and indent in atom_vec_kokkos --- src/KOKKOS/atom_vec_kokkos.cpp | 37 +++++++++++++++++----------------- src/KOKKOS/comm_kokkos.cpp | 1 - 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 9e7de1785b..7d5df17544 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -312,28 +312,29 @@ struct AtomVecKokkos_PackCommSelfFused { int i = ii; if (iswap > 0) i = ii - _sendnum_scan[iswap-1]; - const int _nfirst = _firstrecv[iswap]; - const int nlocal = _firstrecv[0]; - int j = _list(iswap,i); - if (j >= nlocal) - j = _g2l(j-nlocal); + const int _nfirst = _firstrecv[iswap]; + const int nlocal = _firstrecv[0]; - if (_pbc_flag(ii) == 0) { - _xw(i+_nfirst,0) = _x(j,0); - _xw(i+_nfirst,1) = _x(j,1); - _xw(i+_nfirst,2) = _x(j,2); + int j = _list(iswap,i); + 
if (j >= nlocal) + j = _g2l(j-nlocal); + + if (_pbc_flag(ii) == 0) { + _xw(i+_nfirst,0) = _x(j,0); + _xw(i+_nfirst,1) = _x(j,1); + _xw(i+_nfirst,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd; + _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd; + _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; } else { - if (TRICLINIC == 0) { - _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd; - _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd; - _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; - } else { - _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz; - _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz; - _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; - } + _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz; + _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz; + _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; } + } } }; diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index a89889bd28..d52011879d 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -1071,7 +1071,6 @@ void CommKokkos::copy_swap_info() // create map of ghost to local atom id // store periodic boundary transform from local to ghost - memory->create(list,totalsend,"comm:list"); if (totalsend > k_pbc.extent(0)) { k_pbc = DAT::tdual_int_2d("comm:pbc",totalsend,6); k_swap2 = DAT::tdual_int_2d("comm:swap2",2,totalsend); From c3adfcbc155859f4313503a0e1acef5c71d84b0b Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 21 Mar 2019 08:56:12 -0600 Subject: [PATCH 18/34] Add missing sync in comm_kokkos --- src/KOKKOS/comm_kokkos.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index d52011879d..7432f30b95 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -1071,6 +1071,8 @@ void CommKokkos::copy_swap_info() // create map of ghost to local atom id // store periodic boundary 
transform from local to ghost + k_sendlist.sync(); + if (totalsend > k_pbc.extent(0)) { k_pbc = DAT::tdual_int_2d("comm:pbc",totalsend,6); k_swap2 = DAT::tdual_int_2d("comm:swap2",2,totalsend); From f2ef02b6d94603b1902774dbddf488a1fd06522b Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 21 Mar 2019 09:27:18 -0600 Subject: [PATCH 19/34] Comm exchange is a no-op for 1 MPI rank --- src/KOKKOS/comm_kokkos.cpp | 259 ++++++++++++++++++------------------- 1 file changed, 129 insertions(+), 130 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 7432f30b95..720a79617f 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -504,9 +504,8 @@ void CommKokkos::exchange() } atomKK->sync(Host,ALL_MASK); - atomKK->modified(Host,ALL_MASK); - CommBrick::exchange(); + atomKK->modified(Host,ALL_MASK); } /* ---------------------------------------------------------------------- */ @@ -573,147 +572,149 @@ void CommKokkos::exchange_device() atom->nghost = 0; atom->avec->clear_bonus(); - // subbox bounds for orthogonal or triclinic + if (comm->nprocs > 1) { // otherwise no-op - if (triclinic == 0) { - sublo = domain->sublo; - subhi = domain->subhi; - } else { - sublo = domain->sublo_lamda; - subhi = domain->subhi_lamda; - } + // subbox bounds for orthogonal or triclinic - atomKK->sync(ExecutionSpaceFromDevice::space,ALL_MASK); + if (triclinic == 0) { + sublo = domain->sublo; + subhi = domain->subhi; + } else { + sublo = domain->sublo_lamda; + subhi = domain->subhi_lamda; + } - // loop over dimensions - for (int dim = 0; dim < 3; dim++) { + atomKK->sync(ExecutionSpaceFromDevice::space,ALL_MASK); - // fill buffer with atoms leaving my box, using < and >= - // when atom is deleted, fill it in with last atom + // loop over dimensions + for (int dim = 0; dim < 3; dim++) { - x = atom->x; - lo = sublo[dim]; - hi = subhi[dim]; - nlocal = atom->nlocal; - i = nsend = 0; + // fill buffer with atoms leaving my box, using < and >= + // when 
atom is deleted, fill it in with last atom - if (true) { - if (k_sendflag.h_view.extent(0)(); - k_count.h_view() = k_exchange_sendlist.h_view.extent(0); - while (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) { - k_count.h_view() = 0; - k_count.modify(); - k_count.sync(); + x = atom->x; + lo = sublo[dim]; + hi = subhi[dim]; + nlocal = atom->nlocal; + i = nsend = 0; - BuildExchangeListFunctor - f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag, - nlocal,dim,lo,hi); - Kokkos::parallel_for(nlocal,f); - k_exchange_sendlist.modify(); - k_sendflag.modify(); - k_count.modify(); + if (true) { + if (k_sendflag.h_view.extent(0)(); + k_count.h_view() = k_exchange_sendlist.h_view.extent(0); + while (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) { + k_count.h_view() = 0; + k_count.modify(); + k_count.sync(); - k_count.sync(); - if (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) { - k_exchange_sendlist.resize(k_count.h_view()*1.1); - k_exchange_copylist.resize(k_count.h_view()*1.1); - k_count.h_view()=k_exchange_sendlist.h_view.extent(0); + BuildExchangeListFunctor + f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag, + nlocal,dim,lo,hi); + Kokkos::parallel_for(nlocal,f); + k_exchange_sendlist.modify(); + k_sendflag.modify(); + k_count.modify(); + + k_count.sync(); + if (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) { + k_exchange_sendlist.resize(k_count.h_view()*1.1); + k_exchange_copylist.resize(k_count.h_view()*1.1); + k_count.h_view()=k_exchange_sendlist.h_view.extent(0); + } + } + + k_exchange_lists.sync(); + k_sendflag.sync(); + + int sendpos = nlocal-1; + nlocal -= k_count.h_view(); + for(int i = 0; i < k_count.h_view(); i++) { + if (k_exchange_sendlist.h_view(i)(); + k_exchange_copylist_short.template sync(); + nsend = k_count.h_view(); + if (nsend > maxsend) grow_send_kokkos(nsend,1); + nsend = + avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send, + k_exchange_sendlist,k_exchange_copylist, + ExecutionSpaceFromDevice::space, 
+ dim,lo,hi); + DeviceType::fence(); + } else { + while (i < nlocal) { + if (x[i][dim] < lo || x[i][dim] >= hi) { + if (nsend > maxsend) grow_send_kokkos(nsend,1); + nsend += avec->pack_exchange(i,&buf_send[nsend]); + avec->copy(nlocal-1,i,1); + nlocal--; + } else i++; + } + } + atom->nlocal = nlocal; + + // send/recv atoms in both directions + // if 1 proc in dimension, no send/recv, set recv buf to send buf + // if 2 procs in dimension, single send/recv + // if more than 2 procs in dimension, send/recv to both neighbors + + if (procgrid[dim] == 1) { + nrecv = nsend; + if (nrecv) { + atom->nlocal=avec-> + unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi, + ExecutionSpaceFromDevice::space); + DeviceType::fence(); + } + } else { + MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0, + &nrecv1,1,MPI_INT,procneigh[dim][1],0,world,MPI_STATUS_IGNORE); + nrecv = nrecv1; + if (procgrid[dim] > 2) { + MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0, + &nrecv2,1,MPI_INT,procneigh[dim][0],0,world,MPI_STATUS_IGNORE); + nrecv += nrecv2; + } + if (nrecv > maxrecv) grow_recv_kokkos(nrecv); + + MPI_Irecv(k_buf_recv.view().data(),nrecv1, + MPI_DOUBLE,procneigh[dim][1],0, + world,&request); + MPI_Send(k_buf_send.view().data(),nsend, + MPI_DOUBLE,procneigh[dim][0],0,world); + MPI_Wait(&request,MPI_STATUS_IGNORE); + + if (procgrid[dim] > 2) { + MPI_Irecv(k_buf_recv.view().data()+nrecv1, + nrecv2,MPI_DOUBLE,procneigh[dim][0],0, + world,&request); + MPI_Send(k_buf_send.view().data(),nsend, + MPI_DOUBLE,procneigh[dim][1],0,world); + MPI_Wait(&request,MPI_STATUS_IGNORE); + } + + if (nrecv) { + atom->nlocal = avec-> + unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, + ExecutionSpaceFromDevice::space); + DeviceType::fence(); } } - k_exchange_lists.sync(); - k_sendflag.sync(); + // check incoming atoms to see if they are in my box + // if so, add to my list - int sendpos = nlocal-1; - nlocal -= k_count.h_view(); - for(int i = 0; i < k_count.h_view(); i++) { - if 
(k_exchange_sendlist.h_view(i)(); - k_exchange_copylist_short.template sync(); - nsend = k_count.h_view(); - if (nsend > maxsend) grow_send_kokkos(nsend,1); - nsend = - avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send, - k_exchange_sendlist,k_exchange_copylist, - ExecutionSpaceFromDevice::space, - dim,lo,hi); - DeviceType::fence(); - } else { - while (i < nlocal) { - if (x[i][dim] < lo || x[i][dim] >= hi) { - if (nsend > maxsend) grow_send_kokkos(nsend,1); - nsend += avec->pack_exchange(i,&buf_send[nsend]); - avec->copy(nlocal-1,i,1); - nlocal--; - } else i++; - } } - atom->nlocal = nlocal; - - // send/recv atoms in both directions - // if 1 proc in dimension, no send/recv, set recv buf to send buf - // if 2 procs in dimension, single send/recv - // if more than 2 procs in dimension, send/recv to both neighbors - - if (procgrid[dim] == 1) { - nrecv = nsend; - if (nrecv) { - atom->nlocal=avec-> - unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi, - ExecutionSpaceFromDevice::space); - DeviceType::fence(); - } - } else { - MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0, - &nrecv1,1,MPI_INT,procneigh[dim][1],0,world,MPI_STATUS_IGNORE); - nrecv = nrecv1; - if (procgrid[dim] > 2) { - MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0, - &nrecv2,1,MPI_INT,procneigh[dim][0],0,world,MPI_STATUS_IGNORE); - nrecv += nrecv2; - } - if (nrecv > maxrecv) grow_recv_kokkos(nrecv); - - MPI_Irecv(k_buf_recv.view().data(),nrecv1, - MPI_DOUBLE,procneigh[dim][1],0, - world,&request); - MPI_Send(k_buf_send.view().data(),nsend, - MPI_DOUBLE,procneigh[dim][0],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - - if (procgrid[dim] > 2) { - MPI_Irecv(k_buf_recv.view().data()+nrecv1, - nrecv2,MPI_DOUBLE,procneigh[dim][0],0, - world,&request); - MPI_Send(k_buf_send.view().data(),nsend, - MPI_DOUBLE,procneigh[dim][1],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - } - - if (nrecv) { - atom->nlocal = avec-> - unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, 
- ExecutionSpaceFromDevice::space); - DeviceType::fence(); - } - } - - // check incoming atoms to see if they are in my box - // if so, add to my list - + atomKK->modified(ExecutionSpaceFromDevice::space,ALL_MASK); } - atomKK->modified(ExecutionSpaceFromDevice::space,ALL_MASK); - if (atom->firstgroupname) { /* this is not yet implemented with Kokkos */ atomKK->sync(Host,ALL_MASK); @@ -753,7 +754,6 @@ void CommKokkos::borders() else borders_device(); } else { atomKK->sync(Host,ALL_MASK); - k_sendlist.sync(); CommBrick::borders(); k_sendlist.modify(); atomKK->modified(Host,ALL_MASK); @@ -828,7 +828,6 @@ void CommKokkos::borders_device() { AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec; ExecutionSpace exec_space = ExecutionSpaceFromDevice::space; - k_sendlist.sync(); atomKK->sync(exec_space,ALL_MASK); // do swaps over all 3 dimensions From 149a57f3aea05bc2f7473e3c07157606b75bd642 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 8 Apr 2019 15:21:42 -0600 Subject: [PATCH 20/34] Add threshold for using Kokkos teams --- src/KOKKOS/pair_kokkos.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index df33b5cafc..5d724e6f73 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -866,6 +866,11 @@ int GetTeamSize(FunctorStyle& functor, int team_size, int vector_length) { template EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos*>::type list) { EV_FLOAT ev; + + if (!fpair->lmp->kokkos->team_flag_set) + if (list->inum <= 16384) + fpair->lmp->kokkos->team_flag = 1; + if (fpair->lmp->kokkos->team_flag) { int vector_length = 8; int atoms_per_team = 32; From b8d3c9e01b50e5f26213e4076e18ab3eb393dd04 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 8 Apr 2019 16:02:18 -0600 Subject: [PATCH 21/34] Rename team option to neigh/thread --- src/KOKKOS/kokkos.cpp | 16 ++++++++-------- src/KOKKOS/kokkos.h | 8 
++++---- src/KOKKOS/pair_kokkos.h | 6 +++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 5611fcb74b..60d04941dc 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -184,8 +184,8 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) binsize = 0.0; gpu_direct_flag = 1; - team_flag = 0; - team_flag_set = 0; + neigh_thread = 0; + neigh_thread_set = 0; neighflag_qeq_set = 0; if (ngpu > 0) { neighflag = FULL; @@ -319,12 +319,12 @@ void KokkosLMP::accelerator(int narg, char **arg) else if (strcmp(arg[iarg+1],"on") == 0) gpu_direct_flag = 1; else error->all(FLERR,"Illegal package kokkos command"); iarg += 2; - } else if (strcmp(arg[iarg],"team") == 0) { + } else if (strcmp(arg[iarg],"neigh/thread") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); - if (strcmp(arg[iarg+1],"off") == 0) team_flag = 0; - else if (strcmp(arg[iarg+1],"on") == 0) team_flag = 1; + if (strcmp(arg[iarg+1],"off") == 0) neigh_thread = 0; + else if (strcmp(arg[iarg+1],"on") == 0) neigh_thread = 1; else error->all(FLERR,"Illegal package kokkos command"); - team_flag_set = 1; + neigh_thread_set = 1; iarg += 2; } else error->all(FLERR,"Illegal package kokkos command"); } @@ -345,8 +345,8 @@ void KokkosLMP::accelerator(int narg, char **arg) force->newton = force->newton_pair = force->newton_bond = newtonflag; - if (team_flag && neighflag != FULL) - error->all(FLERR,"Must use KOKKOS package option 'neigh full' with 'team on'"); + if (neigh_thread && neighflag != FULL) + error->all(FLERR,"Must use KOKKOS package option 'neigh full' with 'neigh/thread on'"); neighbor->binsize_user = binsize; if (binsize <= 0.0) neighbor->binsizeflag = 0; diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index ad23c36e08..3804d24040 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -36,8 +36,8 @@ class KokkosLMP : protected Pointers { int numa; int auto_sync; int 
gpu_direct_flag; - int team_flag; - int team_flag_set; + int neigh_thread; + int neigh_thread_set; int newtonflag; double binsize; @@ -89,8 +89,8 @@ U: Must use Kokkos half/thread or full neighbor list with threads or GPUs Using Kokkos half-neighbor lists with threading is not allowed. -E: Must use KOKKOS package option 'neigh full' with 'team on' +E: Must use KOKKOS package option 'neigh full' with 'neigh_thread on' -The 'team on' option requires a full neighbor list +The 'neigh_thread on' option requires a full neighbor list */ diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index 5d724e6f73..04d756932a 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -867,11 +867,11 @@ template EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos*>::type list) { EV_FLOAT ev; - if (!fpair->lmp->kokkos->team_flag_set) + if (!fpair->lmp->kokkos->neigh_thread_set) if (list->inum <= 16384) - fpair->lmp->kokkos->team_flag = 1; + fpair->lmp->kokkos->neigh_thread = 1; - if (fpair->lmp->kokkos->team_flag) { + if (fpair->lmp->kokkos->neigh_thread) { int vector_length = 8; int atoms_per_team = 32; From 16b17f812cce2f26420817dda9d36f7fb3b65500 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 9 Apr 2019 08:51:24 -0600 Subject: [PATCH 22/34] Update docs --- doc/src/package.txt | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/doc/src/package.txt b/doc/src/package.txt index a9412447b8..aef35d8d13 100644 --- a/doc/src/package.txt +++ b/doc/src/package.txt @@ -64,13 +64,16 @@ args = arguments specific to the style :l {no_affinity} values = none {kokkos} args = keyword value ... 
zero or more keyword/value pairs may be appended - keywords = {neigh} or {neigh/qeq} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} or {comm/reverse} or {gpu/direct} + keywords = {neigh} or {neigh/qeq} or {neigh/thread} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} or {comm/reverse} or {gpu/direct} {neigh} value = {full} or {half} full = full neighbor list half = half neighbor list built in thread-safe manner {neigh/qeq} value = {full} or {half} full = full neighbor list half = half neighbor list built in thread-safe manner + {neigh/thread} value = {off} or {on} + off = thread only over atoms + on = thread over both atoms and neighbors {newton} = {off} or {on} off = set Newton pairwise and bonded flags off on = set Newton pairwise and bonded flags on @@ -442,7 +445,15 @@ running on CPUs, a {half} neighbor list is the default because it are often faster, just as it is for non-accelerated pair styles. Similarly, the {neigh/qeq} keyword determines how neighbor lists are built for "fix qeq/reax/kk"_fix_qeq_reax.html. If not explicitly set, the value of -{neigh/qeq} will match {neigh}. +{neigh/qeq} will match {neigh}. + +If the {neigh/thread} keyword is set to {off}, then the KOKKOS package +threads only over atoms. However, for small systems, this may not expose +enough parallelism to keep a GPU busy. When this keyword is set to {on}, +the KOKKOS package threads over both atoms and neighbors of atoms. +Using {neigh/thread} {on} may be slower for large systems, so this this +option is turned on by default only when there are 16K atoms or less +owned by an MPI rank. The {newton} keyword sets the Newton flags for pairwise and bonded interactions to {off} or {on}, the same as the "newton"_newton.html @@ -630,11 +641,12 @@ neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default value, comm = device, gpu/direct = on. 
When LAMMPS can safely detect that GPU-direct is not available, the default value of gpu/direct becomes "off". For CPUs or Xeon Phis, the option defaults are neigh = -half, neigh/qeq = half, newton = on, binsize = 0.0, and comm = no. These -settings are made automatically by the required "-k on" "command-line -switch"_Run_options.html. You can change them by using the package -kokkos command in your input script or via the "-pk kokkos command-line -switch"_Run_options.html. +half, neigh/qeq = half, newton = on, binsize = 0.0, and comm = no. The +option neigh/thread = on when there are 16K atoms or less on an MPI +rank, otherwise it is "off". These settings are made automatically by +the required "-k on" "command-line switch"_Run_options.html. You can +change them by using the package kokkos command in your input script or +via the "-pk kokkos command-line switch"_Run_options.html. For the OMP package, the default is Nthreads = 0 and the option defaults are neigh = yes. These settings are made automatically if From 82be3ee32c24b033b75f4516672a67ac412ae28c Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 9 Apr 2019 09:17:07 -0600 Subject: [PATCH 23/34] Only use team with full neigh list --- doc/src/package.txt | 12 ++-- src/KOKKOS/comm_kokkos.cpp | 128 ++++++++++++++++++------------------- src/KOKKOS/kokkos.h | 4 +- src/KOKKOS/pair_kokkos.h | 2 +- 4 files changed, 75 insertions(+), 71 deletions(-) diff --git a/doc/src/package.txt b/doc/src/package.txt index aef35d8d13..b6759bf2e9 100644 --- a/doc/src/package.txt +++ b/doc/src/package.txt @@ -450,10 +450,14 @@ qeq/reax/kk"_fix_qeq_reax.html. If not explicitly set, the value of If the {neigh/thread} keyword is set to {off}, then the KOKKOS package threads only over atoms. However, for small systems, this may not expose enough parallelism to keep a GPU busy. When this keyword is set to {on}, -the KOKKOS package threads over both atoms and neighbors of atoms. 
-Using {neigh/thread} {on} may be slower for large systems, so this this -option is turned on by default only when there are 16K atoms or less -owned by an MPI rank. +the KOKKOS package threads over both atoms and neighbors of atoms. When +using {neigh/thread} {on}, a full neighbor list must also be used. Using +{neigh/thread} {on} may be slower for large systems, so this option +is turned on by default only when there are 16K atoms or less owned by +an MPI rank and when using a full neighbor list. Not all KOKKOS-enabled +potentials support this keyword yet; those that do not will thread only over atoms. Many +simple pair-wise potentials such as Lennard-Jones do support threading +over both atoms and neighbors. The {newton} keyword sets the Newton flags for pairwise and bonded interactions to {off} or {on}, the same as the "newton"_newton.html diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 720a79617f..c496065ea0 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -194,75 +194,75 @@ void CommKokkos::forward_comm_device(int dummy) k_firstrecv,k_pbc_flag,k_pbc,k_g2l); } else { - for (int iswap = 0; iswap < nswap; iswap++) { - if (sendproc[iswap] != me) { - if (comm_x_only) { - if (size_forward_recv[iswap]) { - buf = atomKK->k_x.view().data() + - firstrecv[iswap]*atomKK->k_x.view().extent(1); - MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE, - recvproc[iswap],0,world,&request); + for (int iswap = 0; iswap < nswap; iswap++) { + if (sendproc[iswap] != me) { + if (comm_x_only) { + if (size_forward_recv[iswap]) { + buf = atomKK->k_x.view().data() + + firstrecv[iswap]*atomKK->k_x.view().extent(1); + MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE, + recvproc[iswap],0,world,&request); + } + n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist, + iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); + if (n) { + MPI_Send(k_buf_send.view().data(), + n,MPI_DOUBLE,sendproc[iswap],0,world); + } + + if
(size_forward_recv[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + atomKK->modified(ExecutionSpaceFromDevice:: + space,X_MASK); + } + } else if (ghost_velocity) { + if (size_forward_recv[iswap]) { + MPI_Irecv(k_buf_recv.view().data(), + size_forward_recv[iswap],MPI_DOUBLE, + recvproc[iswap],0,world,&request); + } + n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap, + k_buf_send,pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); + if (n) { + MPI_Send(k_buf_send.view().data(),n, + MPI_DOUBLE,sendproc[iswap],0,world); + } + if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); + DeviceType::fence(); + } else { + if (size_forward_recv[iswap]) + MPI_Irecv(k_buf_recv.view().data(), + size_forward_recv[iswap],MPI_DOUBLE, + recvproc[iswap],0,world,&request); + n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap, + k_buf_send,pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); + if (n) + MPI_Send(k_buf_send.view().data(),n, + MPI_DOUBLE,sendproc[iswap],0,world); + if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); + DeviceType::fence(); } - n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist, - iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); - if (n) { - MPI_Send(k_buf_send.view().data(), - n,MPI_DOUBLE,sendproc[iswap],0,world); - } - - if (size_forward_recv[iswap]) { - MPI_Wait(&request,MPI_STATUS_IGNORE); - atomKK->modified(ExecutionSpaceFromDevice:: - space,X_MASK); - } - } else if (ghost_velocity) { - if (size_forward_recv[iswap]) { - MPI_Irecv(k_buf_recv.view().data(), - size_forward_recv[iswap],MPI_DOUBLE, - recvproc[iswap],0,world,&request); - } - n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap, - k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); - if (n) { - MPI_Send(k_buf_send.view().data(),n, - 
MPI_DOUBLE,sendproc[iswap],0,world); - } - if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); - avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); - DeviceType::fence(); } else { - if (size_forward_recv[iswap]) - MPI_Irecv(k_buf_recv.view().data(), - size_forward_recv[iswap],MPI_DOUBLE, - recvproc[iswap],0,world,&request); - n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap, - k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); - if (n) - MPI_Send(k_buf_send.view().data(),n, - MPI_DOUBLE,sendproc[iswap],0,world); - if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); - avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); - DeviceType::fence(); - } - } else { - if (!ghost_velocity) { - if (sendnum[iswap]) - n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap, - firstrecv[iswap],pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); - } else { - n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap, - k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); - avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send); - DeviceType::fence(); + if (!ghost_velocity) { + if (sendnum[iswap]) + n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap, + firstrecv[iswap],pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); + } else { + n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap, + k_buf_send,pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); + avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send); + DeviceType::fence(); + } } } } - } } /* ---------------------------------------------------------------------- diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index 3804d24040..ad41c83949 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -89,8 +89,8 @@ U: Must use Kokkos half/thread or full neighbor list with threads or GPUs Using Kokkos half-neighbor lists with threading is not allowed. 
-E: Must use KOKKOS package option 'neigh full' with 'neigh_thread on' +E: Must use KOKKOS package option 'neigh full' with 'neigh/thread on' -The 'neigh_thread on' option requires a full neighbor list +The 'neigh/thread on' option requires a full neighbor list */ diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index 04d756932a..9ca5d9578d 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -868,7 +868,7 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable EV_FLOAT ev; if (!fpair->lmp->kokkos->neigh_thread_set) - if (list->inum <= 16384) + if (list->inum <= 16384 && NEIGHFLAG == FULL) fpair->lmp->kokkos->neigh_thread = 1; if (fpair->lmp->kokkos->neigh_thread) { From a01bce46bb4b854fd17afbb2beb309750ddf864d Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 9 Apr 2019 10:23:37 -0600 Subject: [PATCH 24/34] Reduce GPU/CPU data transfer --- src/KOKKOS/atom_vec_angle_kokkos.cpp | 99 +++++++++----- src/KOKKOS/atom_vec_atomic_kokkos.cpp | 60 ++++----- src/KOKKOS/atom_vec_bond_kokkos.cpp | 75 +++++++---- src/KOKKOS/atom_vec_charge_kokkos.cpp | 49 ++++--- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 89 +++++++----- src/KOKKOS/atom_vec_full_kokkos.cpp | 164 ++++++++++++++--------- src/KOKKOS/atom_vec_molecular_kokkos.cpp | 159 +++++++++++++--------- src/KOKKOS/atom_vec_sphere_kokkos.cpp | 64 +++++---- 8 files changed, 467 insertions(+), 292 deletions(-) diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index 352fec57fb..06ef45272b 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -21,10 +21,11 @@ #include "atom_masks.h" #include "memory_kokkos.h" #include "error.h" +#include "force.h" using namespace LAMMPS_NS; -#define DELTA 10000 +#define DELTA 16384 /* ---------------------------------------------------------------------- */ @@ -1763,55 +1764,79 @@ bigint AtomVecAngleKokkos::memory_usage() void 
AtomVecAngleKokkos::sync(ExecutionSpace space, unsigned int mask) { + int nlocal = atom->nlocal; + int nall = atom->nlocal + atom->nghost; + + // avoid unnecessary data transfer + + auto k_x = Kokkos::subview(atomKK->k_x,std::make_pair(0,nall),Kokkos::ALL); + auto k_v = Kokkos::subview(atomKK->k_v,std::make_pair(0,nall),Kokkos::ALL); + auto k_f = Kokkos::subview(atomKK->k_f,std::make_pair(0,(!force || force->newton)?nall:nlocal),Kokkos::ALL); + auto k_tag = Kokkos::subview(atomKK->k_tag,std::make_pair(0,nall)); + auto k_type = Kokkos::subview(atomKK->k_type,std::make_pair(0,nall)); + auto k_mask = Kokkos::subview(atomKK->k_mask,std::make_pair(0,nall)); + auto k_image = Kokkos::subview(atomKK->k_image,std::make_pair(0,nall)); + auto k_molecule = Kokkos::subview(atomKK->k_molecule,std::make_pair(0,nall)); + auto k_nspecial = Kokkos::subview(atomKK->k_nspecial,std::make_pair(0,nall),Kokkos::ALL); + auto k_special = Kokkos::subview(atomKK->k_special,std::make_pair(0,nall),Kokkos::ALL); + auto k_num_bond = Kokkos::subview(atomKK->k_num_bond,std::make_pair(0,nall)); + auto k_bond_type = Kokkos::subview(atomKK->k_bond_type,std::make_pair(0,nall),Kokkos::ALL); + auto k_bond_atom = Kokkos::subview(atomKK->k_bond_atom,std::make_pair(0,nall),Kokkos::ALL); + auto k_num_angle = Kokkos::subview(atomKK->k_num_angle,std::make_pair(0,nall)); + auto k_angle_type = Kokkos::subview(atomKK->k_angle_type,std::make_pair(0,nall),Kokkos::ALL); + auto k_angle_atom1 = Kokkos::subview(atomKK->k_angle_atom1,std::make_pair(0,nall),Kokkos::ALL); + auto k_angle_atom2 = Kokkos::subview(atomKK->k_angle_atom2,std::make_pair(0,nall),Kokkos::ALL); + auto k_angle_atom3 = Kokkos::subview(atomKK->k_angle_atom3,std::make_pair(0,nall),Kokkos::ALL); + if (space == Device) { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & 
MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - if (mask & MOLECULE_MASK) atomKK->k_molecule.sync(); + if (mask & X_MASK) k_x.sync(); + if (mask & V_MASK) k_v.sync(); + if (mask & F_MASK) k_f.sync(); + if (mask & TAG_MASK) k_tag.sync(); + if (mask & TYPE_MASK) k_type.sync(); + if (mask & MASK_MASK) k_mask.sync(); + if (mask & IMAGE_MASK) k_image.sync(); + if (mask & MOLECULE_MASK) k_molecule.sync(); if (mask & SPECIAL_MASK) { - atomKK->k_nspecial.sync(); - atomKK->k_special.sync(); + k_nspecial.sync(); + k_special.sync(); } if (mask & BOND_MASK) { - atomKK->k_num_bond.sync(); - atomKK->k_bond_type.sync(); - atomKK->k_bond_atom.sync(); + k_num_bond.sync(); + k_bond_type.sync(); + k_bond_atom.sync(); } if (mask & ANGLE_MASK) { - atomKK->k_num_angle.sync(); - atomKK->k_angle_type.sync(); - atomKK->k_angle_atom1.sync(); - atomKK->k_angle_atom2.sync(); - atomKK->k_angle_atom3.sync(); + k_num_angle.sync(); + k_angle_type.sync(); + k_angle_atom1.sync(); + k_angle_atom2.sync(); + k_angle_atom3.sync(); } } else { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - if (mask & MOLECULE_MASK) atomKK->k_molecule.sync(); + if (mask & X_MASK) k_x.sync(); + if (mask & V_MASK) k_v.sync(); + if (mask & F_MASK) k_f.sync(); + if (mask & TAG_MASK) k_tag.sync(); + if (mask & TYPE_MASK) k_type.sync(); + if (mask & MASK_MASK) k_mask.sync(); + if (mask & IMAGE_MASK) k_image.sync(); + if (mask & MOLECULE_MASK) k_molecule.sync(); if (mask & SPECIAL_MASK) { - atomKK->k_nspecial.sync(); - atomKK->k_special.sync(); + k_nspecial.sync(); + k_special.sync(); } if (mask & BOND_MASK) { - atomKK->k_num_bond.sync(); - atomKK->k_bond_type.sync(); - atomKK->k_bond_atom.sync(); + k_num_bond.sync(); + 
k_bond_type.sync(); + k_bond_atom.sync(); } if (mask & ANGLE_MASK) { - atomKK->k_num_angle.sync(); - atomKK->k_angle_type.sync(); - atomKK->k_angle_atom1.sync(); - atomKK->k_angle_atom2.sync(); - atomKK->k_angle_atom3.sync(); + k_num_angle.sync(); + k_angle_type.sync(); + k_angle_atom1.sync(); + k_angle_atom2.sync(); + k_angle_atom3.sync(); } } } diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 80321fd2ea..ae357e6fe4 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; -#define DELTA 10 +#define DELTA 16384 /* ---------------------------------------------------------------------- */ @@ -56,8 +56,7 @@ AtomVecAtomicKokkos::AtomVecAtomicKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) void AtomVecAtomicKokkos::grow(int n) { - int step = MAX(DELTA,nmax*0.01); - if (n == 0) nmax += step; + if (n == 0) nmax += DELTA; else nmax = n; atomKK->nmax = nmax; if (nmax < 0 || nmax > MAXSMALLINT) @@ -899,36 +898,35 @@ bigint AtomVecAtomicKokkos::memory_usage() void AtomVecAtomicKokkos::sync(ExecutionSpace space, unsigned int mask) { + int nlocal = atom->nlocal; + int nall = atom->nlocal + atom->nghost; + + // avoid unnecessary data transfer + + auto k_x = Kokkos::subview(atomKK->k_x,std::make_pair(0,nall),Kokkos::ALL); + auto k_v = Kokkos::subview(atomKK->k_v,std::make_pair(0,nall),Kokkos::ALL); + auto k_f = Kokkos::subview(atomKK->k_f,std::make_pair(0,(!force || force->newton)?nall:nlocal),Kokkos::ALL); + auto k_tag = Kokkos::subview(atomKK->k_tag,std::make_pair(0,nall)); + auto k_type = Kokkos::subview(atomKK->k_type,std::make_pair(0,nall)); + auto k_mask = Kokkos::subview(atomKK->k_mask,std::make_pair(0,nall)); + auto k_image = Kokkos::subview(atomKK->k_image,std::make_pair(0,nall)); + if (space == Device) { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) { - if (!force || force->newton) { - 
atomKK->k_f.sync(); - } else { - auto k_f_nlocal = Kokkos::subview(atomKK->k_f,std::make_pair(0,atom->nlocal),Kokkos::ALL); - k_f_nlocal.sync(); - } - } - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & X_MASK) k_x.sync(); + if (mask & V_MASK) k_v.sync(); + if (mask & F_MASK) k_f.sync(); + if (mask & TAG_MASK) k_tag.sync(); + if (mask & TYPE_MASK) k_type.sync(); + if (mask & MASK_MASK) k_mask.sync(); + if (mask & IMAGE_MASK) k_image.sync(); } else { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) { - if (!force || force->newton) { - atomKK->k_f.sync(); - } else { - auto k_f_nlocal = Kokkos::subview(atomKK->k_f,std::make_pair(0,atom->nlocal),Kokkos::ALL); - k_f_nlocal.sync(); - } - } - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & X_MASK) k_x.sync(); + if (mask & V_MASK) k_v.sync(); + if (mask & F_MASK) k_f.sync(); + if (mask & TAG_MASK) k_tag.sync(); + if (mask & TYPE_MASK) k_type.sync(); + if (mask & MASK_MASK) k_mask.sync(); + if (mask & IMAGE_MASK) k_image.sync(); } } diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index c884d23880..6acd536dd8 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -21,10 +21,11 @@ #include "atom_masks.h" #include "memory_kokkos.h" #include "error.h" +#include "force.h" using namespace LAMMPS_NS; -#define DELTA 10000 +#define DELTA 16384 /* ---------------------------------------------------------------------- */ @@ -1175,41 +1176,61 @@ bigint AtomVecBondKokkos::memory_usage() void AtomVecBondKokkos::sync(ExecutionSpace space, unsigned int mask) { + int nlocal = atom->nlocal; + int nall = atom->nlocal 
+ atom->nghost; + + // avoid unnecessary data transfer + + auto k_x = Kokkos::subview(atomKK->k_x,std::make_pair(0,nall),Kokkos::ALL); + auto k_v = Kokkos::subview(atomKK->k_v,std::make_pair(0,nall),Kokkos::ALL); + auto k_f = Kokkos::subview(atomKK->k_f,std::make_pair(0,(!force || force->newton)?nall:nlocal),Kokkos::ALL); + auto k_tag = Kokkos::subview(atomKK->k_tag,std::make_pair(0,nall)); + auto k_type = Kokkos::subview(atomKK->k_type,std::make_pair(0,nall)); + auto k_mask = Kokkos::subview(atomKK->k_mask,std::make_pair(0,nall)); + auto k_image = Kokkos::subview(atomKK->k_image,std::make_pair(0,nall)); + auto k_q = Kokkos::subview(atomKK->k_q,std::make_pair(0,nall)); + auto k_molecule = Kokkos::subview(atomKK->k_molecule,std::make_pair(0,nall)); + auto k_nspecial = Kokkos::subview(atomKK->k_nspecial,std::make_pair(0,nall),Kokkos::ALL); + auto k_special = Kokkos::subview(atomKK->k_special,std::make_pair(0,nall),Kokkos::ALL); + auto k_num_bond = Kokkos::subview(atomKK->k_num_bond,std::make_pair(0,nall)); + auto k_bond_type = Kokkos::subview(atomKK->k_bond_type,std::make_pair(0,nall),Kokkos::ALL); + auto k_bond_atom = Kokkos::subview(atomKK->k_bond_atom,std::make_pair(0,nall),Kokkos::ALL); + if (space == Device) { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - if (mask & MOLECULE_MASK) atomKK->k_molecule.sync(); + if (mask & X_MASK) k_x.sync(); + if (mask & V_MASK) k_v.sync(); + if (mask & F_MASK) k_f.sync(); + if (mask & TAG_MASK) k_tag.sync(); + if (mask & TYPE_MASK) k_type.sync(); + if (mask & MASK_MASK) k_mask.sync(); + if (mask & IMAGE_MASK) k_image.sync(); + if (mask & MOLECULE_MASK) k_molecule.sync(); if (mask & SPECIAL_MASK) { - atomKK->k_nspecial.sync(); - atomKK->k_special.sync(); + 
k_nspecial.sync(); + k_special.sync(); } if (mask & BOND_MASK) { - atomKK->k_num_bond.sync(); - atomKK->k_bond_type.sync(); - atomKK->k_bond_atom.sync(); + k_num_bond.sync(); + k_bond_type.sync(); + k_bond_atom.sync(); } } else { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - if (mask & MOLECULE_MASK) atomKK->k_molecule.sync(); + if (mask & X_MASK) k_x.sync(); + if (mask & V_MASK) k_v.sync(); + if (mask & F_MASK) k_f.sync(); + if (mask & TAG_MASK) k_tag.sync(); + if (mask & TYPE_MASK) k_type.sync(); + if (mask & MASK_MASK) k_mask.sync(); + if (mask & IMAGE_MASK) k_image.sync(); + if (mask & MOLECULE_MASK) k_molecule.sync(); if (mask & SPECIAL_MASK) { - atomKK->k_nspecial.sync(); - atomKK->k_special.sync(); + k_nspecial.sync(); + k_special.sync(); } if (mask & BOND_MASK) { - atomKK->k_num_bond.sync(); - atomKK->k_bond_type.sync(); - atomKK->k_bond_atom.sync(); + k_num_bond.sync(); + k_bond_type.sync(); + k_bond_atom.sync(); } } } diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index 11e46d1274..807f733745 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -21,10 +21,11 @@ #include "atom_masks.h" #include "memory_kokkos.h" #include "error.h" +#include "force.h" using namespace LAMMPS_NS; -#define DELTA 10000 +#define DELTA 16384 /* ---------------------------------------------------------------------- */ @@ -1068,24 +1069,38 @@ bigint AtomVecChargeKokkos::memory_usage() void AtomVecChargeKokkos::sync(ExecutionSpace space, unsigned int mask) { + int nlocal = atom->nlocal; + int nall = atom->nlocal + atom->nghost; + + // avoid unnecessary data transfer + + auto k_x = 
Kokkos::subview(atomKK->k_x,std::make_pair(0,nall),Kokkos::ALL); + auto k_v = Kokkos::subview(atomKK->k_v,std::make_pair(0,nall),Kokkos::ALL); + auto k_f = Kokkos::subview(atomKK->k_f,std::make_pair(0,(!force || force->newton)?nall:nlocal),Kokkos::ALL); + auto k_tag = Kokkos::subview(atomKK->k_tag,std::make_pair(0,nall)); + auto k_type = Kokkos::subview(atomKK->k_type,std::make_pair(0,nall)); + auto k_mask = Kokkos::subview(atomKK->k_mask,std::make_pair(0,nall)); + auto k_image = Kokkos::subview(atomKK->k_image,std::make_pair(0,nall)); + auto k_q = Kokkos::subview(atomKK->k_q,std::make_pair(0,nall)); + if (space == Device) { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - if (mask & Q_MASK) atomKK->k_q.sync(); + if (mask & X_MASK) k_x.sync(); + if (mask & V_MASK) k_v.sync(); + if (mask & F_MASK) k_f.sync(); + if (mask & TAG_MASK) k_tag.sync(); + if (mask & TYPE_MASK) k_type.sync(); + if (mask & MASK_MASK) k_mask.sync(); + if (mask & IMAGE_MASK) k_image.sync(); + if (mask & Q_MASK) k_q.sync(); } else { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - if (mask & Q_MASK) atomKK->k_q.sync(); + if (mask & X_MASK) k_x.sync(); + if (mask & V_MASK) k_v.sync(); + if (mask & F_MASK) k_f.sync(); + if (mask & TAG_MASK) k_tag.sync(); + if (mask & TYPE_MASK) k_type.sync(); + if (mask & MASK_MASK) k_mask.sync(); + if (mask & IMAGE_MASK) k_image.sync(); + if (mask & Q_MASK) k_q.sync(); } } diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp 
b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 30db76e723..64105eaff5 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -21,10 +21,11 @@ #include "atom_masks.h" #include "memory_kokkos.h" #include "error.h" +#include "force.h" using namespace LAMMPS_NS; -#define DELTA 10000 +#define DELTA 16384 /* ---------------------------------------------------------------------- */ @@ -1856,40 +1857,62 @@ bigint AtomVecDPDKokkos::memory_usage() void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask) { + int nlocal = atom->nlocal; + int nall = atom->nlocal + atom->nghost; + + // avoid unnecessary data transfer + + auto k_x = Kokkos::subview(atomKK->k_x,std::make_pair(0,nall),Kokkos::ALL); + auto k_v = Kokkos::subview(atomKK->k_v,std::make_pair(0,nall),Kokkos::ALL); + auto k_f = Kokkos::subview(atomKK->k_f,std::make_pair(0,(!force || force->newton)?nall:nlocal),Kokkos::ALL); + auto k_tag = Kokkos::subview(atomKK->k_tag,std::make_pair(0,nall)); + auto k_type = Kokkos::subview(atomKK->k_type,std::make_pair(0,nall)); + auto k_mask = Kokkos::subview(atomKK->k_mask,std::make_pair(0,nall)); + auto k_image = Kokkos::subview(atomKK->k_image,std::make_pair(0,nall)); + auto k_rho = Kokkos::subview(atomKK->k_rho,std::make_pair(0,nall)); + auto k_dpdTheta = Kokkos::subview(atomKK->k_dpdTheta,std::make_pair(0,nall)); + auto k_uCond = Kokkos::subview(atomKK->k_uCond,std::make_pair(0,nall)); + auto k_uMech = Kokkos::subview(atomKK->k_uMech,std::make_pair(0,nall)); + auto k_uChem = Kokkos::subview(atomKK->k_uChem,std::make_pair(0,nall)); + auto k_uCG = Kokkos::subview(atomKK->k_uCG,std::make_pair(0,nall)); + auto k_uCGnew = Kokkos::subview(atomKK->k_uCGnew,std::make_pair(0,nall)); + auto k_duChem = Kokkos::subview(atomKK->k_duChem,std::make_pair(0,nall)); + auto k_dvector = Kokkos::subview(atomKK->k_dvector,std::make_pair(0,nall),Kokkos::ALL); + if (space == Device) { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) 
atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - if (mask & DPDRHO_MASK) atomKK->k_rho.sync(); - if (mask & DPDTHETA_MASK) atomKK->k_dpdTheta.sync(); - if (mask & UCOND_MASK) atomKK->k_uCond.sync(); - if (mask & UMECH_MASK) atomKK->k_uMech.sync(); - if (mask & UCHEM_MASK) atomKK->k_uChem.sync(); - if (mask & UCG_MASK) atomKK->k_uCG.sync(); - if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync(); - if (mask & DUCHEM_MASK) atomKK->k_duChem.sync(); - if (mask & DVECTOR_MASK) atomKK->k_dvector.sync(); + if (mask & X_MASK) k_x.sync(); + if (mask & V_MASK) k_v.sync(); + if (mask & F_MASK) k_f.sync(); + if (mask & TAG_MASK) k_tag.sync(); + if (mask & TYPE_MASK) k_type.sync(); + if (mask & MASK_MASK) k_mask.sync(); + if (mask & IMAGE_MASK) k_image.sync(); + if (mask & DPDRHO_MASK) k_rho.sync(); + if (mask & DPDTHETA_MASK) k_dpdTheta.sync(); + if (mask & UCOND_MASK) k_uCond.sync(); + if (mask & UMECH_MASK) k_uMech.sync(); + if (mask & UCHEM_MASK) k_uChem.sync(); + if (mask & UCG_MASK) k_uCG.sync(); + if (mask & UCGNEW_MASK) k_uCGnew.sync(); + if (mask & DUCHEM_MASK) k_duChem.sync(); + if (mask & DVECTOR_MASK) k_dvector.sync(); } else { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - if (mask & DPDRHO_MASK) atomKK->k_rho.sync(); - if (mask & DPDTHETA_MASK) atomKK->k_dpdTheta.sync(); - if (mask & UCOND_MASK) atomKK->k_uCond.sync(); - if (mask & UMECH_MASK) atomKK->k_uMech.sync(); - if (mask & UCHEM_MASK) atomKK->k_uChem.sync(); - if (mask & UCG_MASK) atomKK->k_uCG.sync(); - if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync(); - 
if (mask & DUCHEM_MASK) atomKK->k_duChem.sync(); - if (mask & DVECTOR_MASK) atomKK->k_dvector.sync(); + if (mask & X_MASK) k_x.sync(); + if (mask & V_MASK) k_v.sync(); + if (mask & F_MASK) k_f.sync(); + if (mask & TAG_MASK) k_tag.sync(); + if (mask & TYPE_MASK) k_type.sync(); + if (mask & MASK_MASK) k_mask.sync(); + if (mask & IMAGE_MASK) k_image.sync(); + if (mask & DPDRHO_MASK) k_rho.sync(); + if (mask & DPDTHETA_MASK) k_dpdTheta.sync(); + if (mask & UCOND_MASK) k_uCond.sync(); + if (mask & UMECH_MASK) k_uMech.sync(); + if (mask & UCHEM_MASK) k_uChem.sync(); + if (mask & UCG_MASK) k_uCG.sync(); + if (mask & UCGNEW_MASK) k_uCGnew.sync(); + if (mask & DUCHEM_MASK) k_duChem.sync(); + if (mask & DVECTOR_MASK) k_dvector.sync(); } } diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index a6ae1e0ccc..f02a92d2f3 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -21,10 +21,11 @@ #include "atom_masks.h" #include "memory_kokkos.h" #include "error.h" +#include "force.h" using namespace LAMMPS_NS; -#define DELTA 10000 +#define DELTA 16384 /* ---------------------------------------------------------------------- */ @@ -1651,89 +1652,126 @@ bigint AtomVecFullKokkos::memory_usage() void AtomVecFullKokkos::sync(ExecutionSpace space, unsigned int mask) { + int nlocal = atom->nlocal; + int nall = atom->nlocal + atom->nghost; + + // avoid unnecessary data transfer + + auto k_x = Kokkos::subview(atomKK->k_x,std::make_pair(0,nall),Kokkos::ALL); + auto k_v = Kokkos::subview(atomKK->k_v,std::make_pair(0,nall),Kokkos::ALL); + auto k_f = Kokkos::subview(atomKK->k_f,std::make_pair(0,(!force || force->newton)?nall:nlocal),Kokkos::ALL); + auto k_tag = Kokkos::subview(atomKK->k_tag,std::make_pair(0,nall)); + auto k_type = Kokkos::subview(atomKK->k_type,std::make_pair(0,nall)); + auto k_mask = Kokkos::subview(atomKK->k_mask,std::make_pair(0,nall)); + auto k_image = 
Kokkos::subview(atomKK->k_image,std::make_pair(0,nall)); + auto k_q = Kokkos::subview(atomKK->k_q,std::make_pair(0,nall)); + auto k_molecule = Kokkos::subview(atomKK->k_molecule,std::make_pair(0,nall)); + auto k_nspecial = Kokkos::subview(atomKK->k_nspecial,std::make_pair(0,nall),Kokkos::ALL); + auto k_special = Kokkos::subview(atomKK->k_special,std::make_pair(0,nall),Kokkos::ALL); + auto k_num_bond = Kokkos::subview(atomKK->k_num_bond,std::make_pair(0,nall)); + auto k_bond_type = Kokkos::subview(atomKK->k_bond_type,std::make_pair(0,nall),Kokkos::ALL); + auto k_bond_atom = Kokkos::subview(atomKK->k_bond_atom,std::make_pair(0,nall),Kokkos::ALL); + auto k_num_angle = Kokkos::subview(atomKK->k_num_angle,std::make_pair(0,nall)); + auto k_angle_type = Kokkos::subview(atomKK->k_angle_type,std::make_pair(0,nall),Kokkos::ALL); + auto k_angle_atom1 = Kokkos::subview(atomKK->k_angle_atom1,std::make_pair(0,nall),Kokkos::ALL); + auto k_angle_atom2 = Kokkos::subview(atomKK->k_angle_atom2,std::make_pair(0,nall),Kokkos::ALL); + auto k_angle_atom3 = Kokkos::subview(atomKK->k_angle_atom3,std::make_pair(0,nall),Kokkos::ALL); + auto k_num_dihedral = Kokkos::subview(atomKK->k_num_dihedral,std::make_pair(0,nall)); + auto k_dihedral_type = Kokkos::subview(atomKK->k_dihedral_type,std::make_pair(0,nall),Kokkos::ALL); + auto k_dihedral_atom1 = Kokkos::subview(atomKK->k_dihedral_atom1,std::make_pair(0,nall),Kokkos::ALL); + auto k_dihedral_atom2 = Kokkos::subview(atomKK->k_dihedral_atom2,std::make_pair(0,nall),Kokkos::ALL); + auto k_dihedral_atom3 = Kokkos::subview(atomKK->k_dihedral_atom3,std::make_pair(0,nall),Kokkos::ALL); + auto k_dihedral_atom4 = Kokkos::subview(atomKK->k_dihedral_atom4,std::make_pair(0,nall),Kokkos::ALL); + auto k_num_improper = Kokkos::subview(atomKK->k_num_improper,std::make_pair(0,nall)); + auto k_improper_type = Kokkos::subview(atomKK->k_improper_type,std::make_pair(0,nall),Kokkos::ALL); + auto k_improper_atom1 = 
Kokkos::subview(atomKK->k_improper_atom1,std::make_pair(0,nall),Kokkos::ALL); + auto k_improper_atom2 = Kokkos::subview(atomKK->k_improper_atom2,std::make_pair(0,nall),Kokkos::ALL); + auto k_improper_atom3 = Kokkos::subview(atomKK->k_improper_atom3,std::make_pair(0,nall),Kokkos::ALL); + auto k_improper_atom4 = Kokkos::subview(atomKK->k_improper_atom4,std::make_pair(0,nall),Kokkos::ALL); + if (space == Device) { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - if (mask & Q_MASK) atomKK->k_q.sync(); - if (mask & MOLECULE_MASK) atomKK->k_molecule.sync(); + if (mask & X_MASK) k_x.sync(); + if (mask & V_MASK) k_v.sync(); + if (mask & F_MASK) k_f.sync(); + if (mask & TAG_MASK) k_tag.sync(); + if (mask & TYPE_MASK) k_type.sync(); + if (mask & MASK_MASK) k_mask.sync(); + if (mask & IMAGE_MASK) k_image.sync(); + if (mask & Q_MASK) k_q.sync(); + if (mask & MOLECULE_MASK) k_molecule.sync(); if (mask & SPECIAL_MASK) { - atomKK->k_nspecial.sync(); - atomKK->k_special.sync(); + k_nspecial.sync(); + k_special.sync(); } if (mask & BOND_MASK) { - atomKK->k_num_bond.sync(); - atomKK->k_bond_type.sync(); - atomKK->k_bond_atom.sync(); + k_num_bond.sync(); + k_bond_type.sync(); + k_bond_atom.sync(); } if (mask & ANGLE_MASK) { - atomKK->k_num_angle.sync(); - atomKK->k_angle_type.sync(); - atomKK->k_angle_atom1.sync(); - atomKK->k_angle_atom2.sync(); - atomKK->k_angle_atom3.sync(); + k_num_angle.sync(); + k_angle_type.sync(); + k_angle_atom1.sync(); + k_angle_atom2.sync(); + k_angle_atom3.sync(); } if (mask & DIHEDRAL_MASK) { - atomKK->k_num_dihedral.sync(); - atomKK->k_dihedral_type.sync(); - atomKK->k_dihedral_atom1.sync(); - atomKK->k_dihedral_atom2.sync(); - atomKK->k_dihedral_atom3.sync(); - atomKK->k_dihedral_atom4.sync(); 
+ k_num_dihedral.sync(); + k_dihedral_type.sync(); + k_dihedral_atom1.sync(); + k_dihedral_atom2.sync(); + k_dihedral_atom3.sync(); + k_dihedral_atom4.sync(); } if (mask & IMPROPER_MASK) { - atomKK->k_num_improper.sync(); - atomKK->k_improper_type.sync(); - atomKK->k_improper_atom1.sync(); - atomKK->k_improper_atom2.sync(); - atomKK->k_improper_atom3.sync(); - atomKK->k_improper_atom4.sync(); + k_num_improper.sync(); + k_improper_type.sync(); + k_improper_atom1.sync(); + k_improper_atom2.sync(); + k_improper_atom3.sync(); + k_improper_atom4.sync(); } } else { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - if (mask & Q_MASK) atomKK->k_q.sync(); - if (mask & MOLECULE_MASK) atomKK->k_molecule.sync(); + if (mask & X_MASK) k_x.sync(); + if (mask & V_MASK) k_v.sync(); + if (mask & F_MASK) k_f.sync(); + if (mask & TAG_MASK) k_tag.sync(); + if (mask & TYPE_MASK) k_type.sync(); + if (mask & MASK_MASK) k_mask.sync(); + if (mask & IMAGE_MASK) k_image.sync(); + if (mask & Q_MASK) k_q.sync(); + if (mask & MOLECULE_MASK) k_molecule.sync(); if (mask & SPECIAL_MASK) { - atomKK->k_nspecial.sync(); - atomKK->k_special.sync(); + k_nspecial.sync(); + k_special.sync(); } if (mask & BOND_MASK) { - atomKK->k_num_bond.sync(); - atomKK->k_bond_type.sync(); - atomKK->k_bond_atom.sync(); + k_num_bond.sync(); + k_bond_type.sync(); + k_bond_atom.sync(); } if (mask & ANGLE_MASK) { - atomKK->k_num_angle.sync(); - atomKK->k_angle_type.sync(); - atomKK->k_angle_atom1.sync(); - atomKK->k_angle_atom2.sync(); - atomKK->k_angle_atom3.sync(); + k_num_angle.sync(); + k_angle_type.sync(); + k_angle_atom1.sync(); + k_angle_atom2.sync(); + k_angle_atom3.sync(); } if (mask & DIHEDRAL_MASK) { - atomKK->k_num_dihedral.sync(); - 
atomKK->k_dihedral_type.sync(); - atomKK->k_dihedral_atom1.sync(); - atomKK->k_dihedral_atom2.sync(); - atomKK->k_dihedral_atom3.sync(); - atomKK->k_dihedral_atom4.sync(); + k_num_dihedral.sync(); + k_dihedral_type.sync(); + k_dihedral_atom1.sync(); + k_dihedral_atom2.sync(); + k_dihedral_atom3.sync(); + k_dihedral_atom4.sync(); } if (mask & IMPROPER_MASK) { - atomKK->k_num_improper.sync(); - atomKK->k_improper_type.sync(); - atomKK->k_improper_atom1.sync(); - atomKK->k_improper_atom2.sync(); - atomKK->k_improper_atom3.sync(); - atomKK->k_improper_atom4.sync(); + k_num_improper.sync(); + k_improper_type.sync(); + k_improper_atom1.sync(); + k_improper_atom2.sync(); + k_improper_atom3.sync(); + k_improper_atom4.sync(); } } } diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index 9537320976..25ef6f0c7e 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -21,10 +21,11 @@ #include "atom_masks.h" #include "memory_kokkos.h" #include "error.h" +#include "force.h" using namespace LAMMPS_NS; -#define DELTA 10000 +#define DELTA 16384 /* ---------------------------------------------------------------------- */ @@ -2049,87 +2050,123 @@ bigint AtomVecMolecularKokkos::memory_usage() void AtomVecMolecularKokkos::sync(ExecutionSpace space, unsigned int mask) { + int nlocal = atom->nlocal; + int nall = atom->nlocal + atom->nghost; + + // avoid unnecessary data transfer + + auto k_x = Kokkos::subview(atomKK->k_x,std::make_pair(0,nall),Kokkos::ALL); + auto k_v = Kokkos::subview(atomKK->k_v,std::make_pair(0,nall),Kokkos::ALL); + auto k_f = Kokkos::subview(atomKK->k_f,std::make_pair(0,(!force || force->newton)?nall:nlocal),Kokkos::ALL); + auto k_tag = Kokkos::subview(atomKK->k_tag,std::make_pair(0,nall)); + auto k_type = Kokkos::subview(atomKK->k_type,std::make_pair(0,nall)); + auto k_mask = Kokkos::subview(atomKK->k_mask,std::make_pair(0,nall)); + auto k_image = 
Kokkos::subview(atomKK->k_image,std::make_pair(0,nall)); + auto k_molecule = Kokkos::subview(atomKK->k_molecule,std::make_pair(0,nall)); + auto k_nspecial = Kokkos::subview(atomKK->k_nspecial,std::make_pair(0,nall),Kokkos::ALL); + auto k_special = Kokkos::subview(atomKK->k_special,std::make_pair(0,nall),Kokkos::ALL); + auto k_num_bond = Kokkos::subview(atomKK->k_num_bond,std::make_pair(0,nall)); + auto k_bond_type = Kokkos::subview(atomKK->k_bond_type,std::make_pair(0,nall),Kokkos::ALL); + auto k_bond_atom = Kokkos::subview(atomKK->k_bond_atom,std::make_pair(0,nall),Kokkos::ALL); + auto k_num_angle = Kokkos::subview(atomKK->k_num_angle,std::make_pair(0,nall)); + auto k_angle_type = Kokkos::subview(atomKK->k_angle_type,std::make_pair(0,nall),Kokkos::ALL); + auto k_angle_atom1 = Kokkos::subview(atomKK->k_angle_atom1,std::make_pair(0,nall),Kokkos::ALL); + auto k_angle_atom2 = Kokkos::subview(atomKK->k_angle_atom2,std::make_pair(0,nall),Kokkos::ALL); + auto k_angle_atom3 = Kokkos::subview(atomKK->k_angle_atom3,std::make_pair(0,nall),Kokkos::ALL); + auto k_num_dihedral = Kokkos::subview(atomKK->k_num_dihedral,std::make_pair(0,nall)); + auto k_dihedral_type = Kokkos::subview(atomKK->k_dihedral_type,std::make_pair(0,nall),Kokkos::ALL); + auto k_dihedral_atom1 = Kokkos::subview(atomKK->k_dihedral_atom1,std::make_pair(0,nall),Kokkos::ALL); + auto k_dihedral_atom2 = Kokkos::subview(atomKK->k_dihedral_atom2,std::make_pair(0,nall),Kokkos::ALL); + auto k_dihedral_atom3 = Kokkos::subview(atomKK->k_dihedral_atom3,std::make_pair(0,nall),Kokkos::ALL); + auto k_dihedral_atom4 = Kokkos::subview(atomKK->k_dihedral_atom4,std::make_pair(0,nall),Kokkos::ALL); + auto k_num_improper = Kokkos::subview(atomKK->k_num_improper,std::make_pair(0,nall)); + auto k_improper_type = Kokkos::subview(atomKK->k_improper_type,std::make_pair(0,nall),Kokkos::ALL); + auto k_improper_atom1 = Kokkos::subview(atomKK->k_improper_atom1,std::make_pair(0,nall),Kokkos::ALL); + auto k_improper_atom2 = 
Kokkos::subview(atomKK->k_improper_atom2,std::make_pair(0,nall),Kokkos::ALL); + auto k_improper_atom3 = Kokkos::subview(atomKK->k_improper_atom3,std::make_pair(0,nall),Kokkos::ALL); + auto k_improper_atom4 = Kokkos::subview(atomKK->k_improper_atom4,std::make_pair(0,nall),Kokkos::ALL); + if (space == Device) { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - if (mask & MOLECULE_MASK) atomKK->k_molecule.sync(); + if (mask & X_MASK) k_x.sync(); + if (mask & V_MASK) k_v.sync(); + if (mask & F_MASK) k_f.sync(); + if (mask & TAG_MASK) k_tag.sync(); + if (mask & TYPE_MASK) k_type.sync(); + if (mask & MASK_MASK) k_mask.sync(); + if (mask & IMAGE_MASK) k_image.sync(); + if (mask & MOLECULE_MASK) k_molecule.sync(); if (mask & SPECIAL_MASK) { - atomKK->k_nspecial.sync(); - atomKK->k_special.sync(); + k_nspecial.sync(); + k_special.sync(); } if (mask & BOND_MASK) { - atomKK->k_num_bond.sync(); - atomKK->k_bond_type.sync(); - atomKK->k_bond_atom.sync(); + k_num_bond.sync(); + k_bond_type.sync(); + k_bond_atom.sync(); } if (mask & ANGLE_MASK) { - atomKK->k_num_angle.sync(); - atomKK->k_angle_type.sync(); - atomKK->k_angle_atom1.sync(); - atomKK->k_angle_atom2.sync(); - atomKK->k_angle_atom3.sync(); + k_num_angle.sync(); + k_angle_type.sync(); + k_angle_atom1.sync(); + k_angle_atom2.sync(); + k_angle_atom3.sync(); } if (mask & DIHEDRAL_MASK) { - atomKK->k_num_dihedral.sync(); - atomKK->k_dihedral_type.sync(); - atomKK->k_dihedral_atom1.sync(); - atomKK->k_dihedral_atom2.sync(); - atomKK->k_dihedral_atom3.sync(); - atomKK->k_dihedral_atom4.sync(); + k_num_dihedral.sync(); + k_dihedral_type.sync(); + k_dihedral_atom1.sync(); + k_dihedral_atom2.sync(); + k_dihedral_atom3.sync(); + k_dihedral_atom4.sync(); } if (mask & 
IMPROPER_MASK) { - atomKK->k_num_improper.sync(); - atomKK->k_improper_type.sync(); - atomKK->k_improper_atom1.sync(); - atomKK->k_improper_atom2.sync(); - atomKK->k_improper_atom3.sync(); - atomKK->k_improper_atom4.sync(); + k_num_improper.sync(); + k_improper_type.sync(); + k_improper_atom1.sync(); + k_improper_atom2.sync(); + k_improper_atom3.sync(); + k_improper_atom4.sync(); } } else { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - if (mask & MOLECULE_MASK) atomKK->k_molecule.sync(); + if (mask & X_MASK) k_x.sync(); + if (mask & V_MASK) k_v.sync(); + if (mask & F_MASK) k_f.sync(); + if (mask & TAG_MASK) k_tag.sync(); + if (mask & TYPE_MASK) k_type.sync(); + if (mask & MASK_MASK) k_mask.sync(); + if (mask & IMAGE_MASK) k_image.sync(); + if (mask & MOLECULE_MASK) k_molecule.sync(); if (mask & SPECIAL_MASK) { - atomKK->k_nspecial.sync(); - atomKK->k_special.sync(); + k_nspecial.sync(); + k_special.sync(); } if (mask & BOND_MASK) { - atomKK->k_num_bond.sync(); - atomKK->k_bond_type.sync(); - atomKK->k_bond_atom.sync(); + k_num_bond.sync(); + k_bond_type.sync(); + k_bond_atom.sync(); } if (mask & ANGLE_MASK) { - atomKK->k_num_angle.sync(); - atomKK->k_angle_type.sync(); - atomKK->k_angle_atom1.sync(); - atomKK->k_angle_atom2.sync(); - atomKK->k_angle_atom3.sync(); + k_num_angle.sync(); + k_angle_type.sync(); + k_angle_atom1.sync(); + k_angle_atom2.sync(); + k_angle_atom3.sync(); } if (mask & DIHEDRAL_MASK) { - atomKK->k_num_dihedral.sync(); - atomKK->k_dihedral_type.sync(); - atomKK->k_dihedral_atom1.sync(); - atomKK->k_dihedral_atom2.sync(); - atomKK->k_dihedral_atom3.sync(); - atomKK->k_dihedral_atom4.sync(); + k_num_dihedral.sync(); + k_dihedral_type.sync(); + k_dihedral_atom1.sync(); + 
k_dihedral_atom2.sync(); + k_dihedral_atom3.sync(); + k_dihedral_atom4.sync(); } if (mask & IMPROPER_MASK) { - atomKK->k_num_improper.sync(); - atomKK->k_improper_type.sync(); - atomKK->k_improper_atom1.sync(); - atomKK->k_improper_atom2.sync(); - atomKK->k_improper_atom3.sync(); - atomKK->k_improper_atom4.sync(); + k_num_improper.sync(); + k_improper_type.sync(); + k_improper_atom1.sync(); + k_improper_atom2.sync(); + k_improper_atom3.sync(); + k_improper_atom4.sync(); } } } diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index f05e8d09df..df86cacccc 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -27,10 +27,11 @@ #include "memory.h" #include "error.h" #include "memory_kokkos.h" +#include "force.h" using namespace LAMMPS_NS; -#define DELTA 10000 +#define DELTA 16384 static const double MY_PI = 3.14159265358979323846; // pi @@ -2791,30 +2792,47 @@ bigint AtomVecSphereKokkos::memory_usage() void AtomVecSphereKokkos::sync(ExecutionSpace space, unsigned int mask) { + int nlocal = atom->nlocal; + int nall = atom->nlocal + atom->nghost; + + // avoid unnecessary data transfer + + auto k_x = Kokkos::subview(atomKK->k_x,std::make_pair(0,nall),Kokkos::ALL); + auto k_v = Kokkos::subview(atomKK->k_v,std::make_pair(0,nall),Kokkos::ALL); + auto k_f = Kokkos::subview(atomKK->k_f,std::make_pair(0,(!force || force->newton)?nall:nlocal),Kokkos::ALL); + auto k_tag = Kokkos::subview(atomKK->k_tag,std::make_pair(0,nall)); + auto k_type = Kokkos::subview(atomKK->k_type,std::make_pair(0,nall)); + auto k_mask = Kokkos::subview(atomKK->k_mask,std::make_pair(0,nall)); + auto k_image = Kokkos::subview(atomKK->k_image,std::make_pair(0,nall)); + auto k_radius = Kokkos::subview(atomKK->k_radius,std::make_pair(0,nall)); + auto k_rmass = Kokkos::subview(atomKK->k_rmass,std::make_pair(0,nall)); + auto k_omega = Kokkos::subview(atomKK->k_omega,std::make_pair(0,nall),Kokkos::ALL); + auto k_torque = 
Kokkos::subview(atomKK->k_torque,std::make_pair(0,nall),Kokkos::ALL); + if (space == Device) { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - if (mask & RADIUS_MASK) atomKK->k_radius.sync(); - if (mask & RMASS_MASK) atomKK->k_rmass.sync(); - if (mask & OMEGA_MASK) atomKK->k_omega.sync(); - if (mask & TORQUE_MASK) atomKK->k_torque.sync(); + if (mask & X_MASK) k_x.sync(); + if (mask & V_MASK) k_v.sync(); + if (mask & F_MASK) k_f.sync(); + if (mask & TAG_MASK) k_tag.sync(); + if (mask & TYPE_MASK) k_type.sync(); + if (mask & MASK_MASK) k_mask.sync(); + if (mask & IMAGE_MASK) k_image.sync(); + if (mask & RADIUS_MASK) k_radius.sync(); + if (mask & RMASS_MASK) k_rmass.sync(); + if (mask & OMEGA_MASK) k_omega.sync(); + if (mask & TORQUE_MASK) k_torque.sync(); } else { - if (mask & X_MASK) atomKK->k_x.sync(); - if (mask & V_MASK) atomKK->k_v.sync(); - if (mask & F_MASK) atomKK->k_f.sync(); - if (mask & TAG_MASK) atomKK->k_tag.sync(); - if (mask & TYPE_MASK) atomKK->k_type.sync(); - if (mask & MASK_MASK) atomKK->k_mask.sync(); - if (mask & IMAGE_MASK) atomKK->k_image.sync(); - if (mask & RADIUS_MASK) atomKK->k_radius.sync(); - if (mask & RMASS_MASK) atomKK->k_rmass.sync(); - if (mask & OMEGA_MASK) atomKK->k_omega.sync(); - if (mask & TORQUE_MASK) atomKK->k_torque.sync(); + if (mask & X_MASK) k_x.sync(); + if (mask & V_MASK) k_v.sync(); + if (mask & F_MASK) k_f.sync(); + if (mask & TAG_MASK) k_tag.sync(); + if (mask & TYPE_MASK) k_type.sync(); + if (mask & MASK_MASK) k_mask.sync(); + if (mask & IMAGE_MASK) k_image.sync(); + if (mask & RADIUS_MASK) k_radius.sync(); + if (mask & RMASS_MASK) k_rmass.sync(); + if (mask & OMEGA_MASK) k_omega.sync(); + if (mask & TORQUE_MASK) k_torque.sync(); } } From 
618547b72e2954f2c70663d6359e4536635b8284 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 9 Apr 2019 14:40:20 -0600 Subject: [PATCH 25/34] Reduce DELTA and revert subview change --- src/KOKKOS/atom_vec_angle_kokkos.cpp | 102 ++++++-------- src/KOKKOS/atom_vec_atomic_kokkos.cpp | 47 +++---- src/KOKKOS/atom_vec_bond_kokkos.cpp | 78 ++++------- src/KOKKOS/atom_vec_charge_kokkos.cpp | 52 +++---- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 92 +++++-------- src/KOKKOS/atom_vec_full_kokkos.cpp | 167 +++++++++-------------- src/KOKKOS/atom_vec_molecular_kokkos.cpp | 162 +++++++++------------- src/KOKKOS/atom_vec_sphere_kokkos.cpp | 67 ++++----- 8 files changed, 292 insertions(+), 475 deletions(-) diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index 06ef45272b..df455dd3ff 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -21,11 +21,10 @@ #include "atom_masks.h" #include "memory_kokkos.h" #include "error.h" -#include "force.h" using namespace LAMMPS_NS; -#define DELTA 16384 +#define DELTA 10 /* ---------------------------------------------------------------------- */ @@ -60,7 +59,8 @@ AtomVecAngleKokkos::AtomVecAngleKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) void AtomVecAngleKokkos::grow(int n) { - if (n == 0) nmax += DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; else nmax = n; atomKK->nmax = nmax; if (nmax < 0 || nmax > MAXSMALLINT) @@ -1764,79 +1764,55 @@ bigint AtomVecAngleKokkos::memory_usage() void AtomVecAngleKokkos::sync(ExecutionSpace space, unsigned int mask) { - int nlocal = atom->nlocal; - int nall = atom->nlocal + atom->nghost; - - // avoid unnecessary data transfer - - auto k_x = Kokkos::subview(atomKK->k_x,std::make_pair(0,nall),Kokkos::ALL); - auto k_v = Kokkos::subview(atomKK->k_v,std::make_pair(0,nall),Kokkos::ALL); - auto k_f = Kokkos::subview(atomKK->k_f,std::make_pair(0,(!force || force->newton)?nall:nlocal),Kokkos::ALL); - auto k_tag = 
Kokkos::subview(atomKK->k_tag,std::make_pair(0,nall)); - auto k_type = Kokkos::subview(atomKK->k_type,std::make_pair(0,nall)); - auto k_mask = Kokkos::subview(atomKK->k_mask,std::make_pair(0,nall)); - auto k_image = Kokkos::subview(atomKK->k_image,std::make_pair(0,nall)); - auto k_molecule = Kokkos::subview(atomKK->k_molecule,std::make_pair(0,nall)); - auto k_nspecial = Kokkos::subview(atomKK->k_nspecial,std::make_pair(0,nall),Kokkos::ALL); - auto k_special = Kokkos::subview(atomKK->k_special,std::make_pair(0,nall),Kokkos::ALL); - auto k_num_bond = Kokkos::subview(atomKK->k_num_bond,std::make_pair(0,nall)); - auto k_bond_type = Kokkos::subview(atomKK->k_bond_type,std::make_pair(0,nall),Kokkos::ALL); - auto k_bond_atom = Kokkos::subview(atomKK->k_bond_atom,std::make_pair(0,nall),Kokkos::ALL); - auto k_num_angle = Kokkos::subview(atomKK->k_num_angle,std::make_pair(0,nall)); - auto k_angle_type = Kokkos::subview(atomKK->k_angle_type,std::make_pair(0,nall),Kokkos::ALL); - auto k_angle_atom1 = Kokkos::subview(atomKK->k_angle_atom1,std::make_pair(0,nall),Kokkos::ALL); - auto k_angle_atom2 = Kokkos::subview(atomKK->k_angle_atom2,std::make_pair(0,nall),Kokkos::ALL); - auto k_angle_atom3 = Kokkos::subview(atomKK->k_angle_atom3,std::make_pair(0,nall),Kokkos::ALL); - if (space == Device) { - if (mask & X_MASK) k_x.sync(); - if (mask & V_MASK) k_v.sync(); - if (mask & F_MASK) k_f.sync(); - if (mask & TAG_MASK) k_tag.sync(); - if (mask & TYPE_MASK) k_type.sync(); - if (mask & MASK_MASK) k_mask.sync(); - if (mask & IMAGE_MASK) k_image.sync(); - if (mask & MOLECULE_MASK) k_molecule.sync(); + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & MOLECULE_MASK) atomKK->k_molecule.sync(); if (mask & SPECIAL_MASK) { - 
k_nspecial.sync(); - k_special.sync(); + atomKK->k_nspecial.sync(); + atomKK->k_special.sync(); } if (mask & BOND_MASK) { - k_num_bond.sync(); - k_bond_type.sync(); - k_bond_atom.sync(); + atomKK->k_num_bond.sync(); + atomKK->k_bond_type.sync(); + atomKK->k_bond_atom.sync(); } if (mask & ANGLE_MASK) { - k_num_angle.sync(); - k_angle_type.sync(); - k_angle_atom1.sync(); - k_angle_atom2.sync(); - k_angle_atom3.sync(); + atomKK->k_num_angle.sync(); + atomKK->k_angle_type.sync(); + atomKK->k_angle_atom1.sync(); + atomKK->k_angle_atom2.sync(); + atomKK->k_angle_atom3.sync(); } } else { - if (mask & X_MASK) k_x.sync(); - if (mask & V_MASK) k_v.sync(); - if (mask & F_MASK) k_f.sync(); - if (mask & TAG_MASK) k_tag.sync(); - if (mask & TYPE_MASK) k_type.sync(); - if (mask & MASK_MASK) k_mask.sync(); - if (mask & IMAGE_MASK) k_image.sync(); - if (mask & MOLECULE_MASK) k_molecule.sync(); + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & MOLECULE_MASK) atomKK->k_molecule.sync(); if (mask & SPECIAL_MASK) { - k_nspecial.sync(); - k_special.sync(); + atomKK->k_nspecial.sync(); + atomKK->k_special.sync(); } if (mask & BOND_MASK) { - k_num_bond.sync(); - k_bond_type.sync(); - k_bond_atom.sync(); + atomKK->k_num_bond.sync(); + atomKK->k_bond_type.sync(); + atomKK->k_bond_atom.sync(); } if (mask & ANGLE_MASK) { - k_num_angle.sync(); - k_angle_type.sync(); - k_angle_atom1.sync(); - k_angle_atom2.sync(); - k_angle_atom3.sync(); + atomKK->k_num_angle.sync(); + atomKK->k_angle_type.sync(); + atomKK->k_angle_atom1.sync(); + atomKK->k_angle_atom2.sync(); + atomKK->k_angle_atom3.sync(); } } } diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index ae357e6fe4..e3c1bee956 100644 --- 
a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -21,11 +21,10 @@ #include "atom_masks.h" #include "memory_kokkos.h" #include "error.h" -#include "force.h" using namespace LAMMPS_NS; -#define DELTA 16384 +#define DELTA 10 /* ---------------------------------------------------------------------- */ @@ -56,7 +55,8 @@ AtomVecAtomicKokkos::AtomVecAtomicKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) void AtomVecAtomicKokkos::grow(int n) { - if (n == 0) nmax += DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; else nmax = n; atomKK->nmax = nmax; if (nmax < 0 || nmax > MAXSMALLINT) @@ -898,35 +898,22 @@ bigint AtomVecAtomicKokkos::memory_usage() void AtomVecAtomicKokkos::sync(ExecutionSpace space, unsigned int mask) { - int nlocal = atom->nlocal; - int nall = atom->nlocal + atom->nghost; - - // avoid unnecessary data transfer - - auto k_x = Kokkos::subview(atomKK->k_x,std::make_pair(0,nall),Kokkos::ALL); - auto k_v = Kokkos::subview(atomKK->k_v,std::make_pair(0,nall),Kokkos::ALL); - auto k_f = Kokkos::subview(atomKK->k_f,std::make_pair(0,(!force || force->newton)?nall:nlocal),Kokkos::ALL); - auto k_tag = Kokkos::subview(atomKK->k_tag,std::make_pair(0,nall)); - auto k_type = Kokkos::subview(atomKK->k_type,std::make_pair(0,nall)); - auto k_mask = Kokkos::subview(atomKK->k_mask,std::make_pair(0,nall)); - auto k_image = Kokkos::subview(atomKK->k_image,std::make_pair(0,nall)); - if (space == Device) { - if (mask & X_MASK) k_x.sync(); - if (mask & V_MASK) k_v.sync(); - if (mask & F_MASK) k_f.sync(); - if (mask & TAG_MASK) k_tag.sync(); - if (mask & TYPE_MASK) k_type.sync(); - if (mask & MASK_MASK) k_mask.sync(); - if (mask & IMAGE_MASK) k_image.sync(); + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & 
IMAGE_MASK) atomKK->k_image.sync(); } else { - if (mask & X_MASK) k_x.sync(); - if (mask & V_MASK) k_v.sync(); - if (mask & F_MASK) k_f.sync(); - if (mask & TAG_MASK) k_tag.sync(); - if (mask & TYPE_MASK) k_type.sync(); - if (mask & MASK_MASK) k_mask.sync(); - if (mask & IMAGE_MASK) k_image.sync(); + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); } } diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index 6acd536dd8..825b141b8b 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -21,11 +21,10 @@ #include "atom_masks.h" #include "memory_kokkos.h" #include "error.h" -#include "force.h" using namespace LAMMPS_NS; -#define DELTA 16384 +#define DELTA 10 /* ---------------------------------------------------------------------- */ @@ -57,7 +56,8 @@ AtomVecBondKokkos::AtomVecBondKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) void AtomVecBondKokkos::grow(int n) { - if (n == 0) nmax += DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; else nmax = n; atomKK->nmax = nmax; if (nmax < 0 || nmax > MAXSMALLINT) @@ -1176,61 +1176,41 @@ bigint AtomVecBondKokkos::memory_usage() void AtomVecBondKokkos::sync(ExecutionSpace space, unsigned int mask) { - int nlocal = atom->nlocal; - int nall = atom->nlocal + atom->nghost; - - // avoid unnecessary data transfer - - auto k_x = Kokkos::subview(atomKK->k_x,std::make_pair(0,nall),Kokkos::ALL); - auto k_v = Kokkos::subview(atomKK->k_v,std::make_pair(0,nall),Kokkos::ALL); - auto k_f = Kokkos::subview(atomKK->k_f,std::make_pair(0,(!force || force->newton)?nall:nlocal),Kokkos::ALL); - auto k_tag = Kokkos::subview(atomKK->k_tag,std::make_pair(0,nall)); - auto k_type = 
Kokkos::subview(atomKK->k_type,std::make_pair(0,nall)); - auto k_mask = Kokkos::subview(atomKK->k_mask,std::make_pair(0,nall)); - auto k_image = Kokkos::subview(atomKK->k_image,std::make_pair(0,nall)); - auto k_q = Kokkos::subview(atomKK->k_q,std::make_pair(0,nall)); - auto k_molecule = Kokkos::subview(atomKK->k_molecule,std::make_pair(0,nall)); - auto k_nspecial = Kokkos::subview(atomKK->k_nspecial,std::make_pair(0,nall),Kokkos::ALL); - auto k_special = Kokkos::subview(atomKK->k_special,std::make_pair(0,nall),Kokkos::ALL); - auto k_num_bond = Kokkos::subview(atomKK->k_num_bond,std::make_pair(0,nall)); - auto k_bond_type = Kokkos::subview(atomKK->k_bond_type,std::make_pair(0,nall),Kokkos::ALL); - auto k_bond_atom = Kokkos::subview(atomKK->k_bond_atom,std::make_pair(0,nall),Kokkos::ALL); - if (space == Device) { - if (mask & X_MASK) k_x.sync(); - if (mask & V_MASK) k_v.sync(); - if (mask & F_MASK) k_f.sync(); - if (mask & TAG_MASK) k_tag.sync(); - if (mask & TYPE_MASK) k_type.sync(); - if (mask & MASK_MASK) k_mask.sync(); - if (mask & IMAGE_MASK) k_image.sync(); - if (mask & MOLECULE_MASK) k_molecule.sync(); + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & MOLECULE_MASK) atomKK->k_molecule.sync(); if (mask & SPECIAL_MASK) { - k_nspecial.sync(); - k_special.sync(); + atomKK->k_nspecial.sync(); + atomKK->k_special.sync(); } if (mask & BOND_MASK) { - k_num_bond.sync(); - k_bond_type.sync(); - k_bond_atom.sync(); + atomKK->k_num_bond.sync(); + atomKK->k_bond_type.sync(); + atomKK->k_bond_atom.sync(); } } else { - if (mask & X_MASK) k_x.sync(); - if (mask & V_MASK) k_v.sync(); - if (mask & F_MASK) k_f.sync(); - if (mask & TAG_MASK) k_tag.sync(); - if (mask & TYPE_MASK) k_type.sync(); - if (mask & 
MASK_MASK) k_mask.sync(); - if (mask & IMAGE_MASK) k_image.sync(); - if (mask & MOLECULE_MASK) k_molecule.sync(); + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & MOLECULE_MASK) atomKK->k_molecule.sync(); if (mask & SPECIAL_MASK) { - k_nspecial.sync(); - k_special.sync(); + atomKK->k_nspecial.sync(); + atomKK->k_special.sync(); } if (mask & BOND_MASK) { - k_num_bond.sync(); - k_bond_type.sync(); - k_bond_atom.sync(); + atomKK->k_num_bond.sync(); + atomKK->k_bond_type.sync(); + atomKK->k_bond_atom.sync(); } } } diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index 807f733745..933e029aa4 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -21,11 +21,10 @@ #include "atom_masks.h" #include "memory_kokkos.h" #include "error.h" -#include "force.h" using namespace LAMMPS_NS; -#define DELTA 16384 +#define DELTA 10 /* ---------------------------------------------------------------------- */ @@ -59,7 +58,8 @@ AtomVecChargeKokkos::AtomVecChargeKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) void AtomVecChargeKokkos::grow(int n) { - if (n == 0) nmax += DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; else nmax = n; atomKK->nmax = nmax; if (nmax < 0 || nmax > MAXSMALLINT) @@ -1069,38 +1069,24 @@ bigint AtomVecChargeKokkos::memory_usage() void AtomVecChargeKokkos::sync(ExecutionSpace space, unsigned int mask) { - int nlocal = atom->nlocal; - int nall = atom->nlocal + atom->nghost; - - // avoid unnecessary data transfer - - auto k_x = Kokkos::subview(atomKK->k_x,std::make_pair(0,nall),Kokkos::ALL); - auto k_v = Kokkos::subview(atomKK->k_v,std::make_pair(0,nall),Kokkos::ALL); - auto k_f = 
Kokkos::subview(atomKK->k_f,std::make_pair(0,(!force || force->newton)?nall:nlocal),Kokkos::ALL); - auto k_tag = Kokkos::subview(atomKK->k_tag,std::make_pair(0,nall)); - auto k_type = Kokkos::subview(atomKK->k_type,std::make_pair(0,nall)); - auto k_mask = Kokkos::subview(atomKK->k_mask,std::make_pair(0,nall)); - auto k_image = Kokkos::subview(atomKK->k_image,std::make_pair(0,nall)); - auto k_q = Kokkos::subview(atomKK->k_q,std::make_pair(0,nall)); - if (space == Device) { - if (mask & X_MASK) k_x.sync(); - if (mask & V_MASK) k_v.sync(); - if (mask & F_MASK) k_f.sync(); - if (mask & TAG_MASK) k_tag.sync(); - if (mask & TYPE_MASK) k_type.sync(); - if (mask & MASK_MASK) k_mask.sync(); - if (mask & IMAGE_MASK) k_image.sync(); - if (mask & Q_MASK) k_q.sync(); + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & Q_MASK) atomKK->k_q.sync(); } else { - if (mask & X_MASK) k_x.sync(); - if (mask & V_MASK) k_v.sync(); - if (mask & F_MASK) k_f.sync(); - if (mask & TAG_MASK) k_tag.sync(); - if (mask & TYPE_MASK) k_type.sync(); - if (mask & MASK_MASK) k_mask.sync(); - if (mask & IMAGE_MASK) k_image.sync(); - if (mask & Q_MASK) k_q.sync(); + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & Q_MASK) atomKK->k_q.sync(); } } diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 64105eaff5..8da89a49ca 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -21,11 +21,10 @@ 
#include "atom_masks.h" #include "memory_kokkos.h" #include "error.h" -#include "force.h" using namespace LAMMPS_NS; -#define DELTA 16384 +#define DELTA 10 /* ---------------------------------------------------------------------- */ @@ -61,7 +60,8 @@ AtomVecDPDKokkos::AtomVecDPDKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) void AtomVecDPDKokkos::grow(int n) { - if (n == 0) nmax += DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; else nmax = n; atomKK->nmax = nmax; if (nmax < 0 || nmax > MAXSMALLINT) @@ -1857,62 +1857,40 @@ bigint AtomVecDPDKokkos::memory_usage() void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask) { - int nlocal = atom->nlocal; - int nall = atom->nlocal + atom->nghost; - - // avoid unnecessary data transfer - - auto k_x = Kokkos::subview(atomKK->k_x,std::make_pair(0,nall),Kokkos::ALL); - auto k_v = Kokkos::subview(atomKK->k_v,std::make_pair(0,nall),Kokkos::ALL); - auto k_f = Kokkos::subview(atomKK->k_f,std::make_pair(0,(!force || force->newton)?nall:nlocal),Kokkos::ALL); - auto k_tag = Kokkos::subview(atomKK->k_tag,std::make_pair(0,nall)); - auto k_type = Kokkos::subview(atomKK->k_type,std::make_pair(0,nall)); - auto k_mask = Kokkos::subview(atomKK->k_mask,std::make_pair(0,nall)); - auto k_image = Kokkos::subview(atomKK->k_image,std::make_pair(0,nall)); - auto k_rho = Kokkos::subview(atomKK->k_rho,std::make_pair(0,nall)); - auto k_dpdTheta = Kokkos::subview(atomKK->k_dpdTheta,std::make_pair(0,nall)); - auto k_uCond = Kokkos::subview(atomKK->k_uCond,std::make_pair(0,nall)); - auto k_uMech = Kokkos::subview(atomKK->k_uMech,std::make_pair(0,nall)); - auto k_uChem = Kokkos::subview(atomKK->k_uChem,std::make_pair(0,nall)); - auto k_uCG = Kokkos::subview(atomKK->k_uCG,std::make_pair(0,nall)); - auto k_uCGnew = Kokkos::subview(atomKK->k_uCGnew,std::make_pair(0,nall)); - auto k_duChem = Kokkos::subview(atomKK->k_duChem,std::make_pair(0,nall)); - auto k_dvector = 
Kokkos::subview(atomKK->k_dvector,std::make_pair(0,nall),Kokkos::ALL); - if (space == Device) { - if (mask & X_MASK) k_x.sync(); - if (mask & V_MASK) k_v.sync(); - if (mask & F_MASK) k_f.sync(); - if (mask & TAG_MASK) k_tag.sync(); - if (mask & TYPE_MASK) k_type.sync(); - if (mask & MASK_MASK) k_mask.sync(); - if (mask & IMAGE_MASK) k_image.sync(); - if (mask & DPDRHO_MASK) k_rho.sync(); - if (mask & DPDTHETA_MASK) k_dpdTheta.sync(); - if (mask & UCOND_MASK) k_uCond.sync(); - if (mask & UMECH_MASK) k_uMech.sync(); - if (mask & UCHEM_MASK) k_uChem.sync(); - if (mask & UCG_MASK) k_uCG.sync(); - if (mask & UCGNEW_MASK) k_uCGnew.sync(); - if (mask & DUCHEM_MASK) k_duChem.sync(); - if (mask & DVECTOR_MASK) k_dvector.sync(); + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & DPDRHO_MASK) atomKK->k_rho.sync(); + if (mask & DPDTHETA_MASK) atomKK->k_dpdTheta.sync(); + if (mask & UCOND_MASK) atomKK->k_uCond.sync(); + if (mask & UMECH_MASK) atomKK->k_uMech.sync(); + if (mask & UCHEM_MASK) atomKK->k_uChem.sync(); + if (mask & UCG_MASK) atomKK->k_uCG.sync(); + if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync(); + if (mask & DUCHEM_MASK) atomKK->k_duChem.sync(); + if (mask & DVECTOR_MASK) atomKK->k_dvector.sync(); } else { - if (mask & X_MASK) k_x.sync(); - if (mask & V_MASK) k_v.sync(); - if (mask & F_MASK) k_f.sync(); - if (mask & TAG_MASK) k_tag.sync(); - if (mask & TYPE_MASK) k_type.sync(); - if (mask & MASK_MASK) k_mask.sync(); - if (mask & IMAGE_MASK) k_image.sync(); - if (mask & DPDRHO_MASK) k_rho.sync(); - if (mask & DPDTHETA_MASK) k_dpdTheta.sync(); - if (mask & UCOND_MASK) k_uCond.sync(); - if (mask & UMECH_MASK) k_uMech.sync(); - if (mask & UCHEM_MASK) k_uChem.sync(); - if (mask & UCG_MASK) 
k_uCG.sync(); - if (mask & UCGNEW_MASK) k_uCGnew.sync(); - if (mask & DUCHEM_MASK) k_duChem.sync(); - if (mask & DVECTOR_MASK) k_dvector.sync(); + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & DPDRHO_MASK) atomKK->k_rho.sync(); + if (mask & DPDTHETA_MASK) atomKK->k_dpdTheta.sync(); + if (mask & UCOND_MASK) atomKK->k_uCond.sync(); + if (mask & UMECH_MASK) atomKK->k_uMech.sync(); + if (mask & UCHEM_MASK) atomKK->k_uChem.sync(); + if (mask & UCG_MASK) atomKK->k_uCG.sync(); + if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync(); + if (mask & DUCHEM_MASK) atomKK->k_duChem.sync(); + if (mask & DVECTOR_MASK) atomKK->k_dvector.sync(); } } diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index f02a92d2f3..c07f3e0381 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -21,11 +21,10 @@ #include "atom_masks.h" #include "memory_kokkos.h" #include "error.h" -#include "force.h" using namespace LAMMPS_NS; -#define DELTA 16384 +#define DELTA 10 /* ---------------------------------------------------------------------- */ @@ -59,7 +58,8 @@ AtomVecFullKokkos::AtomVecFullKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) void AtomVecFullKokkos::grow(int n) { - if (n == 0) nmax += DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; else nmax = n; atomKK->nmax = nmax; if (nmax < 0 || nmax > MAXSMALLINT) @@ -1652,126 +1652,89 @@ bigint AtomVecFullKokkos::memory_usage() void AtomVecFullKokkos::sync(ExecutionSpace space, unsigned int mask) { - int nlocal = atom->nlocal; - int nall = atom->nlocal + atom->nghost; - - // avoid unnecessary data transfer - - auto k_x = Kokkos::subview(atomKK->k_x,std::make_pair(0,nall),Kokkos::ALL); - auto k_v 
= Kokkos::subview(atomKK->k_v,std::make_pair(0,nall),Kokkos::ALL); - auto k_f = Kokkos::subview(atomKK->k_f,std::make_pair(0,(!force || force->newton)?nall:nlocal),Kokkos::ALL); - auto k_tag = Kokkos::subview(atomKK->k_tag,std::make_pair(0,nall)); - auto k_type = Kokkos::subview(atomKK->k_type,std::make_pair(0,nall)); - auto k_mask = Kokkos::subview(atomKK->k_mask,std::make_pair(0,nall)); - auto k_image = Kokkos::subview(atomKK->k_image,std::make_pair(0,nall)); - auto k_q = Kokkos::subview(atomKK->k_q,std::make_pair(0,nall)); - auto k_molecule = Kokkos::subview(atomKK->k_molecule,std::make_pair(0,nall)); - auto k_nspecial = Kokkos::subview(atomKK->k_nspecial,std::make_pair(0,nall),Kokkos::ALL); - auto k_special = Kokkos::subview(atomKK->k_special,std::make_pair(0,nall),Kokkos::ALL); - auto k_num_bond = Kokkos::subview(atomKK->k_num_bond,std::make_pair(0,nall)); - auto k_bond_type = Kokkos::subview(atomKK->k_bond_type,std::make_pair(0,nall),Kokkos::ALL); - auto k_bond_atom = Kokkos::subview(atomKK->k_bond_atom,std::make_pair(0,nall),Kokkos::ALL); - auto k_num_angle = Kokkos::subview(atomKK->k_num_angle,std::make_pair(0,nall)); - auto k_angle_type = Kokkos::subview(atomKK->k_angle_type,std::make_pair(0,nall),Kokkos::ALL); - auto k_angle_atom1 = Kokkos::subview(atomKK->k_angle_atom1,std::make_pair(0,nall),Kokkos::ALL); - auto k_angle_atom2 = Kokkos::subview(atomKK->k_angle_atom2,std::make_pair(0,nall),Kokkos::ALL); - auto k_angle_atom3 = Kokkos::subview(atomKK->k_angle_atom3,std::make_pair(0,nall),Kokkos::ALL); - auto k_num_dihedral = Kokkos::subview(atomKK->k_num_dihedral,std::make_pair(0,nall)); - auto k_dihedral_type = Kokkos::subview(atomKK->k_dihedral_type,std::make_pair(0,nall),Kokkos::ALL); - auto k_dihedral_atom1 = Kokkos::subview(atomKK->k_dihedral_atom1,std::make_pair(0,nall),Kokkos::ALL); - auto k_dihedral_atom2 = Kokkos::subview(atomKK->k_dihedral_atom2,std::make_pair(0,nall),Kokkos::ALL); - auto k_dihedral_atom3 = 
Kokkos::subview(atomKK->k_dihedral_atom3,std::make_pair(0,nall),Kokkos::ALL); - auto k_dihedral_atom4 = Kokkos::subview(atomKK->k_dihedral_atom4,std::make_pair(0,nall),Kokkos::ALL); - auto k_num_improper = Kokkos::subview(atomKK->k_num_improper,std::make_pair(0,nall)); - auto k_improper_type = Kokkos::subview(atomKK->k_improper_type,std::make_pair(0,nall),Kokkos::ALL); - auto k_improper_atom1 = Kokkos::subview(atomKK->k_improper_atom1,std::make_pair(0,nall),Kokkos::ALL); - auto k_improper_atom2 = Kokkos::subview(atomKK->k_improper_atom2,std::make_pair(0,nall),Kokkos::ALL); - auto k_improper_atom3 = Kokkos::subview(atomKK->k_improper_atom3,std::make_pair(0,nall),Kokkos::ALL); - auto k_improper_atom4 = Kokkos::subview(atomKK->k_improper_atom4,std::make_pair(0,nall),Kokkos::ALL); - if (space == Device) { - if (mask & X_MASK) k_x.sync(); - if (mask & V_MASK) k_v.sync(); - if (mask & F_MASK) k_f.sync(); - if (mask & TAG_MASK) k_tag.sync(); - if (mask & TYPE_MASK) k_type.sync(); - if (mask & MASK_MASK) k_mask.sync(); - if (mask & IMAGE_MASK) k_image.sync(); - if (mask & Q_MASK) k_q.sync(); - if (mask & MOLECULE_MASK) k_molecule.sync(); + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & Q_MASK) atomKK->k_q.sync(); + if (mask & MOLECULE_MASK) atomKK->k_molecule.sync(); if (mask & SPECIAL_MASK) { - k_nspecial.sync(); - k_special.sync(); + atomKK->k_nspecial.sync(); + atomKK->k_special.sync(); } if (mask & BOND_MASK) { - k_num_bond.sync(); - k_bond_type.sync(); - k_bond_atom.sync(); + atomKK->k_num_bond.sync(); + atomKK->k_bond_type.sync(); + atomKK->k_bond_atom.sync(); } if (mask & ANGLE_MASK) { - k_num_angle.sync(); - k_angle_type.sync(); - k_angle_atom1.sync(); - k_angle_atom2.sync(); - 
k_angle_atom3.sync(); + atomKK->k_num_angle.sync(); + atomKK->k_angle_type.sync(); + atomKK->k_angle_atom1.sync(); + atomKK->k_angle_atom2.sync(); + atomKK->k_angle_atom3.sync(); } if (mask & DIHEDRAL_MASK) { - k_num_dihedral.sync(); - k_dihedral_type.sync(); - k_dihedral_atom1.sync(); - k_dihedral_atom2.sync(); - k_dihedral_atom3.sync(); - k_dihedral_atom4.sync(); + atomKK->k_num_dihedral.sync(); + atomKK->k_dihedral_type.sync(); + atomKK->k_dihedral_atom1.sync(); + atomKK->k_dihedral_atom2.sync(); + atomKK->k_dihedral_atom3.sync(); + atomKK->k_dihedral_atom4.sync(); } if (mask & IMPROPER_MASK) { - k_num_improper.sync(); - k_improper_type.sync(); - k_improper_atom1.sync(); - k_improper_atom2.sync(); - k_improper_atom3.sync(); - k_improper_atom4.sync(); + atomKK->k_num_improper.sync(); + atomKK->k_improper_type.sync(); + atomKK->k_improper_atom1.sync(); + atomKK->k_improper_atom2.sync(); + atomKK->k_improper_atom3.sync(); + atomKK->k_improper_atom4.sync(); } } else { - if (mask & X_MASK) k_x.sync(); - if (mask & V_MASK) k_v.sync(); - if (mask & F_MASK) k_f.sync(); - if (mask & TAG_MASK) k_tag.sync(); - if (mask & TYPE_MASK) k_type.sync(); - if (mask & MASK_MASK) k_mask.sync(); - if (mask & IMAGE_MASK) k_image.sync(); - if (mask & Q_MASK) k_q.sync(); - if (mask & MOLECULE_MASK) k_molecule.sync(); + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & Q_MASK) atomKK->k_q.sync(); + if (mask & MOLECULE_MASK) atomKK->k_molecule.sync(); if (mask & SPECIAL_MASK) { - k_nspecial.sync(); - k_special.sync(); + atomKK->k_nspecial.sync(); + atomKK->k_special.sync(); } if (mask & BOND_MASK) { - k_num_bond.sync(); - k_bond_type.sync(); - k_bond_atom.sync(); + atomKK->k_num_bond.sync(); + atomKK->k_bond_type.sync(); 
+ atomKK->k_bond_atom.sync(); } if (mask & ANGLE_MASK) { - k_num_angle.sync(); - k_angle_type.sync(); - k_angle_atom1.sync(); - k_angle_atom2.sync(); - k_angle_atom3.sync(); + atomKK->k_num_angle.sync(); + atomKK->k_angle_type.sync(); + atomKK->k_angle_atom1.sync(); + atomKK->k_angle_atom2.sync(); + atomKK->k_angle_atom3.sync(); } if (mask & DIHEDRAL_MASK) { - k_num_dihedral.sync(); - k_dihedral_type.sync(); - k_dihedral_atom1.sync(); - k_dihedral_atom2.sync(); - k_dihedral_atom3.sync(); - k_dihedral_atom4.sync(); + atomKK->k_num_dihedral.sync(); + atomKK->k_dihedral_type.sync(); + atomKK->k_dihedral_atom1.sync(); + atomKK->k_dihedral_atom2.sync(); + atomKK->k_dihedral_atom3.sync(); + atomKK->k_dihedral_atom4.sync(); } if (mask & IMPROPER_MASK) { - k_num_improper.sync(); - k_improper_type.sync(); - k_improper_atom1.sync(); - k_improper_atom2.sync(); - k_improper_atom3.sync(); - k_improper_atom4.sync(); + atomKK->k_num_improper.sync(); + atomKK->k_improper_type.sync(); + atomKK->k_improper_atom1.sync(); + atomKK->k_improper_atom2.sync(); + atomKK->k_improper_atom3.sync(); + atomKK->k_improper_atom4.sync(); } } } diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index 25ef6f0c7e..f832cddce2 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -21,11 +21,10 @@ #include "atom_masks.h" #include "memory_kokkos.h" #include "error.h" -#include "force.h" using namespace LAMMPS_NS; -#define DELTA 16384 +#define DELTA 10 /* ---------------------------------------------------------------------- */ @@ -59,7 +58,8 @@ AtomVecMolecularKokkos::AtomVecMolecularKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) void AtomVecMolecularKokkos::grow(int n) { - if (n == 0) nmax += DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; else nmax = n; atomKK->nmax = nmax; if (nmax < 0 || nmax > MAXSMALLINT) @@ -2050,123 +2050,87 @@ bigint AtomVecMolecularKokkos::memory_usage() void 
AtomVecMolecularKokkos::sync(ExecutionSpace space, unsigned int mask) { - int nlocal = atom->nlocal; - int nall = atom->nlocal + atom->nghost; - - // avoid unnecessary data transfer - - auto k_x = Kokkos::subview(atomKK->k_x,std::make_pair(0,nall),Kokkos::ALL); - auto k_v = Kokkos::subview(atomKK->k_v,std::make_pair(0,nall),Kokkos::ALL); - auto k_f = Kokkos::subview(atomKK->k_f,std::make_pair(0,(!force || force->newton)?nall:nlocal),Kokkos::ALL); - auto k_tag = Kokkos::subview(atomKK->k_tag,std::make_pair(0,nall)); - auto k_type = Kokkos::subview(atomKK->k_type,std::make_pair(0,nall)); - auto k_mask = Kokkos::subview(atomKK->k_mask,std::make_pair(0,nall)); - auto k_image = Kokkos::subview(atomKK->k_image,std::make_pair(0,nall)); - auto k_molecule = Kokkos::subview(atomKK->k_molecule,std::make_pair(0,nall)); - auto k_nspecial = Kokkos::subview(atomKK->k_nspecial,std::make_pair(0,nall),Kokkos::ALL); - auto k_special = Kokkos::subview(atomKK->k_special,std::make_pair(0,nall),Kokkos::ALL); - auto k_num_bond = Kokkos::subview(atomKK->k_num_bond,std::make_pair(0,nall)); - auto k_bond_type = Kokkos::subview(atomKK->k_bond_type,std::make_pair(0,nall),Kokkos::ALL); - auto k_bond_atom = Kokkos::subview(atomKK->k_bond_atom,std::make_pair(0,nall),Kokkos::ALL); - auto k_num_angle = Kokkos::subview(atomKK->k_num_angle,std::make_pair(0,nall)); - auto k_angle_type = Kokkos::subview(atomKK->k_angle_type,std::make_pair(0,nall),Kokkos::ALL); - auto k_angle_atom1 = Kokkos::subview(atomKK->k_angle_atom1,std::make_pair(0,nall),Kokkos::ALL); - auto k_angle_atom2 = Kokkos::subview(atomKK->k_angle_atom2,std::make_pair(0,nall),Kokkos::ALL); - auto k_angle_atom3 = Kokkos::subview(atomKK->k_angle_atom3,std::make_pair(0,nall),Kokkos::ALL); - auto k_num_dihedral = Kokkos::subview(atomKK->k_num_dihedral,std::make_pair(0,nall)); - auto k_dihedral_type = Kokkos::subview(atomKK->k_dihedral_type,std::make_pair(0,nall),Kokkos::ALL); - auto k_dihedral_atom1 = 
Kokkos::subview(atomKK->k_dihedral_atom1,std::make_pair(0,nall),Kokkos::ALL); - auto k_dihedral_atom2 = Kokkos::subview(atomKK->k_dihedral_atom2,std::make_pair(0,nall),Kokkos::ALL); - auto k_dihedral_atom3 = Kokkos::subview(atomKK->k_dihedral_atom3,std::make_pair(0,nall),Kokkos::ALL); - auto k_dihedral_atom4 = Kokkos::subview(atomKK->k_dihedral_atom4,std::make_pair(0,nall),Kokkos::ALL); - auto k_num_improper = Kokkos::subview(atomKK->k_num_improper,std::make_pair(0,nall)); - auto k_improper_type = Kokkos::subview(atomKK->k_improper_type,std::make_pair(0,nall),Kokkos::ALL); - auto k_improper_atom1 = Kokkos::subview(atomKK->k_improper_atom1,std::make_pair(0,nall),Kokkos::ALL); - auto k_improper_atom2 = Kokkos::subview(atomKK->k_improper_atom2,std::make_pair(0,nall),Kokkos::ALL); - auto k_improper_atom3 = Kokkos::subview(atomKK->k_improper_atom3,std::make_pair(0,nall),Kokkos::ALL); - auto k_improper_atom4 = Kokkos::subview(atomKK->k_improper_atom4,std::make_pair(0,nall),Kokkos::ALL); - if (space == Device) { - if (mask & X_MASK) k_x.sync(); - if (mask & V_MASK) k_v.sync(); - if (mask & F_MASK) k_f.sync(); - if (mask & TAG_MASK) k_tag.sync(); - if (mask & TYPE_MASK) k_type.sync(); - if (mask & MASK_MASK) k_mask.sync(); - if (mask & IMAGE_MASK) k_image.sync(); - if (mask & MOLECULE_MASK) k_molecule.sync(); + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & MOLECULE_MASK) atomKK->k_molecule.sync(); if (mask & SPECIAL_MASK) { - k_nspecial.sync(); - k_special.sync(); + atomKK->k_nspecial.sync(); + atomKK->k_special.sync(); } if (mask & BOND_MASK) { - k_num_bond.sync(); - k_bond_type.sync(); - k_bond_atom.sync(); + atomKK->k_num_bond.sync(); + atomKK->k_bond_type.sync(); + atomKK->k_bond_atom.sync(); } if 
(mask & ANGLE_MASK) { - k_num_angle.sync(); - k_angle_type.sync(); - k_angle_atom1.sync(); - k_angle_atom2.sync(); - k_angle_atom3.sync(); + atomKK->k_num_angle.sync(); + atomKK->k_angle_type.sync(); + atomKK->k_angle_atom1.sync(); + atomKK->k_angle_atom2.sync(); + atomKK->k_angle_atom3.sync(); } if (mask & DIHEDRAL_MASK) { - k_num_dihedral.sync(); - k_dihedral_type.sync(); - k_dihedral_atom1.sync(); - k_dihedral_atom2.sync(); - k_dihedral_atom3.sync(); - k_dihedral_atom4.sync(); + atomKK->k_num_dihedral.sync(); + atomKK->k_dihedral_type.sync(); + atomKK->k_dihedral_atom1.sync(); + atomKK->k_dihedral_atom2.sync(); + atomKK->k_dihedral_atom3.sync(); + atomKK->k_dihedral_atom4.sync(); } if (mask & IMPROPER_MASK) { - k_num_improper.sync(); - k_improper_type.sync(); - k_improper_atom1.sync(); - k_improper_atom2.sync(); - k_improper_atom3.sync(); - k_improper_atom4.sync(); + atomKK->k_num_improper.sync(); + atomKK->k_improper_type.sync(); + atomKK->k_improper_atom1.sync(); + atomKK->k_improper_atom2.sync(); + atomKK->k_improper_atom3.sync(); + atomKK->k_improper_atom4.sync(); } } else { - if (mask & X_MASK) k_x.sync(); - if (mask & V_MASK) k_v.sync(); - if (mask & F_MASK) k_f.sync(); - if (mask & TAG_MASK) k_tag.sync(); - if (mask & TYPE_MASK) k_type.sync(); - if (mask & MASK_MASK) k_mask.sync(); - if (mask & IMAGE_MASK) k_image.sync(); - if (mask & MOLECULE_MASK) k_molecule.sync(); + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & MOLECULE_MASK) atomKK->k_molecule.sync(); if (mask & SPECIAL_MASK) { - k_nspecial.sync(); - k_special.sync(); + atomKK->k_nspecial.sync(); + atomKK->k_special.sync(); } if (mask & BOND_MASK) { - k_num_bond.sync(); - k_bond_type.sync(); - k_bond_atom.sync(); + 
atomKK->k_num_bond.sync(); + atomKK->k_bond_type.sync(); + atomKK->k_bond_atom.sync(); } if (mask & ANGLE_MASK) { - k_num_angle.sync(); - k_angle_type.sync(); - k_angle_atom1.sync(); - k_angle_atom2.sync(); - k_angle_atom3.sync(); + atomKK->k_num_angle.sync(); + atomKK->k_angle_type.sync(); + atomKK->k_angle_atom1.sync(); + atomKK->k_angle_atom2.sync(); + atomKK->k_angle_atom3.sync(); } if (mask & DIHEDRAL_MASK) { - k_num_dihedral.sync(); - k_dihedral_type.sync(); - k_dihedral_atom1.sync(); - k_dihedral_atom2.sync(); - k_dihedral_atom3.sync(); - k_dihedral_atom4.sync(); + atomKK->k_num_dihedral.sync(); + atomKK->k_dihedral_type.sync(); + atomKK->k_dihedral_atom1.sync(); + atomKK->k_dihedral_atom2.sync(); + atomKK->k_dihedral_atom3.sync(); + atomKK->k_dihedral_atom4.sync(); } if (mask & IMPROPER_MASK) { - k_num_improper.sync(); - k_improper_type.sync(); - k_improper_atom1.sync(); - k_improper_atom2.sync(); - k_improper_atom3.sync(); - k_improper_atom4.sync(); + atomKK->k_num_improper.sync(); + atomKK->k_improper_type.sync(); + atomKK->k_improper_atom1.sync(); + atomKK->k_improper_atom2.sync(); + atomKK->k_improper_atom3.sync(); + atomKK->k_improper_atom4.sync(); } } } diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index df86cacccc..17c2e8d804 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -27,11 +27,10 @@ #include "memory.h" #include "error.h" #include "memory_kokkos.h" -#include "force.h" using namespace LAMMPS_NS; -#define DELTA 16384 +#define DELTA 10 static const double MY_PI = 3.14159265358979323846; // pi @@ -94,7 +93,8 @@ void AtomVecSphereKokkos::init() void AtomVecSphereKokkos::grow(int n) { - if (n == 0) nmax += DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; else nmax = n; atom->nmax = nmax; if (nmax < 0 || nmax > MAXSMALLINT) @@ -2792,47 +2792,30 @@ bigint AtomVecSphereKokkos::memory_usage() void AtomVecSphereKokkos::sync(ExecutionSpace 
space, unsigned int mask) { - int nlocal = atom->nlocal; - int nall = atom->nlocal + atom->nghost; - - // avoid unnecessary data transfer - - auto k_x = Kokkos::subview(atomKK->k_x,std::make_pair(0,nall),Kokkos::ALL); - auto k_v = Kokkos::subview(atomKK->k_v,std::make_pair(0,nall),Kokkos::ALL); - auto k_f = Kokkos::subview(atomKK->k_f,std::make_pair(0,(!force || force->newton)?nall:nlocal),Kokkos::ALL); - auto k_tag = Kokkos::subview(atomKK->k_tag,std::make_pair(0,nall)); - auto k_type = Kokkos::subview(atomKK->k_type,std::make_pair(0,nall)); - auto k_mask = Kokkos::subview(atomKK->k_mask,std::make_pair(0,nall)); - auto k_image = Kokkos::subview(atomKK->k_image,std::make_pair(0,nall)); - auto k_radius = Kokkos::subview(atomKK->k_radius,std::make_pair(0,nall)); - auto k_rmass = Kokkos::subview(atomKK->k_rmass,std::make_pair(0,nall)); - auto k_omega = Kokkos::subview(atomKK->k_omega,std::make_pair(0,nall),Kokkos::ALL); - auto k_torque = Kokkos::subview(atomKK->k_torque,std::make_pair(0,nall),Kokkos::ALL); - if (space == Device) { - if (mask & X_MASK) k_x.sync(); - if (mask & V_MASK) k_v.sync(); - if (mask & F_MASK) k_f.sync(); - if (mask & TAG_MASK) k_tag.sync(); - if (mask & TYPE_MASK) k_type.sync(); - if (mask & MASK_MASK) k_mask.sync(); - if (mask & IMAGE_MASK) k_image.sync(); - if (mask & RADIUS_MASK) k_radius.sync(); - if (mask & RMASS_MASK) k_rmass.sync(); - if (mask & OMEGA_MASK) k_omega.sync(); - if (mask & TORQUE_MASK) k_torque.sync(); + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & RADIUS_MASK) atomKK->k_radius.sync(); + if (mask & RMASS_MASK) atomKK->k_rmass.sync(); + if (mask & OMEGA_MASK) atomKK->k_omega.sync(); + if (mask & TORQUE_MASK) atomKK->k_torque.sync(); } else { - if (mask & 
X_MASK) k_x.sync(); - if (mask & V_MASK) k_v.sync(); - if (mask & F_MASK) k_f.sync(); - if (mask & TAG_MASK) k_tag.sync(); - if (mask & TYPE_MASK) k_type.sync(); - if (mask & MASK_MASK) k_mask.sync(); - if (mask & IMAGE_MASK) k_image.sync(); - if (mask & RADIUS_MASK) k_radius.sync(); - if (mask & RMASS_MASK) k_rmass.sync(); - if (mask & OMEGA_MASK) k_omega.sync(); - if (mask & TORQUE_MASK) k_torque.sync(); + if (mask & X_MASK) atomKK->k_x.sync(); + if (mask & V_MASK) atomKK->k_v.sync(); + if (mask & F_MASK) atomKK->k_f.sync(); + if (mask & TAG_MASK) atomKK->k_tag.sync(); + if (mask & TYPE_MASK) atomKK->k_type.sync(); + if (mask & MASK_MASK) atomKK->k_mask.sync(); + if (mask & IMAGE_MASK) atomKK->k_image.sync(); + if (mask & RADIUS_MASK) atomKK->k_radius.sync(); + if (mask & RMASS_MASK) atomKK->k_rmass.sync(); + if (mask & OMEGA_MASK) atomKK->k_omega.sync(); + if (mask & TORQUE_MASK) atomKK->k_torque.sync(); } } From 073f0034707d5babc4effb4507f81ff296e732bd Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 9 Apr 2019 15:17:40 -0600 Subject: [PATCH 26/34] Doc tweak --- doc/src/Speed_kokkos.txt | 30 ++++++++++++++++-------------- doc/src/package.txt | 27 ++++++++++++++------------- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/doc/src/Speed_kokkos.txt b/doc/src/Speed_kokkos.txt index 23155cd540..fd33491253 100644 --- a/doc/src/Speed_kokkos.txt +++ b/doc/src/Speed_kokkos.txt @@ -46,7 +46,7 @@ software version 7.5 or later must be installed on your system. See the discussion for the "GPU package"_Speed_gpu.html for details of how to check and do this. -NOTE: Kokkos with CUDA currently implicitly assumes, that the MPI +NOTE: Kokkos with CUDA currently implicitly assumes that the MPI library is CUDA-aware and has support for GPU-direct. This is not always the case, especially when using pre-compiled MPI libraries provided by a Linux distribution. This is not a problem when using @@ -207,19 +207,21 @@ supports. 
[Running on GPUs:] -Use the "-k" "command-line switch"_Run_options.html to -specify the number of GPUs per node. Typically the -np setting of the -mpirun command should set the number of MPI tasks/node to be equal to -the number of physical GPUs on the node. You can assign multiple MPI -tasks to the same GPU with the KOKKOS package, but this is usually -only faster if significant portions of the input script have not -been ported to use Kokkos. Using CUDA MPS is recommended in this -scenario. Using a CUDA-aware MPI library with support for GPU-direct -is highly recommended. GPU-direct use can be avoided by using -"-pk kokkos gpu/direct no"_package.html. -As above for multi-core CPUs (and no GPU), if N is the number of -physical cores/node, then the number of MPI tasks/node should not -exceed N. +Use the "-k" "command-line switch"_Run_options.html to specify the +number of GPUs per node. Typically the -np setting of the mpirun command +should set the number of MPI tasks/node to be equal to the number of +physical GPUs on the node. You can assign multiple MPI tasks to the same +GPU with the KOKKOS package, but this is usually only faster if some +portions of the input script have not been ported to use Kokkos. In this +case, packing/unpacking communication buffers on the host may also give +a speedup (see the KOKKOS "package"_package.html command). Using CUDA MPS +is recommended in this scenario. + +Using a CUDA-aware MPI library with +support for GPU-direct is highly recommended. GPU-direct use can be +avoided by using "-pk kokkos gpu/direct no"_package.html. As above for +multi-core CPUs (and no GPU), if N is the number of physical cores/node, +then the number of MPI tasks/node should not exceed N. -k on g Ng :pre diff --git a/doc/src/package.txt b/doc/src/package.txt index b6759bf2e9..2f17b2c7f3 100644 --- a/doc/src/package.txt +++ b/doc/src/package.txt @@ -490,10 +490,10 @@ are rebuilt. The data is only for atoms that migrate to new processors.
"Forward" communication happens every timestep. "Reverse" communication happens every timestep if the {newton} option is on. The data is for atom coordinates and any other atom properties that needs to be updated -for ghost atoms owned by each processor. +for ghost atoms owned by each processor. The {comm} keyword is simply a short-cut to set the same value for both -the {comm/exchange} and {comm/forward} and {comm/reverse} keywords. +the {comm/exchange} and {comm/forward} and {comm/reverse} keywords. The value options for all 3 keywords are {no} or {host} or {device}. A value of {no} means to use the standard non-KOKKOS method of @@ -501,26 +501,26 @@ packing/unpacking data for the communication. A value of {host} means to use the host, typically a multi-core CPU, and perform the packing/unpacking in parallel with threads. A value of {device} means to use the device, typically a GPU, to perform the packing/unpacking -operation. +operation. The optimal choice for these keywords depends on the input script and the hardware used. The {no} value is useful for verifying that the Kokkos-based {host} and {device} values are working correctly. It is the -default when running on CPUs since it is usually the fastest. +default when running on CPUs since it is usually the fastest. When running on CPUs or Xeon Phi, the {host} and {device} values work identically. When using GPUs, the {device} value is the default since it will typically be optimal if all of your styles used in your input script are supported by the KOKKOS package. In this case data can stay on the GPU for many timesteps without being moved between the host and -GPU, if you use the {device} value. This requires that your MPI is able -to access GPU memory directly. Currently that is true for OpenMPI 1.8 -(or later versions), Mvapich2 1.9 (or later), and CrayMPI. If your -script uses styles (e.g. 
fixes) which are not yet supported by the -KOKKOS package, then data has to be move between the host and device -anyway, so it is typically faster to let the host handle communication, -by using the {host} value. Using {host} instead of {no} will enable use -of multiple threads to pack/unpack communicated data. +GPU, if you use the {device} value. If your script uses styles (e.g. +fixes) which are not yet supported by the KOKKOS package, then data has +to be moved between the host and device anyway, so it is typically faster +to let the host handle communication, by using the {host} value. Using +{host} instead of {no} will enable use of multiple threads to +pack/unpack communicated data. When running small systems on a GPU, +performing the exchange pack/unpack on the host CPU can give a speedup +since it reduces the number of CUDA kernel launches. The {gpu/direct} keyword chooses whether GPU-direct will be used. When this keyword is set to {on}, buffers in GPU memory are passed directly @@ -533,7 +533,8 @@ the {gpu/direct} keyword is automatically set to {off} by default. When the {gpu/direct} keyword is set to {off} while any of the {comm} keywords are set to {device}, the value for these {comm} keywords will be automatically changed to {host}. This setting has no effect if not -running on GPUs. +running on GPUs. GPU-direct is available for OpenMPI 1.8 (or later +versions), Mvapich2 1.9 (or later), and CrayMPI. 
:line From cf35ebe5fa1ca153cf1cf08aa8ee0b4cfb88d47c Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 9 Apr 2019 16:17:37 -0600 Subject: [PATCH 27/34] Revert optimization that is causing regression tests to fail --- src/KOKKOS/comm_kokkos.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index c496065ea0..52829356d9 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -637,9 +637,8 @@ void CommKokkos::exchange_device() k_exchange_copylist.h_view(i) = -1; } - auto k_exchange_copylist_short = Kokkos::subview(k_exchange_copylist,k_count.h_view()); - k_exchange_copylist_short.template modify(); - k_exchange_copylist_short.template sync(); + k_exchange_copylist.modify(); + k_exchange_copylist.sync(); nsend = k_count.h_view(); if (nsend > maxsend) grow_send_kokkos(nsend,1); nsend = From e4e2249b637d00dd0f0e524da20dd5ddf69d947c Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 10 Apr 2019 10:17:14 -0600 Subject: [PATCH 28/34] Fix issue in comm_kokkos --- src/KOKKOS/comm_kokkos.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 52829356d9..87986a9ca9 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -617,8 +617,9 @@ void CommKokkos::exchange_device() k_count.sync(); if (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) { - k_exchange_sendlist.resize(k_count.h_view()*1.1); - k_exchange_copylist.resize(k_count.h_view()*1.1); + k_exchange_lists.resize(2,k_count.h_view()*1.1); + k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL); + k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL); k_count.h_view()=k_exchange_sendlist.h_view.extent(0); } } From a0bc619550a18566c4a3079b9692508387afb098 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 21 May 2019 09:21:55 -0600 Subject: [PATCH 29/34] Need to call atomKK version 
of sync/modified in Kokkos atom_vec styles --- src/KOKKOS/atom_vec_angle_kokkos.cpp | 44 +++++----- src/KOKKOS/atom_vec_atomic_kokkos.cpp | 20 ++--- src/KOKKOS/atom_vec_bond_kokkos.cpp | 20 ++--- src/KOKKOS/atom_vec_charge_kokkos.cpp | 18 ++-- src/KOKKOS/atom_vec_dpd_kokkos.cpp | 78 ++++++++--------- src/KOKKOS/atom_vec_full_kokkos.cpp | 22 ++--- src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 32 +++---- src/KOKKOS/atom_vec_molecular_kokkos.cpp | 44 +++++----- src/KOKKOS/atom_vec_sphere_kokkos.cpp | 106 +++++++++++------------ 9 files changed, 192 insertions(+), 192 deletions(-) diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index df455dd3ff..e4f27e733a 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -66,8 +66,8 @@ void AtomVecAngleKokkos::grow(int n) if (nmax < 0 || nmax > MAXSMALLINT) error->one(FLERR,"Per-processor system is too big"); - sync(Device,ALL_MASK); - modified(Device,ALL_MASK); + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); @@ -99,7 +99,7 @@ void AtomVecAngleKokkos::grow(int n) "atom:angle_atom3"); grow_reset(); - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); if (atom->nextra_grow) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) @@ -283,7 +283,7 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n, // Choose correct forward PackComm kernel if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); + atomKK->sync(Host,X_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecAngleKokkos_PackComm f(atomKK->k_x,buf,list,iswap, @@ -310,7 +310,7 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n, } } } else { - sync(Device,X_MASK); + atomKK->sync(Device,X_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecAngleKokkos_PackComm f(atomKK->k_x,buf,list,iswap, @@ -398,8 +398,8 @@ int 
AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li const int nfirst, const int &pbc_flag, const int* const pbc) { if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); + atomKK->sync(Host,X_MASK); + atomKK->modified(Host,X_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecAngleKokkos_PackCommSelf @@ -430,8 +430,8 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li } } } else { - sync(Device,X_MASK); - modified(Device,X_MASK); + atomKK->sync(Device,X_MASK); + atomKK->modified(Device,X_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecAngleKokkos_PackCommSelf @@ -494,13 +494,13 @@ struct AtomVecAngleKokkos_UnpackComm { void AtomVecAngleKokkos::unpack_comm_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf ) { if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); + atomKK->sync(Host,X_MASK); + atomKK->modified(Host,X_MASK); struct AtomVecAngleKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); } else { - sync(Device,X_MASK); - modified(Device,X_MASK); + atomKK->sync(Device,X_MASK); + atomKK->modified(Device,X_MASK); struct AtomVecAngleKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); } @@ -643,7 +643,7 @@ void AtomVecAngleKokkos::unpack_comm_vel(int n, int first, double *buf) int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf) { if(n > 0) - sync(Host,F_MASK); + atomKK->sync(Host,F_MASK); int m = 0; const int last = first + n; @@ -660,7 +660,7 @@ int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf) void AtomVecAngleKokkos::unpack_reverse(int n, int *list, double *buf) { if(n > 0) - modified(Host,F_MASK); + atomKK->modified(Host,F_MASK); int m = 0; for (int i = 0; i < n; i++) { @@ -961,9 +961,9 @@ struct AtomVecAngleKokkos_UnpackBorder { void AtomVecAngleKokkos::unpack_border_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf, 
ExecutionSpace space) { - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); while (first+n >= nmax) grow(0); - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); if(space==Host) { struct AtomVecAngleKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,h_molecule,first); @@ -985,7 +985,7 @@ void AtomVecAngleKokkos::unpack_border(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1011,7 +1011,7 @@ void AtomVecAngleKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1413,7 +1413,7 @@ int AtomVecAngleKokkos::unpack_exchange(double *buf) { int nlocal = atom->nlocal; if (nlocal == nmax) grow(0); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | SPECIAL_MASK); @@ -1488,7 +1488,7 @@ int AtomVecAngleKokkos::size_restart() int AtomVecAngleKokkos::pack_restart(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | SPECIAL_MASK); @@ -1542,7 +1542,7 @@ int AtomVecAngleKokkos::unpack_restart(double *buf) if (atom->nextra_store) 
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); } - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | SPECIAL_MASK); diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index e3c1bee956..95e4ddd72b 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -62,8 +62,8 @@ void AtomVecAtomicKokkos::grow(int n) if (nmax < 0 || nmax > MAXSMALLINT) error->one(FLERR,"Per-processor system is too big"); - sync(Device,ALL_MASK); - modified(Device,ALL_MASK); + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); @@ -75,7 +75,7 @@ void AtomVecAtomicKokkos::grow(int n) memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); grow_reset(); - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); if (atom->nextra_grow) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) @@ -394,9 +394,9 @@ struct AtomVecAtomicKokkos_UnpackBorder { void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) { - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); while (first+n >= nmax) grow(0); - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); if(space==Host) { struct AtomVecAtomicKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,first); Kokkos::parallel_for(n,f); @@ -416,7 +416,7 @@ void AtomVecAtomicKokkos::unpack_border(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + 
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -441,7 +441,7 @@ void AtomVecAtomicKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -669,7 +669,7 @@ int AtomVecAtomicKokkos::unpack_exchange(double *buf) { int nlocal = atom->nlocal; if (nlocal == nmax) grow(0); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK); int m = 1; @@ -721,7 +721,7 @@ int AtomVecAtomicKokkos::size_restart() int AtomVecAtomicKokkos::pack_restart(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK ); int m = 1; @@ -756,7 +756,7 @@ int AtomVecAtomicKokkos::unpack_restart(double *buf) if (atom->nextra_store) memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); } - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK ); int m = 1; diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index 21768c6009..92311d5d09 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -65,8 +65,8 @@ void AtomVecBondKokkos::grow(int n) if (nmax < 0 || nmax > MAXSMALLINT) error->one(FLERR,"Per-processor system is too big"); - sync(Device,ALL_MASK); - modified(Device,ALL_MASK); + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); @@ -85,7 +85,7 @@ 
void AtomVecBondKokkos::grow(int n) memoryKK->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom,"atom:bond_atom"); grow_reset(); - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); if (atom->nextra_grow) for (int iextra = 0; iextra < atomKK->nextra_grow; iextra++) @@ -469,9 +469,9 @@ struct AtomVecBondKokkos_UnpackBorder { void AtomVecBondKokkos::unpack_border_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf, ExecutionSpace space) { - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); while (first+n >= nmax) grow(0); - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); if(space==Host) { struct AtomVecBondKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,h_molecule,first); @@ -493,7 +493,7 @@ void AtomVecBondKokkos::unpack_border(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -519,7 +519,7 @@ void AtomVecBondKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -867,7 +867,7 @@ int AtomVecBondKokkos::unpack_exchange(double *buf) { int nlocal = atom->nlocal; if (nlocal == nmax) grow(0); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | 
SPECIAL_MASK); int k; @@ -935,7 +935,7 @@ int AtomVecBondKokkos::size_restart() int AtomVecBondKokkos::pack_restart(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | SPECIAL_MASK); int m = 1; buf[m++] = h_x(i,0); @@ -979,7 +979,7 @@ int AtomVecBondKokkos::unpack_restart(double *buf) if (atom->nextra_store) memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); } - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | SPECIAL_MASK); int m = 1; h_x(nlocal,0) = buf[m++]; diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index 933e029aa4..31a690f521 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -65,8 +65,8 @@ void AtomVecChargeKokkos::grow(int n) if (nmax < 0 || nmax > MAXSMALLINT) error->one(FLERR,"Per-processor system is too big"); - sync(Device,ALL_MASK); - modified(Device,ALL_MASK); + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); @@ -80,7 +80,7 @@ void AtomVecChargeKokkos::grow(int n) memoryKK->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q"); grow_reset(); - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); if (atom->nextra_grow) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) @@ -495,7 +495,7 @@ void AtomVecChargeKokkos::unpack_border_kokkos(const int &n, const int &first, f(buf.view(),d_x,d_tag,d_type,d_mask,d_q,first); Kokkos::parallel_for(n,f); } - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); } /* 
---------------------------------------------------------------------- */ @@ -511,7 +511,7 @@ void AtomVecChargeKokkos::unpack_border(int n, int first, double *buf) if (i == nmax) { grow(0); } - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -537,7 +537,7 @@ void AtomVecChargeKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); + atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -798,7 +798,7 @@ int AtomVecChargeKokkos::unpack_exchange(double *buf) { int nlocal = atom->nlocal; if (nlocal == nmax) grow(0); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | Q_MASK); int m = 1; @@ -851,7 +851,7 @@ int AtomVecChargeKokkos::size_restart() int AtomVecChargeKokkos::pack_restart(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | Q_MASK); int m = 1; @@ -889,7 +889,7 @@ int AtomVecChargeKokkos::unpack_restart(double *buf) memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); } - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | Q_MASK); int m = 1; diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 8da89a49ca..4034efee9e 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -67,8 +67,8 @@ void AtomVecDPDKokkos::grow(int n) if (nmax < 0 || nmax > MAXSMALLINT) error->one(FLERR,"Per-processor system is too big"); - 
sync(Device,ALL_MASK); - modified(Device,ALL_MASK); + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); @@ -94,7 +94,7 @@ void AtomVecDPDKokkos::grow(int n) modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); grow_reset(); - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); } /* ---------------------------------------------------------------------- @@ -159,7 +159,7 @@ void AtomVecDPDKokkos::grow_reset() void AtomVecDPDKokkos::copy(int i, int j, int delflag) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK); @@ -185,7 +185,7 @@ void AtomVecDPDKokkos::copy(int i, int j, int delflag) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK); @@ -269,7 +269,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n, // Choose correct forward PackComm kernel if(commKK->forward_comm_on_host) { - sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, @@ -304,7 +304,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n, } } } else { - sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecDPDKokkos_PackComm 
f(atomKK->k_x, @@ -411,8 +411,8 @@ struct AtomVecDPDKokkos_PackCommSelf { int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap, const int nfirst, const int &pbc_flag, const int* const pbc) { if(commKK->forward_comm_on_host) { - sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, @@ -447,8 +447,8 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list } } } else { - sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, @@ -529,15 +529,15 @@ struct AtomVecDPDKokkos_UnpackComm { void AtomVecDPDKokkos::unpack_comm_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf ) { if(commKK->forward_comm_on_host) { - sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); struct AtomVecDPDKokkos_UnpackComm f(atomKK->k_x, atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, buf,first); Kokkos::parallel_for(n,f); } else { - sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + 
atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); struct AtomVecDPDKokkos_UnpackComm f(atomKK->k_x, atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, buf,first); @@ -553,7 +553,7 @@ int AtomVecDPDKokkos::pack_comm(int n, int *list, double *buf, int i,j,m; double dx,dy,dz; - sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); m = 0; if (pbc_flag == 0) { @@ -599,7 +599,7 @@ int AtomVecDPDKokkos::pack_comm_vel(int n, int *list, double *buf, int i,j,m; double dx,dy,dz,dvx,dvy,dvz; - sync(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->sync(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); m = 0; if (pbc_flag == 0) { @@ -686,7 +686,7 @@ void AtomVecDPDKokkos::unpack_comm(int n, int first, double *buf) h_uChem[i] = buf[m++]; } - modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); } /* ---------------------------------------------------------------------- */ @@ -710,7 +710,7 @@ void AtomVecDPDKokkos::unpack_comm_vel(int n, int first, double *buf) h_uChem[i] = buf[m++]; } - modified(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->modified(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); } /* ---------------------------------------------------------------------- */ @@ -718,7 +718,7 @@ void AtomVecDPDKokkos::unpack_comm_vel(int n, int first, double *buf) int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf) { if(n > 0) - sync(Host,F_MASK); + atomKK->sync(Host,F_MASK); int m = 0; const int last = first + n; @@ -735,8 +735,8 @@ int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf) void AtomVecDPDKokkos::unpack_reverse(int n, int *list, double *buf) { 
if(n > 0) { - sync(Host,F_MASK); - modified(Host,F_MASK); + atomKK->sync(Host,F_MASK); + atomKK->modified(Host,F_MASK); } int m = 0; @@ -820,7 +820,7 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA { X_FLOAT dx,dy,dz; - sync(space,ALL_MASK); + atomKK->sync(space,ALL_MASK); if (pbc_flag != 0) { if (domain->triclinic == 0) { @@ -877,7 +877,7 @@ int AtomVecDPDKokkos::pack_border(int n, int *list, double *buf, int i,j,m; double dx,dy,dz; - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); m = 0; if (pbc_flag == 0) { @@ -938,7 +938,7 @@ int AtomVecDPDKokkos::pack_border_vel(int n, int *list, double *buf, int i,j,m; double dx,dy,dz,dvx,dvy,dvz; - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); m = 0; if (pbc_flag == 0) { @@ -1033,7 +1033,7 @@ int AtomVecDPDKokkos::pack_comm_hybrid(int n, int *list, double *buf) { int i,j,m; - sync(Host,DPDTHETA_MASK | UCOND_MASK | + atomKK->sync(Host,DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK); m = 0; @@ -1053,7 +1053,7 @@ int AtomVecDPDKokkos::pack_border_hybrid(int n, int *list, double *buf) { int i,j,m; - sync(Host,DPDTHETA_MASK | UCOND_MASK | + atomKK->sync(Host,DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK); m = 0; @@ -1128,11 +1128,11 @@ struct AtomVecDPDKokkos_UnpackBorder { void AtomVecDPDKokkos::unpack_border_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) { - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| UCG_MASK|UCGNEW_MASK); while (first+n >= nmax) grow(0); - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| UCG_MASK|UCGNEW_MASK|DVECTOR_MASK); if(space==Host) { @@ -1180,7 +1180,7 @@ void AtomVecDPDKokkos::unpack_border(int n, int first, double *buf) m += 
modify->fix[atom->extra_border[iextra]]-> unpack_border(n,first,&buf[m]); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| UCG_MASK|UCGNEW_MASK|DVECTOR_MASK); } @@ -1218,7 +1218,7 @@ void AtomVecDPDKokkos::unpack_border_vel(int n, int first, double *buf) m += modify->fix[atom->extra_border[iextra]]-> unpack_border(n,first,&buf[m]); - modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| UCG_MASK|UCGNEW_MASK|DVECTOR_MASK); } @@ -1238,7 +1238,7 @@ int AtomVecDPDKokkos::unpack_comm_hybrid(int n, int first, double *buf) h_uChem(i) = buf[m++]; } - modified(Host,DPDTHETA_MASK | UCOND_MASK | + atomKK->modified(Host,DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK ); return m; @@ -1261,7 +1261,7 @@ int AtomVecDPDKokkos::unpack_border_hybrid(int n, int first, double *buf) h_uCGnew(i) = buf[m++]; } - modified(Host,DPDTHETA_MASK | UCOND_MASK | + atomKK->modified(Host,DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK); return m; @@ -1385,7 +1385,7 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d int newsize = nsend*17/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } - sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); @@ -1403,7 +1403,7 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d int AtomVecDPDKokkos::pack_exchange(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | UCG_MASK | 
UCGNEW_MASK | DVECTOR_MASK); @@ -1519,7 +1519,7 @@ int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nre k_count.sync(); } - modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); @@ -1557,7 +1557,7 @@ int AtomVecDPDKokkos::unpack_exchange(double *buf) m += modify->fix[atom->extra_grow[iextra]]-> unpack_exchange(nlocal,&buf[m]); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); @@ -1594,7 +1594,7 @@ int AtomVecDPDKokkos::size_restart() int AtomVecDPDKokkos::pack_restart(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK); @@ -1659,7 +1659,7 @@ int AtomVecDPDKokkos::unpack_restart(double *buf) for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++]; } - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK); diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index c07f3e0381..034da88f73 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -65,8 +65,8 @@ void AtomVecFullKokkos::grow(int n) if (nmax < 0 || nmax > MAXSMALLINT) error->one(FLERR,"Per-processor system is too big"); - sync(Device,ALL_MASK); - modified(Device,ALL_MASK); + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); 
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); @@ -124,7 +124,7 @@ void AtomVecFullKokkos::grow(int n) atomKK->improper_per_atom,"atom:improper_atom4"); grow_reset(); - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); if (atom->nextra_grow) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) @@ -609,9 +609,9 @@ struct AtomVecFullKokkos_UnpackBorder { void AtomVecFullKokkos::unpack_border_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf, ExecutionSpace space) { - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); while (first+n >= nmax) grow(0); - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); if(space==Host) { struct AtomVecFullKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,h_q,h_molecule,first); @@ -633,7 +633,7 @@ void AtomVecFullKokkos::unpack_border(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -660,7 +660,7 @@ void AtomVecFullKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); + atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1205,7 +1205,7 @@ int AtomVecFullKokkos::unpack_exchange(double *buf) { int nlocal = atom->nlocal; if (nlocal == nmax) grow(0); - modified(Host,X_MASK | 
V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK); @@ -1298,7 +1298,7 @@ int AtomVecFullKokkos::size_restart() int AtomVecFullKokkos::pack_restart(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK); @@ -1371,10 +1371,10 @@ int AtomVecFullKokkos::unpack_restart(double *buf) if (atom->nextra_store) memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); } - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK); diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index ce36f59053..03cbe1ee5e 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -307,7 +307,7 @@ int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int int AtomVecHybridKokkos::pack_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) { - sync(Host,X_MASK); + atomKK->sync(Host,X_MASK); int i,j,k,m; double dx,dy,dz; @@ -351,7 +351,7 @@ int AtomVecHybridKokkos::pack_comm(int n, int *list, double *buf, int AtomVecHybridKokkos::pack_comm_vel(int n, int *list, double *buf, int pbc_flag, int *pbc) { - sync(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); + 
atomKK->sync(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); int i,j,k,m; double dx,dy,dz,dvx,dvy,dvz; @@ -463,7 +463,7 @@ void AtomVecHybridKokkos::unpack_comm(int n, int first, double *buf) h_x(i,2) = buf[m++]; } - modified(Host,X_MASK); + atomKK->modified(Host,X_MASK); // unpack sub-style contributions as contiguous chunks @@ -500,7 +500,7 @@ void AtomVecHybridKokkos::unpack_comm_vel(int n, int first, double *buf) } } - modified(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); + atomKK->modified(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); // unpack sub-style contributions as contiguous chunks @@ -512,7 +512,7 @@ void AtomVecHybridKokkos::unpack_comm_vel(int n, int first, double *buf) int AtomVecHybridKokkos::pack_reverse(int n, int first, double *buf) { - sync(Host,F_MASK); + atomKK->sync(Host,F_MASK); int i,k,m,last; @@ -546,7 +546,7 @@ void AtomVecHybridKokkos::unpack_reverse(int n, int *list, double *buf) h_f(j,2) += buf[m++]; } - modified(Host,F_MASK); + atomKK->modified(Host,F_MASK); // unpack sub-style contributions as contiguous chunks @@ -559,7 +559,7 @@ void AtomVecHybridKokkos::unpack_reverse(int n, int *list, double *buf) int AtomVecHybridKokkos::pack_border(int n, int *list, double *buf, int pbc_flag, int *pbc) { - sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + atomKK->sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); int i,j,k,m; double dx,dy,dz; @@ -613,7 +613,7 @@ int AtomVecHybridKokkos::pack_border(int n, int *list, double *buf, int AtomVecHybridKokkos::pack_border_vel(int n, int *list, double *buf, int pbc_flag, int *pbc) { - sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); + atomKK->sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); int i,j,k,m; double dx,dy,dz,dvx,dvy,dvz; int omega_flag = atom->omega_flag; @@ -741,7 +741,7 @@ void AtomVecHybridKokkos::unpack_border(int n, int first, double *buf) h_mask[i] = (int) ubuf(buf[m++]).i; } - 
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); // unpack sub-style contributions as contiguous chunks @@ -787,7 +787,7 @@ void AtomVecHybridKokkos::unpack_border_vel(int n, int first, double *buf) } } - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); // unpack sub-style contributions as contiguous chunks @@ -969,7 +969,7 @@ void AtomVecHybridKokkos::create_atom(int itype, double *coord) void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **values) { - sync(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); + atomKK->sync(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); int nlocal = atom->nlocal; if (nlocal == nmax) grow(0); @@ -1000,7 +1000,7 @@ void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **val h_angmom(nlocal,2) = 0.0; } - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); // each sub-style parses sub-style specific values @@ -1017,13 +1017,13 @@ void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **val void AtomVecHybridKokkos::data_vel(int m, char **values) { - sync(Host,V_MASK); + atomKK->sync(Host,V_MASK); h_v(m,0) = atof(values[0]); h_v(m,1) = atof(values[1]); h_v(m,2) = atof(values[2]); - modified(Host,V_MASK); + atomKK->modified(Host,V_MASK); // each sub-style parses sub-style specific values @@ -1038,7 +1038,7 @@ void AtomVecHybridKokkos::data_vel(int m, char **values) void AtomVecHybridKokkos::pack_data(double **buf) { - sync(Host,TAG_MASK|TYPE_MASK|X_MASK); + atomKK->sync(Host,TAG_MASK|TYPE_MASK|X_MASK); int k,m; @@ -1089,7 +1089,7 @@ void 
AtomVecHybridKokkos::write_data(FILE *fp, int n, double **buf) void AtomVecHybridKokkos::pack_vel(double **buf) { - sync(Host,V_MASK); + atomKK->sync(Host,V_MASK); int k,m; diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index f832cddce2..9ac8ecd264 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -65,8 +65,8 @@ void AtomVecMolecularKokkos::grow(int n) if (nmax < 0 || nmax > MAXSMALLINT) error->one(FLERR,"Per-processor system is too big"); - sync(Device,ALL_MASK); - modified(Device,ALL_MASK); + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); @@ -122,7 +122,7 @@ void AtomVecMolecularKokkos::grow(int n) atomKK->improper_per_atom,"atom:improper_atom4"); grow_reset(); - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); if (atom->nextra_grow) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) @@ -362,7 +362,7 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n, // Choose correct forward PackComm kernel if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); + atomKK->sync(Host,X_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecMolecularKokkos_PackComm @@ -389,7 +389,7 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n, } } } else { - sync(Device,X_MASK); + atomKK->sync(Device,X_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecMolecularKokkos_PackComm @@ -478,8 +478,8 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d const int nfirst, const int &pbc_flag, const int* const pbc) { if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); + atomKK->sync(Host,X_MASK); + atomKK->modified(Host,X_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecMolecularKokkos_PackCommSelf @@ -506,8 +506,8 @@ int 
AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d } } } else { - sync(Device,X_MASK); - modified(Device,X_MASK); + atomKK->sync(Device,X_MASK); + atomKK->modified(Device,X_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecMolecularKokkos_PackCommSelf @@ -566,13 +566,13 @@ struct AtomVecMolecularKokkos_UnpackComm { void AtomVecMolecularKokkos::unpack_comm_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf ) { if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); + atomKK->sync(Host,X_MASK); + atomKK->modified(Host,X_MASK); struct AtomVecMolecularKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); } else { - sync(Device,X_MASK); - modified(Device,X_MASK); + atomKK->sync(Device,X_MASK); + atomKK->modified(Device,X_MASK); struct AtomVecMolecularKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); } @@ -715,7 +715,7 @@ void AtomVecMolecularKokkos::unpack_comm_vel(int n, int first, double *buf) int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf) { if(n > 0) - sync(Host,F_MASK); + atomKK->sync(Host,F_MASK); int m = 0; const int last = first + n; @@ -732,7 +732,7 @@ int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf) void AtomVecMolecularKokkos::unpack_reverse(int n, int *list, double *buf) { if(n > 0) - modified(Host,F_MASK); + atomKK->modified(Host,F_MASK); int m = 0; for (int i = 0; i < n; i++) { @@ -1033,9 +1033,9 @@ struct AtomVecMolecularKokkos_UnpackBorder { void AtomVecMolecularKokkos::unpack_border_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf, ExecutionSpace space) { - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); while (first+n >= nmax) grow(0); - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + 
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); if(space==Host) { struct AtomVecMolecularKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,h_molecule,first); @@ -1057,7 +1057,7 @@ void AtomVecMolecularKokkos::unpack_border(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1083,7 +1083,7 @@ void AtomVecMolecularKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1616,7 +1616,7 @@ int AtomVecMolecularKokkos::unpack_exchange(double *buf) { int nlocal = atom->nlocal; if (nlocal == nmax) grow(0); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK); @@ -1708,7 +1708,7 @@ int AtomVecMolecularKokkos::size_restart() int AtomVecMolecularKokkos::pack_restart(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK); @@ -1781,7 +1781,7 @@ int AtomVecMolecularKokkos::unpack_restart(double *buf) memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); } - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | 
BOND_MASK | ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK); diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index 17c2e8d804..9e8388488f 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -100,8 +100,8 @@ void AtomVecSphereKokkos::grow(int n) if (nmax < 0 || nmax > MAXSMALLINT) error->one(FLERR,"Per-processor system is too big"); - sync(Device,ALL_MASK); - modified(Device,ALL_MASK); + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); @@ -121,7 +121,7 @@ void AtomVecSphereKokkos::grow(int n) modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); grow_reset(); - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); } /* ---------------------------------------------------------------------- @@ -173,7 +173,7 @@ void AtomVecSphereKokkos::grow_reset() void AtomVecSphereKokkos::copy(int i, int j, int delflag) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | RADIUS_MASK | RMASS_MASK | OMEGA_MASK); @@ -198,7 +198,7 @@ void AtomVecSphereKokkos::copy(int i, int j, int delflag) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | RADIUS_MASK | RMASS_MASK | OMEGA_MASK); } @@ -278,7 +278,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos( // Check whether to always run forward communication on the host // Choose correct forward PackComm kernel if(commKK->forward_comm_on_host) { - sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); if(pbc_flag) { if(domain->triclinic) { struct 
AtomVecSphereKokkos_PackComm f( @@ -317,7 +317,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos( } } } else { - sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecSphereKokkos_PackComm f( @@ -465,7 +465,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos( const int* const pbc) { if(commKK->forward_comm_on_host) { - sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); if(pbc_flag) { if(deform_vremap) { if(domain->triclinic) { @@ -596,7 +596,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos( } } } else { - sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); if(pbc_flag) { if(deform_vremap) { if(domain->triclinic) { @@ -796,8 +796,8 @@ int AtomVecSphereKokkos::pack_comm_self( if (radvary == 0) return AtomVecKokkos::pack_comm_self(n,list,iswap,nfirst,pbc_flag,pbc); if(commKK->forward_comm_on_host) { - sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); - modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecSphereKokkos_PackCommSelf f( @@ -836,8 +836,8 @@ int AtomVecSphereKokkos::pack_comm_self( } } } else { - sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK); - modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecSphereKokkos_PackCommSelf f( @@ -927,14 +927,14 @@ void AtomVecSphereKokkos::unpack_comm_kokkos( return; } if(commKK->forward_comm_on_host) { - modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); struct AtomVecSphereKokkos_UnpackComm f( atomKK->k_x, 
atomKK->k_radius,atomKK->k_rmass, buf,first); Kokkos::parallel_for(n,f); } else { - modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK); struct AtomVecSphereKokkos_UnpackComm f( atomKK->k_x, atomKK->k_radius,atomKK->k_rmass, @@ -999,7 +999,7 @@ void AtomVecSphereKokkos::unpack_comm_vel_kokkos( const int &n, const int &first, const DAT::tdual_xfloat_2d &buf ) { if(commKK->forward_comm_on_host) { - modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); if (radvary == 0) { struct AtomVecSphereKokkos_UnpackCommVel f( atomKK->k_x, @@ -1016,7 +1016,7 @@ void AtomVecSphereKokkos::unpack_comm_vel_kokkos( Kokkos::parallel_for(n,f); } } else { - modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); if (radvary == 0) { struct AtomVecSphereKokkos_UnpackCommVel f( atomKK->k_x, @@ -1045,7 +1045,7 @@ int AtomVecSphereKokkos::pack_comm(int n, int *list, double *buf, if (radvary == 0) { // Not sure if we need to call sync for X here - sync(Host,X_MASK); + atomKK->sync(Host,X_MASK); m = 0; if (pbc_flag == 0) { for (i = 0; i < n; i++) { @@ -1072,7 +1072,7 @@ int AtomVecSphereKokkos::pack_comm(int n, int *list, double *buf, } } } else { - sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); m = 0; if (pbc_flag == 0) { for (i = 0; i < n; i++) { @@ -1116,7 +1116,7 @@ int AtomVecSphereKokkos::pack_comm_vel(int n, int *list, double *buf, double dx,dy,dz,dvx,dvy,dvz; if (radvary == 0) { - sync(Host,X_MASK|V_MASK|OMEGA_MASK); + atomKK->sync(Host,X_MASK|V_MASK|OMEGA_MASK); m = 0; if (pbc_flag == 0) { for (i = 0; i < n; i++) { @@ -1179,7 +1179,7 @@ int AtomVecSphereKokkos::pack_comm_vel(int n, int *list, double *buf, } } } else { - sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + 
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); m = 0; if (pbc_flag == 0) { for (i = 0; i < n; i++) { @@ -1258,7 +1258,7 @@ int AtomVecSphereKokkos::pack_comm_hybrid(int n, int *list, double *buf) { if (radvary == 0) return 0; - sync(Host,RADIUS_MASK|RMASS_MASK); + atomKK->sync(Host,RADIUS_MASK|RMASS_MASK); int m = 0; for (int i = 0; i < n; i++) { @@ -1281,7 +1281,7 @@ void AtomVecSphereKokkos::unpack_comm(int n, int first, double *buf) h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; } - modified(Host,X_MASK); + atomKK->modified(Host,X_MASK); } else { int m = 0; const int last = first + n; @@ -1292,7 +1292,7 @@ void AtomVecSphereKokkos::unpack_comm(int n, int first, double *buf) h_radius[i] = buf[m++]; h_rmass[i] = buf[m++]; } - modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); } } @@ -1314,7 +1314,7 @@ void AtomVecSphereKokkos::unpack_comm_vel(int n, int first, double *buf) h_omega(i,1) = buf[m++]; h_omega(i,2) = buf[m++]; } - modified(Host,X_MASK|V_MASK|OMEGA_MASK); + atomKK->modified(Host,X_MASK|V_MASK|OMEGA_MASK); } else { int m = 0; const int last = first + n; @@ -1331,7 +1331,7 @@ void AtomVecSphereKokkos::unpack_comm_vel(int n, int first, double *buf) h_omega(i,1) = buf[m++]; h_omega(i,2) = buf[m++]; } - modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); } } @@ -1347,7 +1347,7 @@ int AtomVecSphereKokkos::unpack_comm_hybrid(int n, int first, double *buf) h_radius[i] = buf[m++]; h_rmass[i] = buf[m++]; } - modified(Host,RADIUS_MASK|RMASS_MASK); + atomKK->modified(Host,RADIUS_MASK|RMASS_MASK); return m; } @@ -1356,7 +1356,7 @@ int AtomVecSphereKokkos::unpack_comm_hybrid(int n, int first, double *buf) int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf) { if(n > 0) - sync(Host,F_MASK|TORQUE_MASK); + atomKK->sync(Host,F_MASK|TORQUE_MASK); int m = 0; const int last = first + n; @@ -1376,7 +1376,7 @@ 
int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf) int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf) { if(n > 0) - sync(Host,TORQUE_MASK); + atomKK->sync(Host,TORQUE_MASK); int m = 0; const int last = first + n; @@ -1393,7 +1393,7 @@ int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf) void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf) { if(n > 0) { - modified(Host,F_MASK|TORQUE_MASK); + atomKK->modified(Host,F_MASK|TORQUE_MASK); } int m = 0; @@ -1413,7 +1413,7 @@ void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf) int AtomVecSphereKokkos::unpack_reverse_hybrid(int n, int *list, double *buf) { if(n > 0) { - modified(Host,TORQUE_MASK); + atomKK->modified(Host,TORQUE_MASK); } int m = 0; @@ -1493,7 +1493,7 @@ int AtomVecSphereKokkos::pack_border_kokkos( X_FLOAT dx,dy,dz; // This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec - sync(space,ALL_MASK); + atomKK->sync(space,ALL_MASK); if (pbc_flag != 0) { if (domain->triclinic == 0) { @@ -1550,7 +1550,7 @@ int AtomVecSphereKokkos::pack_border( int i,j,m; double dx,dy,dz; - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); m = 0; if (pbc_flag == 0) { @@ -1687,7 +1687,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos( X_FLOAT dvx=0,dvy=0,dvz=0; // This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec - sync(space,ALL_MASK); + atomKK->sync(space,ALL_MASK); if (pbc_flag != 0) { if (domain->triclinic == 0) { @@ -1777,7 +1777,7 @@ int AtomVecSphereKokkos::pack_border_vel(int n, int *list, double *buf, int i,j,m; double dx,dy,dz,dvx,dvy,dvz; - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); m = 0; if (pbc_flag == 0) { @@ -1867,7 +1867,7 @@ int AtomVecSphereKokkos::pack_border_vel(int n, int *list, double *buf, int AtomVecSphereKokkos::pack_border_hybrid(int n, int *list, double *buf) { - sync(Host,RADIUS_MASK|RMASS_MASK); + atomKK->sync(Host,RADIUS_MASK|RMASS_MASK); int m = 
0; for (int i = 0; i < n; i++) { @@ -1943,7 +1943,7 @@ void AtomVecSphereKokkos::unpack_border_kokkos(const int &n, const int &first, Kokkos::parallel_for(n,f); } - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| RADIUS_MASK|RMASS_MASK); } @@ -1970,7 +1970,7 @@ void AtomVecSphereKokkos::unpack_border(int n, int first, double *buf) m += modify->fix[atom->extra_border[iextra]]-> unpack_border(n,first,&buf[m]); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK); } @@ -2053,7 +2053,7 @@ void AtomVecSphereKokkos::unpack_border_vel_kokkos( Kokkos::parallel_for(n,f); } - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); } @@ -2086,7 +2086,7 @@ void AtomVecSphereKokkos::unpack_border_vel(int n, int first, double *buf) m += modify->fix[atom->extra_border[iextra]]-> unpack_border(n,first,&buf[m]); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); } /* ---------------------------------------------------------------------- */ @@ -2099,7 +2099,7 @@ int AtomVecSphereKokkos::unpack_border_hybrid(int n, int first, double *buf) h_radius[i] = buf[m++]; h_rmass[i] = buf[m++]; } - modified(Host,RADIUS_MASK|RMASS_MASK); + atomKK->modified(Host,RADIUS_MASK|RMASS_MASK); return m; } @@ -2219,7 +2219,7 @@ int AtomVecSphereKokkos::pack_exchange_kokkos( int newsize = nsend*17/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } - sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK | OMEGA_MASK); @@ -2240,7 +2240,7 @@ int 
AtomVecSphereKokkos::pack_exchange_kokkos( int AtomVecSphereKokkos::pack_exchange(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK | OMEGA_MASK); @@ -2355,7 +2355,7 @@ int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int k_count.sync(); } - modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK | OMEGA_MASK); @@ -2392,7 +2392,7 @@ int AtomVecSphereKokkos::unpack_exchange(double *buf) m += modify->fix[atom->extra_grow[iextra]]-> unpack_exchange(nlocal,&buf[m]); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | RADIUS_MASK | RMASS_MASK | OMEGA_MASK); @@ -2428,7 +2428,7 @@ int AtomVecSphereKokkos::size_restart() int AtomVecSphereKokkos::pack_restart(int i, double *buf) { - sync(Host,X_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | V_MASK | RADIUS_MASK | RMASS_MASK | OMEGA_MASK); @@ -2495,7 +2495,7 @@ int AtomVecSphereKokkos::unpack_restart(double *buf) for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++]; } - modified(Host,X_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | V_MASK | RADIUS_MASK | RMASS_MASK | OMEGA_MASK); @@ -2617,14 +2617,14 @@ int AtomVecSphereKokkos::data_atom_hybrid(int nlocal, char **values) void AtomVecSphereKokkos::data_vel(int m, char **values) { - sync(Host,V_MASK|OMEGA_MASK); + atomKK->sync(Host,V_MASK|OMEGA_MASK); h_v(m,0) = atof(values[0]); h_v(m,1) = atof(values[1]); h_v(m,2) = atof(values[2]); h_omega(m,0) = atof(values[3]); h_omega(m,1) = atof(values[4]); h_omega(m,2) = atof(values[5]); - modified(Host,V_MASK|OMEGA_MASK); + 
atomKK->modified(Host,V_MASK|OMEGA_MASK); } /* ---------------------------------------------------------------------- @@ -2633,11 +2633,11 @@ void AtomVecSphereKokkos::data_vel(int m, char **values) int AtomVecSphereKokkos::data_vel_hybrid(int m, char **values) { - sync(Host,OMEGA_MASK); + atomKK->sync(Host,OMEGA_MASK); omega[m][0] = atof(values[0]); omega[m][1] = atof(values[1]); omega[m][2] = atof(values[2]); - modified(Host,OMEGA_MASK); + atomKK->modified(Host,OMEGA_MASK); return 3; } @@ -2712,7 +2712,7 @@ int AtomVecSphereKokkos::write_data_hybrid(FILE *fp, double *buf) void AtomVecSphereKokkos::pack_vel(double **buf) { - sync(Host,TAG_MASK|V_MASK|OMEGA_MASK); + atomKK->sync(Host,TAG_MASK|V_MASK|OMEGA_MASK); int nlocal = atom->nlocal; for (int i = 0; i < nlocal; i++) { @@ -2732,7 +2732,7 @@ void AtomVecSphereKokkos::pack_vel(double **buf) int AtomVecSphereKokkos::pack_vel_hybrid(int i, double *buf) { - sync(Host,OMEGA_MASK); + atomKK->sync(Host,OMEGA_MASK); buf[0] = h_omega(i,0); buf[1] = h_omega(i,1); From 439e7da03f7a67fb0b74788659a81d8a6a6618be Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 21 May 2019 11:47:26 -0600 Subject: [PATCH 30/34] Need auto-sync on for initialization --- src/KOKKOS/verlet_kokkos.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index d75a7e491f..73ba7d3d07 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -93,7 +93,6 @@ void VerletKokkos::setup(int flag) } update->setupflag = 1; - lmp->kokkos->auto_sync = 0; // setup domain, communication and neighboring // acquire ghosts @@ -189,7 +188,6 @@ void VerletKokkos::setup(int flag) modify->setup(vflag); output->setup(flag); - lmp->kokkos->auto_sync = 1; update->setupflag = 0; } @@ -202,7 +200,6 @@ void VerletKokkos::setup(int flag) void VerletKokkos::setup_minimal(int flag) { update->setupflag = 1; - lmp->kokkos->auto_sync = 0; // setup domain, communication and neighboring // acquire 
ghosts @@ -294,7 +291,6 @@ void VerletKokkos::setup_minimal(int flag) if (force->newton) comm->reverse_comm(); modify->setup(vflag); - lmp->kokkos->auto_sync = 1; update->setupflag = 0; } From eea67bf3bfce2a36d4079b4741cc82d9f68421f0 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 22 May 2019 08:52:57 -0600 Subject: [PATCH 31/34] Add sync/modify for growing dvector --- src/KOKKOS/atom_kokkos.cpp | 3 +++ src/KOKKOS/fix_property_atom_kokkos.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index b54719e852..813c5ddbf2 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -22,6 +22,7 @@ #include "memory_kokkos.h" #include "error.h" #include "kokkos.h" +#include "atom_masks.h" using namespace LAMMPS_NS; @@ -270,8 +271,10 @@ int AtomKokkos::add_custom(const char *name, int flag) int n = strlen(name) + 1; dname[index] = new char[n]; strcpy(dname[index],name); + this->sync(Device,DVECTOR_MASK); memoryKK->grow_kokkos(k_dvector,dvector,ndvector,nmax, "atom:dvector"); + this->modified(Device,DVECTOR_MASK); } return index; diff --git a/src/KOKKOS/fix_property_atom_kokkos.cpp b/src/KOKKOS/fix_property_atom_kokkos.cpp index 12f27f9932..6860676911 100644 --- a/src/KOKKOS/fix_property_atom_kokkos.cpp +++ b/src/KOKKOS/fix_property_atom_kokkos.cpp @@ -19,6 +19,7 @@ #include "memory_kokkos.h" #include "error.h" #include "update.h" +#include "atom_masks.h" using namespace LAMMPS_NS; using namespace FixConst; @@ -61,8 +62,10 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax) size_t nbytes = (nmax-nmax_old) * sizeof(int); memset(&atom->ivector[index[m]][nmax_old],0,nbytes); } else if (style[m] == DOUBLE) { + atomKK->sync(Device,DVECTOR_MASK); memoryKK->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.extent(0),nmax, "atom:dvector"); + atomKK->modified(Device,DVECTOR_MASK); //memory->grow(atom->dvector[index[m]],nmax,"atom:dvector"); //size_t nbytes = (nmax-nmax_old) * 
sizeof(double); //memset(&atom->dvector[index[m]][nmax_old],0,nbytes); From e44c87773862cdf42fbb3e682cb12310f4d70e64 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 28 May 2019 10:21:29 -0600 Subject: [PATCH 32/34] Add missing tag sync in fix_qeq_reax_kokkos --- src/KOKKOS/fix_qeq_reax_kokkos.cpp | 13 ++++++++++--- src/KOKKOS/verlet_kokkos.cpp | 2 ++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 9969ab7257..d007c469a9 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -58,7 +58,7 @@ FixQEqReaxKokkos(LAMMPS *lmp, int narg, char **arg) : atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; - datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK; + datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK | TAG_MASK; datamask_modify = Q_MASK | X_MASK; nmax = nmax = m_cap = 0; @@ -164,6 +164,9 @@ void FixQEqReaxKokkos::init_shielding_k() template void FixQEqReaxKokkos::init_hist() { + k_s_hist.clear_sync_state(); + k_t_hist.clear_sync_state(); + Kokkos::deep_copy(d_s_hist,0.0); Kokkos::deep_copy(d_t_hist,0.0); @@ -189,7 +192,6 @@ void FixQEqReaxKokkos::pre_force(int vflag) if (update->ntimestep % nevery) return; atomKK->sync(execution_space,datamask_read); - atomKK->modified(execution_space,datamask_modify); x = atomKK->k_x.view(); v = atomKK->k_v.view(); @@ -273,6 +275,8 @@ void FixQEqReaxKokkos::pre_force(int vflag) // free duplicated memory if (need_dup) dup_o = decltype(dup_o)(); + + atomKK->modified(execution_space,datamask_modify); } /* ---------------------------------------------------------------------- */ @@ -1199,9 +1203,12 @@ double FixQEqReaxKokkos::memory_usage() template void FixQEqReaxKokkos::grow_arrays(int nmax) { - k_s_hist.template sync(); // force reallocation on host + k_s_hist.template sync(); k_t_hist.template sync(); + k_s_hist.template 
modify(); // force reallocation on host + k_t_hist.template modify(); + memoryKK->grow_kokkos(k_s_hist,s_hist,nmax,nprev,"qeq:s_hist"); memoryKK->grow_kokkos(k_t_hist,t_hist,nmax,nprev,"qeq:t_hist"); diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index 73ba7d3d07..ceacb3f8b4 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -186,7 +186,9 @@ void VerletKokkos::setup(int flag) } if (force->newton) comm->reverse_comm(); + lmp->kokkos->auto_sync = 0; modify->setup(vflag); + lmp->kokkos->auto_sync = 1; output->setup(flag); update->setupflag = 0; } From 3b606868272b8cc3d241bbdad0793b47b22a1400 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 29 May 2019 09:43:50 -0600 Subject: [PATCH 33/34] Small tweak to verlet_kokkos --- src/KOKKOS/verlet_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index ceacb3f8b4..b80d5e0646 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -188,8 +188,8 @@ void VerletKokkos::setup(int flag) lmp->kokkos->auto_sync = 0; modify->setup(vflag); - lmp->kokkos->auto_sync = 1; output->setup(flag); + lmp->kokkos->auto_sync = 1; update->setupflag = 0; } From b88158fc3bfe46db198ba152027c1de2d66ddc4e Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 29 May 2019 11:16:38 -0600 Subject: [PATCH 34/34] Fix issue in npair_kokkos --- src/KOKKOS/npair_kokkos.cpp | 6 +++--- src/KOKKOS/npair_kokkos.h | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index ecf4b2d5a5..4daf4b84c5 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -140,7 +140,7 @@ void NPairKokkos::build(NeighList *list_) k_bincount.view(), k_bins.view(), k_atom2bin.view(), - nstencil, + mbins,nstencil, k_stencil.view(), k_stencilxyz.view(), nlocal, @@ -511,7 +511,7 @@ void 
NeighborKokkosExecute::build_ItemCuda(typename Kokkos::TeamPoli const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN; - if(ibin >=c_bincount.extent(0)) return; + if(ibin >= mbins) return; X_FLOAT* other_x = sharedmem; other_x = other_x + 5*atoms_per_bin*MY_BIN; @@ -947,7 +947,7 @@ void NeighborKokkosExecute::build_ItemSizeCuda(typename Kokkos::Team const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN; - if(ibin >=c_bincount.extent(0)) return; + if(ibin >= mbins) return; X_FLOAT* other_x = sharedmem; other_x = other_x + 6*atoms_per_bin*MY_BIN; diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index edf3d2a59f..2a3994f584 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -173,6 +173,7 @@ class NeighborKokkosExecute // data from NBin class + const int mbins; const typename AT::t_int_1d bincount; const typename AT::t_int_1d_const c_bincount; typename AT::t_int_2d bins; @@ -226,7 +227,7 @@ class NeighborKokkosExecute const typename AT::t_int_1d &_bincount, const typename AT::t_int_2d &_bins, const typename AT::t_int_1d &_atom2bin, - const int _nstencil, + const int _mbins,const int _nstencil, const typename AT::t_int_1d &_d_stencil, const typename AT::t_int_1d_3 &_d_stencilxyz, const int _nlocal, @@ -264,7 +265,7 @@ class NeighborKokkosExecute const typename ArrayTypes::t_int_scalar _h_resize, const typename AT::t_int_scalar _new_maxneighs, const typename ArrayTypes::t_int_scalar _h_new_maxneighs): - neigh_list(_neigh_list), cutneighsq(_cutneighsq), + neigh_list(_neigh_list), cutneighsq(_cutneighsq),mbins(_mbins), bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins), atom2bin(_atom2bin),c_atom2bin(_atom2bin), nstencil(_nstencil),d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz),