From e5fffa59ef5e244f5516415ef5822ce8d645cde6 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 18 Jul 2019 15:29:45 -0600 Subject: [PATCH 1/3] Add optimized version of Kokkos SNAP --- src/KOKKOS/pair_snap_kokkos.h | 50 ++- src/KOKKOS/pair_snap_kokkos_impl.h | 431 +++++++++--------- src/KOKKOS/sna_kokkos.h | 97 ++-- src/KOKKOS/sna_kokkos_impl.h | 681 ++++++++++------------------- 4 files changed, 527 insertions(+), 732 deletions(-) diff --git a/src/KOKKOS/pair_snap_kokkos.h b/src/KOKKOS/pair_snap_kokkos.h index 8be0bf9afb..2193e9ff24 100644 --- a/src/KOKKOS/pair_snap_kokkos.h +++ b/src/KOKKOS/pair_snap_kokkos.h @@ -31,10 +31,17 @@ PairStyle(snap/kk/host,PairSNAPKokkos) namespace LAMMPS_NS { template -struct TagPairSNAPCompute{}; +struct TagPairSNAPComputeForce{}; struct TagPairSNAPBeta{}; -struct TagPairSNAPBispectrum{}; +struct TagPairSNAPComputeNeigh{}; +struct TagPairSNAPPreUi{}; +struct TagPairSNAPComputeUi{}; +struct TagPairSNAPComputeZi{}; +struct TagPairSNAPComputeBi{}; +struct TagPairSNAPComputeYi{}; +struct TagPairSNAPComputeDuidrj{}; +struct TagPairSNAPComputeDeidrj{}; template class PairSNAPKokkos : public PairSNAP { @@ -56,18 +63,39 @@ public: template KOKKOS_INLINE_FUNCTION - void operator() (TagPairSNAPCompute,const typename Kokkos::TeamPolicy >::member_type& team) const; + void operator() (TagPairSNAPComputeForce,const typename Kokkos::TeamPolicy >::member_type& team) const; template KOKKOS_INLINE_FUNCTION - void operator() (TagPairSNAPCompute,const typename Kokkos::TeamPolicy >::member_type& team, EV_FLOAT&) const; + void operator() (TagPairSNAPComputeForce,const typename Kokkos::TeamPolicy >::member_type& team, EV_FLOAT&) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy::member_type& team) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairSNAPPreUi,const typename Kokkos::TeamPolicy::member_type& team) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy::member_type& team) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairSNAPComputeZi,const typename Kokkos::TeamPolicy::member_type& team) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairSNAPComputeBi,const typename Kokkos::TeamPolicy::member_type& team) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairSNAPComputeYi,const typename Kokkos::TeamPolicy::member_type& team) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairSNAPComputeDuidrj,const typename Kokkos::TeamPolicy::member_type& team) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairSNAPComputeDeidrj,const typename Kokkos::TeamPolicy::member_type& team) const; KOKKOS_INLINE_FUNCTION void operator() (TagPairSNAPBeta,const typename Kokkos::TeamPolicy::member_type& team) const; - KOKKOS_INLINE_FUNCTION - void operator() (TagPairSNAPBispectrum,const typename Kokkos::TeamPolicy::member_type& team) const; - template KOKKOS_INLINE_FUNCTION void v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j, @@ -90,15 +118,10 @@ protected: t_dbvec dbvec; SNAKokkos snaKK; - // How much parallelism to use within an interaction - int vector_length,team_size; - int team_scratch_size; - int thread_scratch_size; + int inum,max_neighs,chunk_offset; int eflag,vflag; - void compute_beta(); - void compute_bispectrum(); void allocate(); //void read_files(char *, char *); /*template @@ -131,6 +154,7 @@ inline double dist2(double* x,double* y); Kokkos::View d_wjelem; // elements weights Kokkos::View d_coeffelem; // element bispectrum coefficients Kokkos::View d_map; // mapping from atom types to elements + Kokkos::View d_ninside; // ninside for all atoms in list Kokkos::View d_beta; // betas for all atoms in list Kokkos::View d_bispectrum; // bispectrum components for all atoms in list diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index 8d72d1fac4..645521a17b 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -57,7 +57,6 @@ PairSNAPKokkos::PairSNAPKokkos(LAMMPS *lmp) : PairSNAP(lmp) datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; - vector_length = 8; k_cutsq = tdual_fparams("PairSNAPKokkos::cutsq",atom->ntypes+1,atom->ntypes+1); auto d_cutsq = k_cutsq.template view(); rnd_cutsq = d_cutsq; @@ -164,7 +163,7 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) d_numneigh = k_list->d_numneigh; d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; - int inum = list->inum; + inum = list->inum; need_dup = lmp->kokkos->need_dup(); if (need_dup) { @@ -181,70 +180,112 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) const int num_neighs = neighs_i.get_num_neighs(); if (max_neighs(k_list), Kokkos::Experimental::Max(max_neighs)); - snaKK.nmax = max_neighs; - - team_scratch_size = snaKK.size_team_scratch_arrays(); - thread_scratch_size = snaKK.size_thread_scratch_arrays(); - - //printf("Sizes: %i %i\n",team_scratch_size/1024,thread_scratch_size/1024); + int vector_length = 1; + int ui_vector_length = 1; + int team_size = 1; + int yi_team_size = 1; int team_size_max = Kokkos::TeamPolicy::team_size_max(*this); - vector_length = 8; #ifdef KOKKOS_ENABLE_CUDA team_size = 32;//max_neighs; if (team_size*vector_length > team_size_max) team_size = team_size_max/vector_length; -#else - team_size = 1; + + yi_team_size = 256; + if (yi_team_size*vector_length > team_size_max) + yi_team_size = team_size_max/vector_length; + + ui_vector_length = 8; + if (team_size*ui_vector_length > team_size_max) + team_size = team_size_max/ui_vector_length; #endif - if (beta_max < list->inum) { - d_beta = Kokkos::View("PairSNAPKokkos:beta", - list->inum,ncoeff); - d_bispectrum = Kokkos::View("PairSNAPKokkos:bispectrum", - list->inum,ncoeff); - beta_max = list->inum; + if (beta_max < inum) { + beta_max = inum; + d_beta = Kokkos::View("PairSNAPKokkos:beta",inum,ncoeff); + d_ninside = Kokkos::View("PairSNAPKokkos:ninside",inum); } - // compute dE_i/dB_i = beta_i for all i in list + int chunk_size = 2000; + chunk_offset = 0; - if (quadraticflag || eflag) - compute_bispectrum(); - compute_beta(); + snaKK.grow_rij(chunk_size,max_neighs); EV_FLOAT ev; - if (eflag) { - if (neighflag == HALF) { - typename Kokkos::TeamPolicy > policy(inum,team_size,vector_length); - Kokkos::parallel_reduce(policy - .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size)) - .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size)) - ,*this,ev); - } else if (neighflag == HALFTHREAD) { - typename Kokkos::TeamPolicy > policy(inum,team_size,vector_length); - Kokkos::parallel_reduce(policy - .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size)) - .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size)) - ,*this,ev); + while (chunk_offset < inum) { // chunk up loop to prevent running out of memory + + EV_FLOAT ev_tmp; + + if (chunk_size > inum - chunk_offset) + chunk_size = inum - chunk_offset; + + //ComputeNeigh + typename Kokkos::TeamPolicy policy_neigh(chunk_size,team_size,vector_length); + Kokkos::parallel_for("ComputeNeigh",policy_neigh,*this); + + //PreUi + typename Kokkos::TeamPolicy policy_preui(chunk_size,team_size,vector_length); + Kokkos::parallel_for("PreUi",policy_preui,*this); + + //ComputeUi + typename Kokkos::TeamPolicy policy_ui(((inum+team_size-1)/team_size)*max_neighs,team_size,ui_vector_length); + Kokkos::parallel_for("ComputeUi",policy_ui,*this); + + //Compute bispectrum + if (quadraticflag || eflag) { + //ComputeZi + typename Kokkos::TeamPolicy policy_zi(chunk_size,team_size,vector_length); + Kokkos::parallel_for("ComputeZi",policy_zi,*this); + + //ComputeBi + typename Kokkos::TeamPolicy policy_bi(chunk_size,team_size,vector_length); + Kokkos::parallel_for("ComputeBi",policy_bi,*this); } - } else { - if (neighflag == HALF) { - typename Kokkos::TeamPolicy > policy(inum,team_size,vector_length); - Kokkos::parallel_for(policy - .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size)) - .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size)) - ,*this); - } else if (neighflag == HALFTHREAD) { - typename Kokkos::TeamPolicy > policy(inum,team_size,vector_length); - Kokkos::parallel_for(policy - .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size)) - .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size)) - ,*this); + + //Compute beta = dE_i/dB_i for all i in list + typename Kokkos::TeamPolicy policy_beta(chunk_size,team_size,vector_length); + Kokkos::parallel_for("ComputeBeta",policy_beta,*this); + + //ComputeYi + typename Kokkos::TeamPolicy policy_yi(chunk_size,yi_team_size,vector_length); + Kokkos::parallel_for("ComputeYi",policy_yi,*this); + + //ComputeDuidrj + typename Kokkos::TeamPolicy policy_duidrj(((inum+team_size-1)/team_size)*max_neighs,team_size,vector_length); + Kokkos::parallel_for("ComputeDuidrj",policy_duidrj,*this); + + //ComputeDeidrj + typename Kokkos::TeamPolicy policy_deidrj(((inum+team_size-1)/team_size)*max_neighs,team_size,vector_length); + Kokkos::parallel_for("ComputeDeidrj",policy_deidrj,*this); + + //ComputeForce + if (eflag) { + if (neighflag == HALF) { + typename Kokkos::TeamPolicy > policy_force(chunk_size,team_size,vector_length); + Kokkos::parallel_reduce(policy_force + ,*this,ev_tmp); + } else if (neighflag == HALFTHREAD) { + typename Kokkos::TeamPolicy > policy_force(chunk_size,team_size,vector_length); + Kokkos::parallel_reduce(policy_force + ,*this,ev_tmp); + } + } else { + if (neighflag == HALF) { + typename Kokkos::TeamPolicy > policy_force(chunk_size,team_size,vector_length); + Kokkos::parallel_for(policy_force + ,*this); + } else if (neighflag == HALFTHREAD) { + typename Kokkos::TeamPolicy > policy_force(chunk_size,team_size,vector_length); + Kokkos::parallel_for(policy_force + ,*this); + } } - } + ev += ev_tmp; + chunk_offset += chunk_size; + } // end while if (need_dup) Kokkos::Experimental::contribute(f, dup_f); @@ -284,32 +325,19 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) } } -/* ---------------------------------------------------------------------- - compute beta -------------------------------------------------------------------------- */ - -template -void PairSNAPKokkos::compute_beta() -{ - // TODO: use RangePolicy instead, or thread over ncoeff? - int inum = list->inum; - typename Kokkos::TeamPolicy policy(inum,team_size,vector_length); - Kokkos::parallel_for(policy - .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size)) - .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size)) - ,*this); -} - /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void PairSNAPKokkos::operator() (TagPairSNAPBeta,const typename Kokkos::TeamPolicy::member_type& team) const { + // TODO: use RangePolicy instead, or thread over ncoeff? const int ii = team.league_rank(); - const int i = d_ilist[ii]; + const int i = d_ilist[ii + chunk_offset]; const int itype = type[i]; const int ielem = d_map[itype]; + SNAKokkos my_sna = snaKK; + Kokkos::View> d_coeffi(d_coeffelem,ielem,Kokkos::ALL); @@ -319,11 +347,11 @@ void PairSNAPKokkos::operator() (TagPairSNAPBeta,const typename Kokk if (quadraticflag) { int k = ncoeff+1; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bveci = d_bispectrum(ii,icoeff); + double bveci = my_sna.blist(ii,icoeff); d_beta(ii,icoeff) += d_coeffi[k]*bveci; k++; for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - double bvecj = d_bispectrum(ii,jcoeff); + double bvecj = my_sna.blist(ii,jcoeff); d_beta(ii,icoeff) += d_coeffi[k]*bvecj; d_beta(ii,jcoeff) += d_coeffi[k]*bveci; k++; @@ -332,105 +360,6 @@ void PairSNAPKokkos::operator() (TagPairSNAPBeta,const typename Kokk } } -/* ---------------------------------------------------------------------- - compute bispectrum -------------------------------------------------------------------------- */ - -template -void PairSNAPKokkos::compute_bispectrum() -{ - int inum = list->inum; - typename Kokkos::TeamPolicy policy(inum,team_size,vector_length); - Kokkos::parallel_for(policy - .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size)) - .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size)) - ,*this); -} - -/* ---------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPBispectrum,const typename Kokkos::TeamPolicy::member_type& team) const { - - const int ii = team.league_rank(); - const int i = d_ilist[ii]; - SNAKokkos my_sna(snaKK,team); - const double xtmp = x(i,0); - const double ytmp = x(i,1); - const double ztmp = x(i,2); - const int itype = type[i]; - const int ielem = d_map[itype]; - const double radi = d_radelem[ielem]; - - const int num_neighs = d_numneigh[i]; - - // rij[][3] = displacements between atom I and those neighbors - // inside = indices of neighbors of I within cutoff - // wj = weights for neighbors of I within cutoff - // rcutij = cutoffs for neighbors of I within cutoff - // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi - - int ninside = 0; - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,num_neighs), - [&] (const int jj, int& count) { - Kokkos::single(Kokkos::PerThread(team), [&] (){ - T_INT j = d_neighbors(i,jj); - const F_FLOAT dx = x(j,0) - xtmp; - const F_FLOAT dy = x(j,1) - ytmp; - const F_FLOAT dz = x(j,2) - ztmp; - - const int jtype = type(j); - const F_FLOAT rsq = dx*dx + dy*dy + dz*dz; - const int elem_j = d_map[jtype]; - - if ( rsq < rnd_cutsq(itype,jtype) ) - count++; - }); - },ninside); - - if (team.team_rank() == 0) - Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,num_neighs), - [&] (const int jj, int& offset, bool final) { - //for (int jj = 0; jj < num_neighs; jj++) { - T_INT j = d_neighbors(i,jj); - const F_FLOAT dx = x(j,0) - xtmp; - const F_FLOAT dy = x(j,1) - ytmp; - const F_FLOAT dz = x(j,2) - ztmp; - - const int jtype = type(j); - const F_FLOAT rsq = dx*dx + dy*dy + dz*dz; - const int elem_j = d_map[jtype]; - - if ( rsq < rnd_cutsq(itype,jtype) ) { - if (final) { - my_sna.rij(offset,0) = dx; - my_sna.rij(offset,1) = dy; - my_sna.rij(offset,2) = dz; - my_sna.inside[offset] = j; - my_sna.wj[offset] = d_wjelem[elem_j]; - my_sna.rcutij[offset] = (radi + d_radelem[elem_j])*rcutfac; - } - offset++; - } - }); - team.team_barrier(); - - // compute Ui, Zi, and Bi for atom I - - my_sna.compute_ui(team,ninside); - team.team_barrier(); - - my_sna.compute_zi(team); - team.team_barrier(); - - my_sna.compute_bi(team); - team.team_barrier(); - - for (int icoeff = 0; icoeff < ncoeff; icoeff++) - d_bispectrum(ii,icoeff) = my_sna.blist[icoeff]; -} - /* ---------------------------------------------------------------------- allocate all arrays ------------------------------------------------------------------------- */ @@ -496,30 +425,21 @@ void PairSNAPKokkos::coeff(int narg, char **arg) Kokkos::deep_copy(d_coeffelem,h_coeffelem); Kokkos::deep_copy(d_map,h_map); - // allocate memory for per OpenMP thread data which - // is wrapped into the sna class - snaKK = SNAKokkos(rfac0,twojmax, rmin0,switchflag,bzeroflag); - snaKK.grow_rij(0); + snaKK.grow_rij(0,0); snaKK.init(); } /* ---------------------------------------------------------------------- */ template -template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPCompute,const typename Kokkos::TeamPolicy >::member_type& team, EV_FLOAT& ev) const { - - // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial - - auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); - auto a_f = v_f.template access::value>(); +void PairSNAPKokkos::operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy::member_type& team) const { const int ii = team.league_rank(); - const int i = d_ilist[ii]; - SNAKokkos my_sna(snaKK,team); + const int i = d_ilist[ii + chunk_offset]; + SNAKokkos my_sna = snaKK; const double xtmp = x(i,0); const double ytmp = x(i,1); const double ztmp = x(i,2); @@ -553,6 +473,8 @@ void PairSNAPKokkos::operator() (TagPairSNAPCompute::operator() (TagPairSNAPCompute +KOKKOS_INLINE_FUNCTION +void PairSNAPKokkos::operator() (TagPairSNAPPreUi,const typename Kokkos::TeamPolicy::member_type& team) const { + const int ii = team.league_rank(); + SNAKokkos my_sna = snaKK; + my_sna.pre_ui(team,ii); +} - my_sna.compute_ui(team,ninside); - team.team_barrier(); +template +KOKKOS_INLINE_FUNCTION +void PairSNAPKokkos::operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy::member_type& team) const { + SNAKokkos my_sna = snaKK; - // for neighbors of I within cutoff: - // compute Fij = dEi/dRj = -dEi/dRi - // add to Fi, subtract from Fj + // Extract the atom number + const int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((inum+team.team_size()-1)/team.team_size())); + if (ii >= inum) return; - my_sna.compute_yi(team,d_beta,ii); - team.team_barrier(); + // Extract the neighbor number + const int jj = team.league_rank() / ((inum+team.team_size()-1)/team.team_size()); + const int ninside = d_ninside(ii); + if (jj >= ninside) return; - Kokkos::View> - d_coeffi(d_coeffelem,ielem,Kokkos::ALL); + my_sna.compute_ui(team,ii,jj); +} + +template +KOKKOS_INLINE_FUNCTION +void PairSNAPKokkos::operator() (TagPairSNAPComputeYi,const typename Kokkos::TeamPolicy::member_type& team) const { + const int ii = team.league_rank(); + SNAKokkos my_sna = snaKK; + my_sna.compute_yi(team,ii,d_beta); +} + +template +KOKKOS_INLINE_FUNCTION +void PairSNAPKokkos::operator() (TagPairSNAPComputeZi,const typename Kokkos::TeamPolicy::member_type& team) const { + const int ii = team.league_rank(); + SNAKokkos my_sna = snaKK; + my_sna.compute_zi(team,ii); +} + +template +KOKKOS_INLINE_FUNCTION +void PairSNAPKokkos::operator() (TagPairSNAPComputeBi,const typename Kokkos::TeamPolicy::member_type& team) const { + const int ii = team.league_rank(); + SNAKokkos my_sna = snaKK; + my_sna.compute_bi(team,ii); +} + +template +KOKKOS_INLINE_FUNCTION +void PairSNAPKokkos::operator() (TagPairSNAPComputeDuidrj,const typename Kokkos::TeamPolicy::member_type& team) const { + SNAKokkos my_sna = snaKK; + + // Extract the atom number + const int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((inum+team.team_size()-1)/team.team_size())); + if (ii >= inum) return; + + // Extract the neighbor number + const int jj = team.league_rank() / ((inum+team.team_size()-1)/team.team_size()); + const int ninside = d_ninside(ii); + if (jj >= ninside) return; + + my_sna.compute_duidrj(team,ii,jj); +} + +template +KOKKOS_INLINE_FUNCTION +void PairSNAPKokkos::operator() (TagPairSNAPComputeDeidrj,const typename Kokkos::TeamPolicy::member_type& team) const { + SNAKokkos my_sna = snaKK; + + // Extract the atom number + const int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((inum+team.team_size()-1)/team.team_size())); + if (ii >= inum) return; + + // Extract the neighbor number + const int jj = team.league_rank() / ((inum+team.team_size()-1)/team.team_size()); + const int ninside = d_ninside(ii); + if (jj >= ninside) return; + + my_sna.compute_deidrj(team,ii,jj); +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairSNAPKokkos::operator() (TagPairSNAPComputeForce,const typename Kokkos::TeamPolicy >::member_type& team, EV_FLOAT& ev) const { + + // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + + auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); + auto a_f = v_f.template access::value>(); + + const int ii = team.league_rank(); + const int i = d_ilist[ii + chunk_offset]; + SNAKokkos my_sna = snaKK; + const int ninside = d_ninside(ii); Kokkos::parallel_for (Kokkos::TeamThreadRange(team,ninside), [&] (const int jj) { - //for (int jj = 0; jj < ninside; jj++) { - int j = my_sna.inside[jj]; - - my_sna.compute_duidrj(team,&my_sna.rij(jj,0), - my_sna.wj[jj],my_sna.rcutij[jj],jj); + int j = my_sna.inside(ii,jj); F_FLOAT fij[3]; - my_sna.compute_deidrj(team,fij); + fij[0] = my_sna.dedr(ii,jj,0); + fij[1] = my_sna.dedr(ii,jj,1); + fij[2] = my_sna.dedr(ii,jj,2); Kokkos::single(Kokkos::PerThread(team), [&] (){ a_f(i,0) += fij[0]; @@ -620,8 +623,8 @@ void PairSNAPKokkos::operator() (TagPairSNAPCompute(ev,i,j, fij[0],fij[1],fij[2], - -my_sna.rij(jj,0),-my_sna.rij(jj,1), - -my_sna.rij(jj,2)); + -my_sna.rij(ii,jj,0),-my_sna.rij(ii,jj,1), + -my_sna.rij(ii,jj,2)); } } @@ -633,6 +636,11 @@ void PairSNAPKokkos::operator() (TagPairSNAPCompute> + d_coeffi(d_coeffelem,ielem,Kokkos::ALL); + Kokkos::single(Kokkos::PerTeam(team), [&] () { // evdwl = energy of atom I, sum over coeffs_k * Bi_k @@ -640,21 +648,21 @@ void PairSNAPKokkos::operator() (TagPairSNAPCompute::operator() (TagPairSNAPCompute template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAPCompute,const typename Kokkos::TeamPolicy >::member_type& team) const { +void PairSNAPKokkos::operator() (TagPairSNAPComputeForce,const typename Kokkos::TeamPolicy >::member_type& team) const { EV_FLOAT ev; - this->template operator()(TagPairSNAPCompute(), team, ev); + this->template operator()(TagPairSNAPComputeForce(), team, ev); } /* ---------------------------------------------------------------------- */ @@ -738,4 +746,5 @@ double PairSNAPKokkos::memory_usage() bytes += snaKK.memory_usage(); return bytes; } + } diff --git a/src/KOKKOS/sna_kokkos.h b/src/KOKKOS/sna_kokkos.h index a53a614229..2dbfdcb47c 100644 --- a/src/KOKKOS/sna_kokkos.h +++ b/src/KOKKOS/sna_kokkos.h @@ -25,6 +25,9 @@ namespace LAMMPS_NS { +typedef double SNAreal; +typedef struct { SNAreal re, im; } SNAcomplex; + struct SNAKK_ZINDICES { int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju; }; @@ -39,12 +42,22 @@ class SNAKokkos { public: typedef Kokkos::View t_sna_1i; typedef Kokkos::View t_sna_1d; - typedef Kokkos::View > t_sna_1d_atomic; - typedef Kokkos::View t_sna_2d; - typedef Kokkos::View t_sna_3d; - typedef Kokkos::View t_sna_4d; - typedef Kokkos::View t_sna_3d3; - typedef Kokkos::View t_sna_5d; + typedef Kokkos::View > t_sna_1d_atomic; + typedef Kokkos::View t_sna_2i; + typedef Kokkos::View t_sna_2d; + typedef Kokkos::View t_sna_3d; + typedef Kokkos::View t_sna_4d; + typedef Kokkos::View t_sna_3d3; + typedef Kokkos::View t_sna_5d; + + typedef Kokkos::View t_sna_1c; + typedef Kokkos::View > t_sna_1c_atomic; + typedef Kokkos::View t_sna_2c; + typedef Kokkos::View t_sna_2c_cpu; + typedef Kokkos::View t_sna_3c; + typedef Kokkos::View t_sna_4c; + typedef Kokkos::View t_sna_3c3; + typedef Kokkos::View t_sna_5c; inline SNAKokkos() {}; @@ -63,36 +76,31 @@ inline inline void init(); // -inline - T_INT size_team_scratch_arrays(); - -inline - T_INT size_thread_scratch_arrays(); - double memory_usage(); int ncoeff; // functions for bispectrum coefficients - KOKKOS_INLINE_FUNCTION - void compute_ui(const typename Kokkos::TeamPolicy::member_type& team, int); // ForceSNAP + void pre_ui(const typename Kokkos::TeamPolicy::member_type& team, int); // ForceSNAP KOKKOS_INLINE_FUNCTION - void compute_zi(const typename Kokkos::TeamPolicy::member_type& team); // ForceSNAP + void compute_ui(const typename Kokkos::TeamPolicy::member_type& team, int, int); // ForceSNAP KOKKOS_INLINE_FUNCTION - void compute_yi(const typename Kokkos::TeamPolicy::member_type& team, - const Kokkos::View &beta, const int ii); // ForceSNAP + void compute_ui_orig(const typename Kokkos::TeamPolicy::member_type& team, int, int); // ForceSNAP KOKKOS_INLINE_FUNCTION - void compute_bi(const typename Kokkos::TeamPolicy::member_type& team); // ForceSNAP + void compute_zi(const typename Kokkos::TeamPolicy::member_type& team, int); // ForceSNAP + KOKKOS_INLINE_FUNCTION + void compute_yi(const typename Kokkos::TeamPolicy::member_type& team, int, + const Kokkos::View &beta); // ForceSNAP + KOKKOS_INLINE_FUNCTION + void compute_bi(const typename Kokkos::TeamPolicy::member_type& team, int); // ForceSNAP // functions for derivatives KOKKOS_INLINE_FUNCTION - void compute_duidrj(const typename Kokkos::TeamPolicy::member_type& team, double*, double, double, int); //ForceSNAP + void compute_duidrj(const typename Kokkos::TeamPolicy::member_type& team, int, int); //ForceSNAP KOKKOS_INLINE_FUNCTION - void compute_dbidrj(const typename Kokkos::TeamPolicy::member_type& team); //ForceSNAP - KOKKOS_INLINE_FUNCTION - void compute_deidrj(const typename Kokkos::TeamPolicy::member_type& team, double *); // ForceSNAP + void compute_deidrj(const typename Kokkos::TeamPolicy::member_type& team, int, int); // ForceSNAP KOKKOS_INLINE_FUNCTION double compute_sfac(double, double); // add_uarraytot, compute_duarray KOKKOS_INLINE_FUNCTION @@ -109,29 +117,26 @@ inline // Per InFlight Particle - t_sna_2d rij; - t_sna_1i inside; - t_sna_1d wj; - t_sna_1d rcutij; - int nmax; + t_sna_3d rij; + t_sna_2i inside; + t_sna_2d wj; + t_sna_2d rcutij; + t_sna_3d dedr; + int natom, nmax; - void grow_rij(int); + void grow_rij(int, int); int twojmax, diagonalstyle; - // Per InFlight Particle - t_sna_1d blist; - t_sna_1d ulisttot_r, ulisttot_i; - t_sna_1d_atomic ulisttot_r_a, ulisttot_i_a; - t_sna_1d zlist_r, zlist_i; - t_sna_2d ulist_r_ij, ulist_i_ij; + + t_sna_2d blist; + t_sna_2c_cpu ulisttot; + t_sna_2c zlist; - // Per InFlight Interaction - t_sna_1d ulist_r, ulist_i; - t_sna_1d_atomic ylist_r, ylist_i; + t_sna_3c ulist; + t_sna_2c ylist; // derivatives of data - t_sna_2d dulist_r, dulist_i; - t_sna_2d dblist; + t_sna_4c dulist; private: double rmin0, rfac0; @@ -168,14 +173,14 @@ inline inline void init_rootpqarray(); // init() KOKKOS_INLINE_FUNCTION - void zero_uarraytot(const typename Kokkos::TeamPolicy::member_type& team); // compute_ui + void zero_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, int); // compute_ui KOKKOS_INLINE_FUNCTION - void addself_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, double); // compute_ui + void addself_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, int, double); // compute_ui KOKKOS_INLINE_FUNCTION - void add_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, double, double, double, int); // compute_ui + void add_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, int, int, double, double, double); // compute_ui KOKKOS_INLINE_FUNCTION - void compute_uarray(const typename Kokkos::TeamPolicy::member_type& team, + void compute_uarray(const typename Kokkos::TeamPolicy::member_type& team, int, int, double, double, double, double, double); // compute_ui inline @@ -184,9 +189,9 @@ inline inline int compute_ncoeff(); // SNAKokkos() KOKKOS_INLINE_FUNCTION - void compute_duarray(const typename Kokkos::TeamPolicy::member_type& team, + void compute_duarray(const typename Kokkos::TeamPolicy::member_type& team, int, int, double, double, double, // compute_duidrj - double, double, double, double, double, int); + double, double, double, double, double); // Sets the style for the switching function // 0 = none @@ -197,7 +202,7 @@ inline double wself; int bzero_flag; // 1 if bzero subtracted from barray - Kokkos::View bzero; // array of B values for isolated atoms + Kokkos::View bzero; // array of B values for isolated atoms }; } diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index f0d45cb5c2..36765e9cd6 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -41,8 +41,6 @@ SNAKokkos::SNAKokkos(double rfac0_in, ncoeff = compute_ncoeff(); - //create_twojmax_arrays(); - nmax = 0; build_indexlist(); @@ -63,37 +61,6 @@ SNAKokkos::SNAKokkos(double rfac0_in, } } -template -KOKKOS_INLINE_FUNCTION -SNAKokkos::SNAKokkos(const SNAKokkos& sna, const typename Kokkos::TeamPolicy::member_type& team) { - wself = sna.wself; - - rfac0 = sna.rfac0; - rmin0 = sna.rmin0; - switch_flag = sna.switch_flag; - bzero_flag = sna.bzero_flag; - - twojmax = sna.twojmax; - - ncoeff = sna.ncoeff; - nmax = sna.nmax; - idxz = sna.idxz; - idxb = sna.idxb; - idxcg_max = sna.idxcg_max; - idxu_max = sna.idxu_max; - idxz_max = sna.idxz_max; - idxb_max = sna.idxb_max; - idxcg_block = sna.idxcg_block; - idxu_block = sna.idxu_block; - idxz_block = sna.idxz_block; - idxb_block = sna.idxb_block; - cglist = sna.cglist; - rootpqarray = sna.rootpqarray; - bzero = sna.bzero; - create_team_scratch_arrays(team); - create_thread_scratch_arrays(team); -} - /* ---------------------------------------------------------------------- */ template @@ -245,18 +212,80 @@ void SNAKokkos::init() template inline -void SNAKokkos::grow_rij(int newnmax) +void SNAKokkos::grow_rij(int newnatom, int newnmax) { - if(newnmax <= nmax) return; + if(newnatom <= natom && newnmax <= nmax) return; + natom = newnatom; nmax = newnmax; + + rij = t_sna_3d("sna:rij",natom,nmax,3); + inside = t_sna_2i("sna:inside",natom,nmax); + wj = t_sna_2d("sna:wj",natom,nmax); + rcutij = t_sna_2d("sna:rcutij",natom,nmax); + dedr = t_sna_3d("sna:dedr",natom,nmax,3); + + blist = t_sna_2d("sna:blist",natom,idxb_max); + ulisttot = t_sna_2c_cpu("sna:ulisttot",natom,idxu_max); + zlist = t_sna_2c("sna:zlist",natom,idxz_max); + + ulist = t_sna_3c("sna:ulist",natom,nmax,idxu_max); + ylist = t_sna_2c("sna:ylist",natom,idxu_max); + + dulist = t_sna_4c("sna:dulist",natom,nmax,idxu_max); } + +/* ---------------------------------------------------------------------- + * compute Ui by summing over neighbors j + * ------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void SNAKokkos::pre_ui(const typename Kokkos::TeamPolicy::member_type& team, int iatom) +{ + if(team.team_rank() == 0) { + zero_uarraytot(team,iatom); + //Kokkos::single(Kokkos::PerThread(team), [&] (){ + addself_uarraytot(team,iatom,wself); + //}); + } + team.team_barrier(); +} + /* ---------------------------------------------------------------------- compute Ui by summing over neighbors j ------------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_ui(const typename Kokkos::TeamPolicy::member_type& team, int jnum) +void SNAKokkos::compute_ui(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor) +{ + double rsq, r, x, y, z, z0, theta0; + + // utot(j,ma,mb) = 0 for all j,ma,ma + // utot(j,ma,ma) = 1 for all j,ma + // for j in neighbors of i: + // compute r0 = (x,y,z,z0) + // utot(j,ma,mb) += u(r0;j,ma,mb) for all j,ma,mb + + x = rij(iatom,jnbor,0); + y = rij(iatom,jnbor,1); + z = rij(iatom,jnbor,2); + rsq = x * x + y * y + z * z; + r = sqrt(rsq); + + theta0 = (r - rmin0) * rfac0 * MY_PI / (rcutij(iatom,jnbor) - rmin0); + // theta0 = (r - rmin0) * rscale0; + z0 = r / tan(theta0); + + compute_uarray(team, iatom, jnbor, x, y, z, z0, r); + //Kokkos::single(Kokkos::PerThread(team), [&] (){ + add_uarraytot(team, iatom, jnbor, r, wj(iatom,jnbor), rcutij(iatom,jnbor)); + //}); +} + +template +KOKKOS_INLINE_FUNCTION +void SNAKokkos::compute_ui_orig(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnum) { double rsq, r, x, y, z, z0, theta0; @@ -267,9 +296,9 @@ void SNAKokkos::compute_ui(const typename Kokkos::TeamPolicy::compute_ui(const typename Kokkos::TeamPolicy::compute_ui(const typename Kokkos::TeamPolicy KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_zi(const typename Kokkos::TeamPolicy::member_type& team) +void SNAKokkos::compute_zi(const typename Kokkos::TeamPolicy::member_type& team, int iatom) { Kokkos::parallel_for(Kokkos::TeamThreadRange(team,idxz_max), [&] (const int& jjz) { @@ -318,8 +347,8 @@ void SNAKokkos::compute_zi(const typename Kokkos::TeamPolicy::compute_zi(const typename Kokkos::TeamPolicy::compute_zi(const typename Kokkos::TeamPolicy KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_yi(const typename Kokkos::TeamPolicy::member_type& team, - const Kokkos::View &beta, const int ii) +void SNAKokkos::compute_yi(const typename Kokkos::TeamPolicy::member_type& team, int iatom, + const Kokkos::View &beta) { double betaj; + const int ii = iatom; { - double* const ptr = ylist_r.data(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,ylist_r.span()), + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,ylist.extent(1)), [&] (const int& i) { - ptr[i] = 0.0; - }); - } - { - double* const ptr = ylist_i.data(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,ylist_i.span()), - [&] (const int& i) { - ptr[i] = 0.0; + ylist(iatom,i).re = 0.0; + ylist(iatom,i).im = 0.0; }); } + //int flopsum = 0; + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,idxz_max), [&] (const int& jjz) { //for(int jjz = 0; jjz < idxz_max; jjz++) { @@ -410,18 +430,15 @@ void SNAKokkos::compute_yi(const typename Kokkos::TeamPolicy::compute_yi(const typename Kokkos::TeamPolicy::compute_yi(const typename Kokkos::TeamPolicy KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_deidrj(const typename Kokkos::TeamPolicy::member_type& team, double* dedr) +void SNAKokkos::compute_deidrj(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor) { t_scalar3 sum; @@ -482,9 +501,9 @@ void SNAKokkos::compute_deidrj(const typename Kokkos::TeamPolicy::compute_deidrj(const typename Kokkos::TeamPolicy::compute_deidrj(const typename Kokkos::TeamPolicy KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_bi(const typename Kokkos::TeamPolicy::member_type& team) +void SNAKokkos::compute_bi(const typename Kokkos::TeamPolicy::member_type& team, int iatom) { // for j1 = 0,...,twojmax // for j2 = 0,twojmax @@ -555,8 +575,8 @@ void SNAKokkos::compute_bi(const typename Kokkos::TeamPolicy::compute_bi(const typename Kokkos::TeamPolicy::compute_bi(const typename Kokkos::TeamPolicy::compute_bi(const typename Kokkos::TeamPolicy -KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_dbidrj(const typename Kokkos::TeamPolicy::member_type& team) -{ - // for j1 = 0,...,twojmax - // for j2 = 0,twojmax - // for j = |j1-j2|,Min(twojmax,j1+j2),2 - // zdb = 0 - // for mb = 0,...,jmid - // for ma = 0,...,j - // zdb += - // Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) - // dbdr(j1,j2,j) += 2*zdb - // zdb = 0 - // for mb1 = 0,...,j1mid - // for ma1 = 0,...,j1 - // zdb += - // Conj(dudr(j1,ma1,mb1))*z(j,j2,j1,ma1,mb1) - // dbdr(j1,j2,j) += 2*zdb*(j+1)/(j1+1) - // zdb = 0 - // for mb2 = 0,...,j2mid - // for ma2 = 0,...,j2 - // zdb += - // Conj(dudr(j2,ma2,mb2))*z(j1,j,j2,ma2,mb2) - // dbdr(j1,j2,j) += 2*zdb*(j+1)/(j2+1) - - - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,idxb_max), - [&] (const int& jjb) { - //for(int jjb = 0; jjb < idxb_max; jjb++) { - const int j1 = idxb[jjb].j1; - const int j2 = idxb[jjb].j2; - const int j = idxb[jjb].j; - -// dbdr = dblist(jjb); -// dbdr[0] = 0.0; -// dbdr[1] = 0.0; -// dbdr[2] = 0.0; - - t_scalar3 dbdr,sumzdu_r; - // Sum terms Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) - - int jjz = idxz_block(j1,j2,j); - int jju = idxu_block[j]; - - for(int mb = 0; 2*mb < j; mb++) - for(int ma = 0; ma <= j; ma++) { - const int jju_index = jju+mb*(j+1)+ma; - const int jjz_index = jjz+mb*(j+1)+ma; - sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz_index] + dulist_i(jju_index,0) * zlist_i[jjz_index]); - sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz_index] + dulist_i(jju_index,1) * zlist_i[jjz_index]); - sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz_index] + dulist_i(jju_index,2) * zlist_i[jjz_index]); - } //end loop over ma mb - - // For j even, handle middle column - - if (j%2 == 0) { - int mb = j/2; - for(int ma = 0; ma <= mb; ma++) { - const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma; - const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma; - sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz_index] + dulist_i(jju_index,0) * zlist_i[jjz_index]); - sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz_index] + dulist_i(jju_index,1) * zlist_i[jjz_index]); - sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz_index] + dulist_i(jju_index,2) * zlist_i[jjz_index]); - } - int ma = mb; - const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma; - const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma; - for(int k = 0; k < 3; k++) { - sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz] + dulist_i(jju_index,0) * zlist_i[jjz_index])*0.5; - sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz] + dulist_i(jju_index,1) * zlist_i[jjz_index])*0.5; - sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz] + dulist_i(jju_index,2) * zlist_i[jjz_index])*0.5; - } - } // end if jeven - - dbdr += 2.0*sumzdu_r; - - // Sum over Conj(dudr(j1,ma1,mb1))*z(j,j2,j1,ma1,mb1) - - double j1fac = (j+1)/(j1+1.0); - - jjz = idxz_block(j,j2,j1); - jju = idxu_block[j1]; - - sumzdu_r.x = 0.0; sumzdu_r.y = 0.0; sumzdu_r.z = 0.0; - - for(int mb = 0; 2*mb < j1; mb++) - for(int ma = 0; ma <= j1; ma++) { - const int jju_index = jju+mb*(j1+1)+ma; - const int jjz_index = jjz+mb*(j1+1)+ma; - sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz_index] + dulist_i(jju_index,0) * zlist_i[jjz_index]); - sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz_index] + dulist_i(jju_index,1) * zlist_i[jjz_index]); - sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz_index] + dulist_i(jju_index,2) * zlist_i[jjz_index]); - } //end loop over ma1 mb1 - - // For j1 even, handle middle column - - if (j1%2 == 0) { - const int mb = j1/2; - for(int ma = 0; ma <= mb; ma++) { - const int jju_index = jju+(mb-1)*(j1+1)+(j1+1)+ma; - const int jjz_index = jjz+(mb-1)*(j1+1)+(j1+1)+ma; - sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz_index] + dulist_i(jju_index,0) * zlist_i[jjz_index]); - sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz_index] + dulist_i(jju_index,1) * zlist_i[jjz_index]); - sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz_index] + dulist_i(jju_index,2) * zlist_i[jjz_index]); - } - int ma = mb; - const int jju_index = jju+(mb-1)*(j1+1)+(j1+1)+ma; - const int jjz_index = jjz+(mb-1)*(j1+1)+(j1+1)+ma; - for(int k = 0; k < 3; k++) { - sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz] + dulist_i(jju_index,0) * zlist_i[jjz_index])*0.5; - sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz] + dulist_i(jju_index,1) * zlist_i[jjz_index])*0.5; - sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz] + dulist_i(jju_index,2) * zlist_i[jjz_index])*0.5; - } - } // end if j1even - - dbdr += 2.0*sumzdu_r*j1fac; - - // Sum over Conj(dudr(j2,ma2,mb2))*z(j1,j,j2,ma2,mb2) - - double j2fac = (j+1)/(j2+1.0); - - jjz = idxz_block(j,j1,j2); - jju = idxu_block[j2]; - - sumzdu_r.x = 0.0; sumzdu_r.y = 0.0; sumzdu_r.z = 0.0; - - for(int mb = 0; 2*mb < j2; mb++) - for(int ma = 0; ma <= j2; ma++) { - const int jju_index = jju+mb*(j2+1)+ma; - const int jjz_index = jjz+mb*(j2+1)+ma; - sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz_index] + dulist_i(jju_index,0) * zlist_i[jjz_index]); - sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz_index] + dulist_i(jju_index,1) * zlist_i[jjz_index]); - sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz_index] + dulist_i(jju_index,2) * zlist_i[jjz_index]); - } //end loop over ma2 mb2 - - // For j2 even, handle middle column - - if (j2%2 == 0) { - const int mb = j2/2; - for(int ma = 0; ma <= mb; ma++) { - const int jju_index = jju+(mb-1)*(j2+1)+(j2+1)+ma; - const int jjz_index = jjz+(mb-1)*(j2+1)+(j2+1)+ma; - sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz_index] + dulist_i(jju_index,0) * zlist_i[jjz_index]); - sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz_index] + dulist_i(jju_index,1) * zlist_i[jjz_index]); - sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz_index] + dulist_i(jju_index,2) * zlist_i[jjz_index]); - } - int ma = mb; - const int jju_index = jju+(mb-1)*(j2+1)+(j2+1)+ma; - const int jjz_index = jjz+(mb-1)*(j2+1)+(j2+1)+ma; - for(int k = 0; k < 3; k++) { - sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz] + dulist_i(jju_index,0) * zlist_i[jjz_index])*0.5; - sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz] + dulist_i(jju_index,1) * zlist_i[jjz_index])*0.5; - sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz] + dulist_i(jju_index,2) * zlist_i[jjz_index])*0.5; - } - } // end if j2even - - dbdr += 2.0*sumzdu_r*j2fac; - dblist(jjb,0) = dbdr.x; - dblist(jjb,1) = dbdr.y; - dblist(jjb,2) = dbdr.z; - - }); //end loop over j1 j2 j -} - /* ---------------------------------------------------------------------- calculate derivative of Ui w.r.t. atom j ------------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_duidrj(const typename Kokkos::TeamPolicy::member_type& team, - double* rij, double wj, double rcut, int jj) +void SNAKokkos::compute_duidrj(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor) { double rsq, r, x, y, z, z0, theta0, cs, sn; double dz0dr; - x = rij[0]; - y = rij[1]; - z = rij[2]; + x = rij(iatom,jnbor,0); + y = rij(iatom,jnbor,1); + z = rij(iatom,jnbor,2); rsq = x * x + y * y + z * z; r = sqrt(rsq); - double rscale0 = rfac0 * MY_PI / (rcut - rmin0); + double rscale0 = rfac0 * MY_PI / (rcutij(iatom,jnbor) - rmin0); theta0 = (r - rmin0) * rscale0; cs = cos(theta0); sn = sin(theta0); z0 = r * cs / sn; dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq; - compute_duarray(team, x, y, z, z0, r, dz0dr, wj, rcut, jj); + compute_duarray(team, iatom, jnbor, x, y, z, z0, r, dz0dr, wj(iatom,jnbor), rcutij(iatom,jnbor)); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -void SNAKokkos::zero_uarraytot(const typename Kokkos::TeamPolicy::member_type& team) +void SNAKokkos::zero_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, int iatom) { { - double* const ptr = ulisttot_r.data(); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,ulisttot_r.span()), + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,ulisttot.extent(1)), [&] (const int& i) { - ptr[i] = 0.0; - }); - } - { - double* const ptr = ulisttot_i.data(); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,ulisttot_i.span()), - [&] (const int& i) { - ptr[i] = 0.0; + ulisttot(iatom,i).re = 0.0; + ulisttot(iatom,i).im = 0.0; }); } } @@ -822,15 +663,15 @@ void SNAKokkos::zero_uarraytot(const typename Kokkos::TeamPolicy KOKKOS_INLINE_FUNCTION -void SNAKokkos::addself_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, double wself_in) +void SNAKokkos::addself_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, int iatom, double wself_in) { Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,twojmax+1), [&] (const int& j) { //for (int j = 0; j <= twojmax; j++) int jju = idxu_block[j]; for (int ma = 0; ma <= j; ma++) { - ulisttot_r[jju] = wself_in; - ulisttot_i[jju] = 0.0; + ulisttot(iatom,jju).re = wself_in; + ulisttot(iatom,jju).im = 0.0; jju += j+2; } }); @@ -842,28 +683,15 @@ void SNAKokkos::addself_uarraytot(const typename Kokkos::TeamPolicy< template KOKKOS_INLINE_FUNCTION -void SNAKokkos::add_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, - double r, double wj, double rcut, int j) +void SNAKokkos::add_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor, + double r, double wj, double rcut) { const double sfac = compute_sfac(r, rcut) * wj; - const double* const ptr_r = ulist_r.data(); - const double* const ptr_i = ulist_i.data(); - double* const ptrtot_r = ulisttot_r.data(); - double* const ptrtot_i = ulisttot_i.data(); - - Kokkos::View> - ulist_r_j(ulist_r_ij,j,Kokkos::ALL); - Kokkos::View> - ulist_i_j(ulist_i_ij,j,Kokkos::ALL); - - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,ulisttot_r.span()), + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,ulisttot.extent(1)), [&] (const int& i) { - Kokkos::atomic_add(ptrtot_r+i, sfac * ptr_r[i]); - Kokkos::atomic_add(ptrtot_i+i, sfac * ptr_i[i]); - - ulist_r_j(i) = ulist_r(i); - ulist_i_j(i) = ulist_i(i); + Kokkos::atomic_add(&(ulisttot(iatom,i).re), sfac * ulist(iatom,jnbor,i).re); + Kokkos::atomic_add(&(ulisttot(iatom,i).im), sfac * ulist(iatom,jnbor,i).im); }); } @@ -873,7 +701,7 @@ void SNAKokkos::add_uarraytot(const typename Kokkos::TeamPolicy KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_uarray(const typename Kokkos::TeamPolicy::member_type& team, +void SNAKokkos::compute_uarray(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor, double x, double y, double z, double z0, double r) { @@ -891,8 +719,8 @@ void SNAKokkos::compute_uarray(const typename Kokkos::TeamPolicy::compute_uarray(const typename Kokkos::TeamPolicy::compute_uarray(const typename Kokkos::TeamPolicy::compute_uarray(const typename Kokkos::TeamPolicy KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_duarray(const typename Kokkos::TeamPolicy::member_type& team, +void SNAKokkos::compute_duarray(const typename Kokkos::TeamPolicy::member_type& team, int iatom, int jnbor, double x, double y, double z, double z0, double r, double dz0dr, - double wj, double rcut, int jj) + double wj, double rcut) { double r0inv; double a_r, a_i, b_r, b_i; @@ -1012,17 +840,12 @@ void SNAKokkos::compute_duarray(const typename Kokkos::TeamPolicy> - ulist_r(ulist_r_ij,jj,Kokkos::ALL); - Kokkos::View> - ulist_i(ulist_i_ij,jj,Kokkos::ALL); - - dulist_r(0,0) = 0.0; - dulist_r(0,1) = 0.0; - dulist_r(0,2) = 0.0; - dulist_i(0,0) = 0.0; - dulist_i(0,1) = 0.0; - dulist_i(0,2) = 0.0; + dulist(iatom,jnbor,0,0).re = 0.0; + dulist(iatom,jnbor,0,1).re = 0.0; + dulist(iatom,jnbor,0,2).re = 0.0; + dulist(iatom,jnbor,0,0).im = 0.0; + dulist(iatom,jnbor,0,1).im = 0.0; + dulist(iatom,jnbor,0,2).im = 0.0; for (int j = 1; j <= twojmax; j++) { int jju = idxu_block[j]; @@ -1031,42 +854,42 @@ void SNAKokkos::compute_duarray(const typename Kokkos::TeamPolicy::compute_duarray(const typename Kokkos::TeamPolicy::compute_duarray(const typename Kokkos::TeamPolicy -KOKKOS_INLINE_FUNCTION -void SNAKokkos::create_team_scratch_arrays(const typename Kokkos::TeamPolicy::member_type& team) -{ - ulisttot_r_a = ulisttot_r = t_sna_1d(team.team_scratch(1),idxu_max); - ulisttot_i_a = ulisttot_i = t_sna_1d(team.team_scratch(1),idxu_max); - ylist_r = t_sna_1d(team.team_scratch(1),idxu_max); - ylist_i = t_sna_1d(team.team_scratch(1),idxu_max); - zlist_r = t_sna_1d(team.team_scratch(1),idxz_max); - zlist_i = t_sna_1d(team.team_scratch(1),idxz_max); - blist = t_sna_1d(team.team_scratch(1),idxb_max); - - rij = t_sna_2d(team.team_scratch(1),nmax,3); - rcutij = t_sna_1d(team.team_scratch(1),nmax); - wj = t_sna_1d(team.team_scratch(1),nmax); - inside = t_sna_1i(team.team_scratch(1),nmax); - ulist_r_ij = t_sna_2d(team.team_scratch(1),nmax,idxu_max); - ulist_i_ij = t_sna_2d(team.team_scratch(1),nmax,idxu_max); -} - -template -inline -T_INT SNAKokkos::size_team_scratch_arrays() { - T_INT size = 0; - - size += t_sna_1d::shmem_size(idxu_max)*2; // ulisttot - size += t_sna_1d::shmem_size(idxu_max)*2; // ylist - size += t_sna_1d::shmem_size(idxz_max)*2; // zlist - size += t_sna_1d::shmem_size(idxb_max); // blist - - size += t_sna_2d::shmem_size(nmax,3); // rij - size += t_sna_1d::shmem_size(nmax); // rcutij - size += t_sna_1d::shmem_size(nmax); // wj - size += t_sna_1i::shmem_size(nmax); // inside - size += t_sna_2d::shmem_size(nmax,idxu_max)*2; // ulist_ij - - return size; -} - -/* ---------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void SNAKokkos::create_thread_scratch_arrays(const typename Kokkos::TeamPolicy::member_type& team) -{ - dblist = t_sna_2d(team.thread_scratch(1),idxb_max,3); - ulist_r = t_sna_1d(team.thread_scratch(1),idxu_max); - ulist_i = t_sna_1d(team.thread_scratch(1),idxu_max); - dulist_r = t_sna_2d(team.thread_scratch(1),idxu_max,3); - dulist_i = t_sna_2d(team.thread_scratch(1),idxu_max,3); -} - -template -inline -T_INT SNAKokkos::size_thread_scratch_arrays() { - T_INT size = 0; - - size += t_sna_2d::shmem_size(idxb_max,3); // dblist - size += t_sna_1d::shmem_size(idxu_max)*2; // ulist - size += t_sna_2d::shmem_size(idxu_max,3)*2; // dulist - return size; -} - /* ---------------------------------------------------------------------- factorial n, wrapper for precomputed table ------------------------------------------------------------------------- */ @@ -1558,14 +1316,13 @@ double SNAKokkos::memory_usage() bytes += jdimpq*jdimpq * sizeof(double); // pqarray bytes += idxcg_max * sizeof(double); // cglist - bytes += idxu_max * sizeof(double) * 2; // ulist - bytes += idxu_max * sizeof(double) * 2; // ulisttot - bytes += idxu_max * 3 * sizeof(double) * 2; // dulist - - bytes += idxz_max * sizeof(double) * 2; // zlist - bytes += idxb_max * sizeof(double); // blist - bytes += idxb_max * 3 * sizeof(double); // dblist - bytes += idxu_max * sizeof(double) * 2; // ylist + bytes += natom * idxu_max * sizeof(double) * 2; // ulist + bytes += natom * idxu_max * sizeof(double) * 2; // ulisttot + bytes += natom * idxu_max * 3 * sizeof(double) * 2; // dulist + + bytes += natom * idxz_max * sizeof(double) * 2; // zlist + bytes += natom * idxb_max * sizeof(double); // blist + bytes += natom * idxu_max * sizeof(double) * 2; // ylist bytes += jdim * jdim * jdim * sizeof(int); // idxcg_block bytes += jdim * sizeof(int); // idxu_block @@ -1577,11 +1334,11 @@ double SNAKokkos::memory_usage() bytes += jdim * sizeof(double); // bzero - bytes += nmax * 3 * sizeof(double); // rij - bytes += nmax * sizeof(int); // inside - bytes += nmax * sizeof(double); // wj - bytes += nmax * sizeof(double); // rcutij - bytes += nmax * idxu_max * sizeof(double) * 2; // ulist_ij + bytes += natom * nmax * 3 * sizeof(double); // rij + bytes += natom * nmax * sizeof(int); // inside + bytes += natom * nmax * sizeof(double); // wj + bytes += natom * nmax * sizeof(double); // rcutij + bytes += natom * nmax * idxu_max * sizeof(double) * 2; // ulist_ij return bytes; } From c7bccc5e7a21a53ffc595d1a30979f78f4a15cc4 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 19 Jul 2019 09:40:19 -0600 Subject: [PATCH 2/3] Workaround for GCC internal compiler error --- src/KOKKOS/pair_snap_kokkos_impl.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index 645521a17b..e53332050a 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -332,7 +332,7 @@ KOKKOS_INLINE_FUNCTION void PairSNAPKokkos::operator() (TagPairSNAPBeta,const typename Kokkos::TeamPolicy::member_type& team) const { // TODO: use RangePolicy instead, or thread over ncoeff? - const int ii = team.league_rank(); + int ii = team.league_rank(); const int i = d_ilist[ii + chunk_offset]; const int itype = type[i]; const int ielem = d_map[itype]; @@ -437,7 +437,7 @@ template KOKKOS_INLINE_FUNCTION void PairSNAPKokkos::operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy::member_type& team) const { - const int ii = team.league_rank(); + int ii = team.league_rank(); const int i = d_ilist[ii + chunk_offset]; SNAKokkos my_sna = snaKK; const double xtmp = x(i,0); @@ -505,7 +505,7 @@ void PairSNAPKokkos::operator() (TagPairSNAPComputeNeigh,const typen template KOKKOS_INLINE_FUNCTION void PairSNAPKokkos::operator() (TagPairSNAPPreUi,const typename Kokkos::TeamPolicy::member_type& team) const { - const int ii = team.league_rank(); + int ii = team.league_rank(); SNAKokkos my_sna = snaKK; my_sna.pre_ui(team,ii); } @@ -516,7 +516,7 @@ void PairSNAPKokkos::operator() (TagPairSNAPComputeUi,const typename SNAKokkos my_sna = snaKK; // Extract the atom number - const int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((inum+team.team_size()-1)/team.team_size())); + int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((inum+team.team_size()-1)/team.team_size())); if (ii >= inum) return; // Extract the neighbor number @@ -530,7 +530,7 @@ void PairSNAPKokkos::operator() (TagPairSNAPComputeUi,const typename template KOKKOS_INLINE_FUNCTION void PairSNAPKokkos::operator() (TagPairSNAPComputeYi,const typename Kokkos::TeamPolicy::member_type& team) const { - const int ii = team.league_rank(); + int ii = team.league_rank(); SNAKokkos my_sna = snaKK; my_sna.compute_yi(team,ii,d_beta); } @@ -538,7 +538,7 @@ void PairSNAPKokkos::operator() (TagPairSNAPComputeYi,const typename template KOKKOS_INLINE_FUNCTION void PairSNAPKokkos::operator() (TagPairSNAPComputeZi,const typename Kokkos::TeamPolicy::member_type& team) const { - const int ii = team.league_rank(); + int ii = team.league_rank(); SNAKokkos my_sna = snaKK; my_sna.compute_zi(team,ii); } @@ -546,7 +546,7 @@ void PairSNAPKokkos::operator() (TagPairSNAPComputeZi,const typename template KOKKOS_INLINE_FUNCTION void PairSNAPKokkos::operator() (TagPairSNAPComputeBi,const typename Kokkos::TeamPolicy::member_type& team) const { - const int ii = team.league_rank(); + int ii = team.league_rank(); SNAKokkos my_sna = snaKK; my_sna.compute_bi(team,ii); } @@ -557,7 +557,7 @@ void PairSNAPKokkos::operator() (TagPairSNAPComputeDuidrj,const type SNAKokkos my_sna = snaKK; // Extract the atom number - const int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((inum+team.team_size()-1)/team.team_size())); + int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((inum+team.team_size()-1)/team.team_size())); if (ii >= inum) return; // Extract the neighbor number @@ -574,7 +574,7 @@ void PairSNAPKokkos::operator() (TagPairSNAPComputeDeidrj,const type SNAKokkos my_sna = snaKK; // Extract the atom number - const int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((inum+team.team_size()-1)/team.team_size())); + int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((inum+team.team_size()-1)/team.team_size())); if (ii >= inum) return; // Extract the neighbor number @@ -595,7 +595,7 @@ void PairSNAPKokkos::operator() (TagPairSNAPComputeForce::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access::value>(); - const int ii = team.league_rank(); + int ii = team.league_rank(); const int i = d_ilist[ii + chunk_offset]; SNAKokkos my_sna = snaKK; const int ninside = d_ninside(ii); From 502fbfb78a76b2e77b3ff32d7807deb29631ad64 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 19 Jul 2019 15:20:22 -0600 Subject: [PATCH 3/3] Fix memory issue --- src/KOKKOS/pair_snap_kokkos_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index e53332050a..95afcc5ec7 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -208,7 +208,7 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) d_ninside = Kokkos::View("PairSNAPKokkos:ninside",inum); } - int chunk_size = 2000; + int chunk_size = MIN(2000,inum); chunk_offset = 0; snaKK.grow_rij(chunk_size,max_neighs);