forked from lijiext/lammps
Merge pull request #1571 from stanmoore1/kk_snap_opt
Add optimized version of Kokkos SNAP potential
This commit is contained in:
commit
d52540ea31
|
@ -31,10 +31,17 @@ PairStyle(snap/kk/host,PairSNAPKokkos<LMPHostType>)
|
||||||
namespace LAMMPS_NS {
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
template<int NEIGHFLAG, int EVFLAG>
|
template<int NEIGHFLAG, int EVFLAG>
|
||||||
struct TagPairSNAPCompute{};
|
struct TagPairSNAPComputeForce{};
|
||||||
|
|
||||||
struct TagPairSNAPBeta{};
|
struct TagPairSNAPBeta{};
|
||||||
struct TagPairSNAPBispectrum{};
|
struct TagPairSNAPComputeNeigh{};
|
||||||
|
struct TagPairSNAPPreUi{};
|
||||||
|
struct TagPairSNAPComputeUi{};
|
||||||
|
struct TagPairSNAPComputeZi{};
|
||||||
|
struct TagPairSNAPComputeBi{};
|
||||||
|
struct TagPairSNAPComputeYi{};
|
||||||
|
struct TagPairSNAPComputeDuidrj{};
|
||||||
|
struct TagPairSNAPComputeDeidrj{};
|
||||||
|
|
||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
class PairSNAPKokkos : public PairSNAP {
|
class PairSNAPKokkos : public PairSNAP {
|
||||||
|
@ -56,18 +63,39 @@ public:
|
||||||
|
|
||||||
template<int NEIGHFLAG, int EVFLAG>
|
template<int NEIGHFLAG, int EVFLAG>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<NEIGHFLAG,EVFLAG> >::member_type& team) const;
|
void operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team) const;
|
||||||
|
|
||||||
template<int NEIGHFLAG, int EVFLAG>
|
template<int NEIGHFLAG, int EVFLAG>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT&) const;
|
void operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT&) const;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeigh>::member_type& team) const;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (TagPairSNAPPreUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUi>::member_type& team) const;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUi>::member_type& team) const;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (TagPairSNAPComputeZi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeZi>::member_type& team) const;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (TagPairSNAPComputeBi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeBi>::member_type& team) const;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (TagPairSNAPComputeYi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeYi>::member_type& team) const;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (TagPairSNAPComputeDuidrj,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDuidrj>::member_type& team) const;
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void operator() (TagPairSNAPComputeDeidrj,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDeidrj>::member_type& team) const;
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void operator() (TagPairSNAPBeta,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBeta>::member_type& team) const;
|
void operator() (TagPairSNAPBeta,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBeta>::member_type& team) const;
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator() (TagPairSNAPBispectrum,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBispectrum>::member_type& team) const;
|
|
||||||
|
|
||||||
template<int NEIGHFLAG>
|
template<int NEIGHFLAG>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j,
|
void v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j,
|
||||||
|
@ -90,15 +118,10 @@ protected:
|
||||||
t_dbvec dbvec;
|
t_dbvec dbvec;
|
||||||
SNAKokkos<DeviceType> snaKK;
|
SNAKokkos<DeviceType> snaKK;
|
||||||
|
|
||||||
// How much parallelism to use within an interaction
|
int inum,max_neighs,chunk_offset;
|
||||||
int vector_length,team_size;
|
|
||||||
int team_scratch_size;
|
|
||||||
int thread_scratch_size;
|
|
||||||
|
|
||||||
int eflag,vflag;
|
int eflag,vflag;
|
||||||
|
|
||||||
void compute_beta();
|
|
||||||
void compute_bispectrum();
|
|
||||||
void allocate();
|
void allocate();
|
||||||
//void read_files(char *, char *);
|
//void read_files(char *, char *);
|
||||||
/*template<class DeviceType>
|
/*template<class DeviceType>
|
||||||
|
@ -131,6 +154,7 @@ inline double dist2(double* x,double* y);
|
||||||
Kokkos::View<F_FLOAT*, DeviceType> d_wjelem; // elements weights
|
Kokkos::View<F_FLOAT*, DeviceType> d_wjelem; // elements weights
|
||||||
Kokkos::View<F_FLOAT**, Kokkos::LayoutRight, DeviceType> d_coeffelem; // element bispectrum coefficients
|
Kokkos::View<F_FLOAT**, Kokkos::LayoutRight, DeviceType> d_coeffelem; // element bispectrum coefficients
|
||||||
Kokkos::View<T_INT*, DeviceType> d_map; // mapping from atom types to elements
|
Kokkos::View<T_INT*, DeviceType> d_map; // mapping from atom types to elements
|
||||||
|
Kokkos::View<T_INT*, DeviceType> d_ninside; // ninside for all atoms in list
|
||||||
Kokkos::View<F_FLOAT**, DeviceType> d_beta; // betas for all atoms in list
|
Kokkos::View<F_FLOAT**, DeviceType> d_beta; // betas for all atoms in list
|
||||||
Kokkos::View<F_FLOAT**, DeviceType> d_bispectrum; // bispectrum components for all atoms in list
|
Kokkos::View<F_FLOAT**, DeviceType> d_bispectrum; // bispectrum components for all atoms in list
|
||||||
|
|
||||||
|
|
|
@ -57,7 +57,6 @@ PairSNAPKokkos<DeviceType>::PairSNAPKokkos(LAMMPS *lmp) : PairSNAP(lmp)
|
||||||
datamask_read = EMPTY_MASK;
|
datamask_read = EMPTY_MASK;
|
||||||
datamask_modify = EMPTY_MASK;
|
datamask_modify = EMPTY_MASK;
|
||||||
|
|
||||||
vector_length = 8;
|
|
||||||
k_cutsq = tdual_fparams("PairSNAPKokkos::cutsq",atom->ntypes+1,atom->ntypes+1);
|
k_cutsq = tdual_fparams("PairSNAPKokkos::cutsq",atom->ntypes+1,atom->ntypes+1);
|
||||||
auto d_cutsq = k_cutsq.template view<DeviceType>();
|
auto d_cutsq = k_cutsq.template view<DeviceType>();
|
||||||
rnd_cutsq = d_cutsq;
|
rnd_cutsq = d_cutsq;
|
||||||
|
@ -164,7 +163,7 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||||
d_numneigh = k_list->d_numneigh;
|
d_numneigh = k_list->d_numneigh;
|
||||||
d_neighbors = k_list->d_neighbors;
|
d_neighbors = k_list->d_neighbors;
|
||||||
d_ilist = k_list->d_ilist;
|
d_ilist = k_list->d_ilist;
|
||||||
int inum = list->inum;
|
inum = list->inum;
|
||||||
|
|
||||||
need_dup = lmp->kokkos->need_dup<DeviceType>();
|
need_dup = lmp->kokkos->need_dup<DeviceType>();
|
||||||
if (need_dup) {
|
if (need_dup) {
|
||||||
|
@ -181,70 +180,112 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||||
const int num_neighs = neighs_i.get_num_neighs();
|
const int num_neighs = neighs_i.get_num_neighs();
|
||||||
if (max_neighs<num_neighs) max_neighs = num_neighs;
|
if (max_neighs<num_neighs) max_neighs = num_neighs;
|
||||||
}*/
|
}*/
|
||||||
int max_neighs = 0;
|
max_neighs = 0;
|
||||||
Kokkos::parallel_reduce("PairSNAPKokkos::find_max_neighs",inum, FindMaxNumNeighs<DeviceType>(k_list), Kokkos::Experimental::Max<int>(max_neighs));
|
Kokkos::parallel_reduce("PairSNAPKokkos::find_max_neighs",inum, FindMaxNumNeighs<DeviceType>(k_list), Kokkos::Experimental::Max<int>(max_neighs));
|
||||||
|
|
||||||
snaKK.nmax = max_neighs;
|
int vector_length = 1;
|
||||||
|
int ui_vector_length = 1;
|
||||||
team_scratch_size = snaKK.size_team_scratch_arrays();
|
int team_size = 1;
|
||||||
thread_scratch_size = snaKK.size_thread_scratch_arrays();
|
int yi_team_size = 1;
|
||||||
|
|
||||||
//printf("Sizes: %i %i\n",team_scratch_size/1024,thread_scratch_size/1024);
|
|
||||||
int team_size_max = Kokkos::TeamPolicy<DeviceType>::team_size_max(*this);
|
int team_size_max = Kokkos::TeamPolicy<DeviceType>::team_size_max(*this);
|
||||||
vector_length = 8;
|
|
||||||
#ifdef KOKKOS_ENABLE_CUDA
|
#ifdef KOKKOS_ENABLE_CUDA
|
||||||
team_size = 32;//max_neighs;
|
team_size = 32;//max_neighs;
|
||||||
if (team_size*vector_length > team_size_max)
|
if (team_size*vector_length > team_size_max)
|
||||||
team_size = team_size_max/vector_length;
|
team_size = team_size_max/vector_length;
|
||||||
#else
|
|
||||||
team_size = 1;
|
yi_team_size = 256;
|
||||||
|
if (yi_team_size*vector_length > team_size_max)
|
||||||
|
yi_team_size = team_size_max/vector_length;
|
||||||
|
|
||||||
|
ui_vector_length = 8;
|
||||||
|
if (team_size*ui_vector_length > team_size_max)
|
||||||
|
team_size = team_size_max/ui_vector_length;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (beta_max < list->inum) {
|
if (beta_max < inum) {
|
||||||
d_beta = Kokkos::View<F_FLOAT**, DeviceType>("PairSNAPKokkos:beta",
|
beta_max = inum;
|
||||||
list->inum,ncoeff);
|
d_beta = Kokkos::View<F_FLOAT**, DeviceType>("PairSNAPKokkos:beta",inum,ncoeff);
|
||||||
d_bispectrum = Kokkos::View<F_FLOAT**, DeviceType>("PairSNAPKokkos:bispectrum",
|
d_ninside = Kokkos::View<int*, DeviceType>("PairSNAPKokkos:ninside",inum);
|
||||||
list->inum,ncoeff);
|
|
||||||
beta_max = list->inum;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// compute dE_i/dB_i = beta_i for all i in list
|
int chunk_size = MIN(2000,inum);
|
||||||
|
chunk_offset = 0;
|
||||||
|
|
||||||
if (quadraticflag || eflag)
|
snaKK.grow_rij(chunk_size,max_neighs);
|
||||||
compute_bispectrum();
|
|
||||||
compute_beta();
|
|
||||||
|
|
||||||
EV_FLOAT ev;
|
EV_FLOAT ev;
|
||||||
|
|
||||||
if (eflag) {
|
while (chunk_offset < inum) { // chunk up loop to prevent running out of memory
|
||||||
if (neighflag == HALF) {
|
|
||||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<HALF,1> > policy(inum,team_size,vector_length);
|
EV_FLOAT ev_tmp;
|
||||||
Kokkos::parallel_reduce(policy
|
|
||||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
if (chunk_size > inum - chunk_offset)
|
||||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
chunk_size = inum - chunk_offset;
|
||||||
,*this,ev);
|
|
||||||
} else if (neighflag == HALFTHREAD) {
|
//ComputeNeigh
|
||||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<HALFTHREAD,1> > policy(inum,team_size,vector_length);
|
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeigh> policy_neigh(chunk_size,team_size,vector_length);
|
||||||
Kokkos::parallel_reduce(policy
|
Kokkos::parallel_for("ComputeNeigh",policy_neigh,*this);
|
||||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
|
||||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
//PreUi
|
||||||
,*this,ev);
|
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUi> policy_preui(chunk_size,team_size,vector_length);
|
||||||
|
Kokkos::parallel_for("PreUi",policy_preui,*this);
|
||||||
|
|
||||||
|
//ComputeUi
|
||||||
|
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUi> policy_ui(((inum+team_size-1)/team_size)*max_neighs,team_size,ui_vector_length);
|
||||||
|
Kokkos::parallel_for("ComputeUi",policy_ui,*this);
|
||||||
|
|
||||||
|
//Compute bispectrum
|
||||||
|
if (quadraticflag || eflag) {
|
||||||
|
//ComputeZi
|
||||||
|
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeZi> policy_zi(chunk_size,team_size,vector_length);
|
||||||
|
Kokkos::parallel_for("ComputeZi",policy_zi,*this);
|
||||||
|
|
||||||
|
//ComputeBi
|
||||||
|
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeBi> policy_bi(chunk_size,team_size,vector_length);
|
||||||
|
Kokkos::parallel_for("ComputeBi",policy_bi,*this);
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
if (neighflag == HALF) {
|
//Compute beta = dE_i/dB_i for all i in list
|
||||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<HALF,0> > policy(inum,team_size,vector_length);
|
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBeta> policy_beta(chunk_size,team_size,vector_length);
|
||||||
Kokkos::parallel_for(policy
|
Kokkos::parallel_for("ComputeBeta",policy_beta,*this);
|
||||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
|
||||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
//ComputeYi
|
||||||
,*this);
|
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeYi> policy_yi(chunk_size,yi_team_size,vector_length);
|
||||||
} else if (neighflag == HALFTHREAD) {
|
Kokkos::parallel_for("ComputeYi",policy_yi,*this);
|
||||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<HALFTHREAD,0> > policy(inum,team_size,vector_length);
|
|
||||||
Kokkos::parallel_for(policy
|
//ComputeDuidrj
|
||||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDuidrj> policy_duidrj(((inum+team_size-1)/team_size)*max_neighs,team_size,vector_length);
|
||||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
Kokkos::parallel_for("ComputeDuidrj",policy_duidrj,*this);
|
||||||
,*this);
|
|
||||||
|
//ComputeDeidrj
|
||||||
|
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDeidrj> policy_deidrj(((inum+team_size-1)/team_size)*max_neighs,team_size,vector_length);
|
||||||
|
Kokkos::parallel_for("ComputeDeidrj",policy_deidrj,*this);
|
||||||
|
|
||||||
|
//ComputeForce
|
||||||
|
if (eflag) {
|
||||||
|
if (neighflag == HALF) {
|
||||||
|
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeForce<HALF,1> > policy_force(chunk_size,team_size,vector_length);
|
||||||
|
Kokkos::parallel_reduce(policy_force
|
||||||
|
,*this,ev_tmp);
|
||||||
|
} else if (neighflag == HALFTHREAD) {
|
||||||
|
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeForce<HALFTHREAD,1> > policy_force(chunk_size,team_size,vector_length);
|
||||||
|
Kokkos::parallel_reduce(policy_force
|
||||||
|
,*this,ev_tmp);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (neighflag == HALF) {
|
||||||
|
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeForce<HALF,0> > policy_force(chunk_size,team_size,vector_length);
|
||||||
|
Kokkos::parallel_for(policy_force
|
||||||
|
,*this);
|
||||||
|
} else if (neighflag == HALFTHREAD) {
|
||||||
|
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeForce<HALFTHREAD,0> > policy_force(chunk_size,team_size,vector_length);
|
||||||
|
Kokkos::parallel_for(policy_force
|
||||||
|
,*this);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
ev += ev_tmp;
|
||||||
|
chunk_offset += chunk_size;
|
||||||
|
} // end while
|
||||||
|
|
||||||
if (need_dup)
|
if (need_dup)
|
||||||
Kokkos::Experimental::contribute(f, dup_f);
|
Kokkos::Experimental::contribute(f, dup_f);
|
||||||
|
@ -284,32 +325,19 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
compute beta
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType>
|
|
||||||
void PairSNAPKokkos<DeviceType>::compute_beta()
|
|
||||||
{
|
|
||||||
// TODO: use RangePolicy instead, or thread over ncoeff?
|
|
||||||
int inum = list->inum;
|
|
||||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBeta> policy(inum,team_size,vector_length);
|
|
||||||
Kokkos::parallel_for(policy
|
|
||||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
|
||||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
|
||||||
,*this);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPBeta,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBeta>::member_type& team) const {
|
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPBeta,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBeta>::member_type& team) const {
|
||||||
|
|
||||||
const int ii = team.league_rank();
|
// TODO: use RangePolicy instead, or thread over ncoeff?
|
||||||
const int i = d_ilist[ii];
|
int ii = team.league_rank();
|
||||||
|
const int i = d_ilist[ii + chunk_offset];
|
||||||
const int itype = type[i];
|
const int itype = type[i];
|
||||||
const int ielem = d_map[itype];
|
const int ielem = d_map[itype];
|
||||||
|
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||||
|
|
||||||
Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||||
d_coeffi(d_coeffelem,ielem,Kokkos::ALL);
|
d_coeffi(d_coeffelem,ielem,Kokkos::ALL);
|
||||||
|
|
||||||
|
@ -319,11 +347,11 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPBeta,const typename Kokk
|
||||||
if (quadraticflag) {
|
if (quadraticflag) {
|
||||||
int k = ncoeff+1;
|
int k = ncoeff+1;
|
||||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||||
double bveci = d_bispectrum(ii,icoeff);
|
double bveci = my_sna.blist(ii,icoeff);
|
||||||
d_beta(ii,icoeff) += d_coeffi[k]*bveci;
|
d_beta(ii,icoeff) += d_coeffi[k]*bveci;
|
||||||
k++;
|
k++;
|
||||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||||
double bvecj = d_bispectrum(ii,jcoeff);
|
double bvecj = my_sna.blist(ii,jcoeff);
|
||||||
d_beta(ii,icoeff) += d_coeffi[k]*bvecj;
|
d_beta(ii,icoeff) += d_coeffi[k]*bvecj;
|
||||||
d_beta(ii,jcoeff) += d_coeffi[k]*bveci;
|
d_beta(ii,jcoeff) += d_coeffi[k]*bveci;
|
||||||
k++;
|
k++;
|
||||||
|
@ -332,105 +360,6 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPBeta,const typename Kokk
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
compute bispectrum
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType>
|
|
||||||
void PairSNAPKokkos<DeviceType>::compute_bispectrum()
|
|
||||||
{
|
|
||||||
int inum = list->inum;
|
|
||||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBispectrum> policy(inum,team_size,vector_length);
|
|
||||||
Kokkos::parallel_for(policy
|
|
||||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
|
||||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
|
||||||
,*this);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
template<class DeviceType>
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPBispectrum,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBispectrum>::member_type& team) const {
|
|
||||||
|
|
||||||
const int ii = team.league_rank();
|
|
||||||
const int i = d_ilist[ii];
|
|
||||||
SNAKokkos<DeviceType> my_sna(snaKK,team);
|
|
||||||
const double xtmp = x(i,0);
|
|
||||||
const double ytmp = x(i,1);
|
|
||||||
const double ztmp = x(i,2);
|
|
||||||
const int itype = type[i];
|
|
||||||
const int ielem = d_map[itype];
|
|
||||||
const double radi = d_radelem[ielem];
|
|
||||||
|
|
||||||
const int num_neighs = d_numneigh[i];
|
|
||||||
|
|
||||||
// rij[][3] = displacements between atom I and those neighbors
|
|
||||||
// inside = indices of neighbors of I within cutoff
|
|
||||||
// wj = weights for neighbors of I within cutoff
|
|
||||||
// rcutij = cutoffs for neighbors of I within cutoff
|
|
||||||
// note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi
|
|
||||||
|
|
||||||
int ninside = 0;
|
|
||||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,num_neighs),
|
|
||||||
[&] (const int jj, int& count) {
|
|
||||||
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
|
||||||
T_INT j = d_neighbors(i,jj);
|
|
||||||
const F_FLOAT dx = x(j,0) - xtmp;
|
|
||||||
const F_FLOAT dy = x(j,1) - ytmp;
|
|
||||||
const F_FLOAT dz = x(j,2) - ztmp;
|
|
||||||
|
|
||||||
const int jtype = type(j);
|
|
||||||
const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
|
|
||||||
const int elem_j = d_map[jtype];
|
|
||||||
|
|
||||||
if ( rsq < rnd_cutsq(itype,jtype) )
|
|
||||||
count++;
|
|
||||||
});
|
|
||||||
},ninside);
|
|
||||||
|
|
||||||
if (team.team_rank() == 0)
|
|
||||||
Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,num_neighs),
|
|
||||||
[&] (const int jj, int& offset, bool final) {
|
|
||||||
//for (int jj = 0; jj < num_neighs; jj++) {
|
|
||||||
T_INT j = d_neighbors(i,jj);
|
|
||||||
const F_FLOAT dx = x(j,0) - xtmp;
|
|
||||||
const F_FLOAT dy = x(j,1) - ytmp;
|
|
||||||
const F_FLOAT dz = x(j,2) - ztmp;
|
|
||||||
|
|
||||||
const int jtype = type(j);
|
|
||||||
const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
|
|
||||||
const int elem_j = d_map[jtype];
|
|
||||||
|
|
||||||
if ( rsq < rnd_cutsq(itype,jtype) ) {
|
|
||||||
if (final) {
|
|
||||||
my_sna.rij(offset,0) = dx;
|
|
||||||
my_sna.rij(offset,1) = dy;
|
|
||||||
my_sna.rij(offset,2) = dz;
|
|
||||||
my_sna.inside[offset] = j;
|
|
||||||
my_sna.wj[offset] = d_wjelem[elem_j];
|
|
||||||
my_sna.rcutij[offset] = (radi + d_radelem[elem_j])*rcutfac;
|
|
||||||
}
|
|
||||||
offset++;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
team.team_barrier();
|
|
||||||
|
|
||||||
// compute Ui, Zi, and Bi for atom I
|
|
||||||
|
|
||||||
my_sna.compute_ui(team,ninside);
|
|
||||||
team.team_barrier();
|
|
||||||
|
|
||||||
my_sna.compute_zi(team);
|
|
||||||
team.team_barrier();
|
|
||||||
|
|
||||||
my_sna.compute_bi(team);
|
|
||||||
team.team_barrier();
|
|
||||||
|
|
||||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
|
|
||||||
d_bispectrum(ii,icoeff) = my_sna.blist[icoeff];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
allocate all arrays
|
allocate all arrays
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
@ -496,30 +425,21 @@ void PairSNAPKokkos<DeviceType>::coeff(int narg, char **arg)
|
||||||
Kokkos::deep_copy(d_coeffelem,h_coeffelem);
|
Kokkos::deep_copy(d_coeffelem,h_coeffelem);
|
||||||
Kokkos::deep_copy(d_map,h_map);
|
Kokkos::deep_copy(d_map,h_map);
|
||||||
|
|
||||||
// allocate memory for per OpenMP thread data which
|
|
||||||
// is wrapped into the sna class
|
|
||||||
|
|
||||||
snaKK = SNAKokkos<DeviceType>(rfac0,twojmax,
|
snaKK = SNAKokkos<DeviceType>(rfac0,twojmax,
|
||||||
rmin0,switchflag,bzeroflag);
|
rmin0,switchflag,bzeroflag);
|
||||||
snaKK.grow_rij(0);
|
snaKK.grow_rij(0,0);
|
||||||
snaKK.init();
|
snaKK.init();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
template<int NEIGHFLAG, int EVFLAG>
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT& ev) const {
|
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeigh>::member_type& team) const {
|
||||||
|
|
||||||
// The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
int ii = team.league_rank();
|
||||||
|
const int i = d_ilist[ii + chunk_offset];
|
||||||
auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
|
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||||
auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
|
|
||||||
|
|
||||||
const int ii = team.league_rank();
|
|
||||||
const int i = d_ilist[ii];
|
|
||||||
SNAKokkos<DeviceType> my_sna(snaKK,team);
|
|
||||||
const double xtmp = x(i,0);
|
const double xtmp = x(i,0);
|
||||||
const double ytmp = x(i,1);
|
const double ytmp = x(i,1);
|
||||||
const double ztmp = x(i,2);
|
const double ztmp = x(i,2);
|
||||||
|
@ -553,6 +473,8 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG
|
||||||
});
|
});
|
||||||
},ninside);
|
},ninside);
|
||||||
|
|
||||||
|
d_ninside(ii) = ninside;
|
||||||
|
|
||||||
if (team.team_rank() == 0)
|
if (team.team_rank() == 0)
|
||||||
Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,num_neighs),
|
Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,num_neighs),
|
||||||
[&] (const int jj, int& offset, bool final) {
|
[&] (const int jj, int& offset, bool final) {
|
||||||
|
@ -568,43 +490,124 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG
|
||||||
|
|
||||||
if ( rsq < rnd_cutsq(itype,jtype) ) {
|
if ( rsq < rnd_cutsq(itype,jtype) ) {
|
||||||
if (final) {
|
if (final) {
|
||||||
my_sna.rij(offset,0) = dx;
|
my_sna.rij(ii,offset,0) = dx;
|
||||||
my_sna.rij(offset,1) = dy;
|
my_sna.rij(ii,offset,1) = dy;
|
||||||
my_sna.rij(offset,2) = dz;
|
my_sna.rij(ii,offset,2) = dz;
|
||||||
my_sna.inside[offset] = j;
|
my_sna.inside(ii,offset) = j;
|
||||||
my_sna.wj[offset] = d_wjelem[elem_j];
|
my_sna.wj(ii,offset) = d_wjelem[elem_j];
|
||||||
my_sna.rcutij[offset] = (radi + d_radelem[elem_j])*rcutfac;
|
my_sna.rcutij(ii,offset) = (radi + d_radelem[elem_j])*rcutfac;
|
||||||
}
|
}
|
||||||
offset++;
|
offset++;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
team.team_barrier();
|
}
|
||||||
|
|
||||||
// compute Ui, Yi for atom I
|
template<class DeviceType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPPreUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUi>::member_type& team) const {
|
||||||
|
int ii = team.league_rank();
|
||||||
|
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||||
|
my_sna.pre_ui(team,ii);
|
||||||
|
}
|
||||||
|
|
||||||
my_sna.compute_ui(team,ninside);
|
template<class DeviceType>
|
||||||
team.team_barrier();
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUi>::member_type& team) const {
|
||||||
|
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||||
|
|
||||||
// for neighbors of I within cutoff:
|
// Extract the atom number
|
||||||
// compute Fij = dEi/dRj = -dEi/dRi
|
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((inum+team.team_size()-1)/team.team_size()));
|
||||||
// add to Fi, subtract from Fj
|
if (ii >= inum) return;
|
||||||
|
|
||||||
my_sna.compute_yi(team,d_beta,ii);
|
// Extract the neighbor number
|
||||||
team.team_barrier();
|
const int jj = team.league_rank() / ((inum+team.team_size()-1)/team.team_size());
|
||||||
|
const int ninside = d_ninside(ii);
|
||||||
|
if (jj >= ninside) return;
|
||||||
|
|
||||||
Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
my_sna.compute_ui(team,ii,jj);
|
||||||
d_coeffi(d_coeffelem,ielem,Kokkos::ALL);
|
}
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeYi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeYi>::member_type& team) const {
|
||||||
|
int ii = team.league_rank();
|
||||||
|
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||||
|
my_sna.compute_yi(team,ii,d_beta);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeZi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeZi>::member_type& team) const {
|
||||||
|
int ii = team.league_rank();
|
||||||
|
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||||
|
my_sna.compute_zi(team,ii);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeBi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeBi>::member_type& team) const {
|
||||||
|
int ii = team.league_rank();
|
||||||
|
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||||
|
my_sna.compute_bi(team,ii);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeDuidrj,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDuidrj>::member_type& team) const {
|
||||||
|
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||||
|
|
||||||
|
// Extract the atom number
|
||||||
|
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((inum+team.team_size()-1)/team.team_size()));
|
||||||
|
if (ii >= inum) return;
|
||||||
|
|
||||||
|
// Extract the neighbor number
|
||||||
|
const int jj = team.league_rank() / ((inum+team.team_size()-1)/team.team_size());
|
||||||
|
const int ninside = d_ninside(ii);
|
||||||
|
if (jj >= ninside) return;
|
||||||
|
|
||||||
|
my_sna.compute_duidrj(team,ii,jj);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeDeidrj,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDeidrj>::member_type& team) const {
|
||||||
|
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||||
|
|
||||||
|
// Extract the atom number
|
||||||
|
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((inum+team.team_size()-1)/team.team_size()));
|
||||||
|
if (ii >= inum) return;
|
||||||
|
|
||||||
|
// Extract the neighbor number
|
||||||
|
const int jj = team.league_rank() / ((inum+team.team_size()-1)/team.team_size());
|
||||||
|
const int ninside = d_ninside(ii);
|
||||||
|
if (jj >= ninside) return;
|
||||||
|
|
||||||
|
my_sna.compute_deidrj(team,ii,jj);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class DeviceType>
|
||||||
|
template<int NEIGHFLAG, int EVFLAG>
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT& ev) const {
|
||||||
|
|
||||||
|
// The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
||||||
|
|
||||||
|
auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
|
||||||
|
auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
|
||||||
|
|
||||||
|
int ii = team.league_rank();
|
||||||
|
const int i = d_ilist[ii + chunk_offset];
|
||||||
|
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||||
|
const int ninside = d_ninside(ii);
|
||||||
|
|
||||||
Kokkos::parallel_for (Kokkos::TeamThreadRange(team,ninside),
|
Kokkos::parallel_for (Kokkos::TeamThreadRange(team,ninside),
|
||||||
[&] (const int jj) {
|
[&] (const int jj) {
|
||||||
//for (int jj = 0; jj < ninside; jj++) {
|
int j = my_sna.inside(ii,jj);
|
||||||
int j = my_sna.inside[jj];
|
|
||||||
|
|
||||||
my_sna.compute_duidrj(team,&my_sna.rij(jj,0),
|
|
||||||
my_sna.wj[jj],my_sna.rcutij[jj],jj);
|
|
||||||
|
|
||||||
F_FLOAT fij[3];
|
F_FLOAT fij[3];
|
||||||
my_sna.compute_deidrj(team,fij);
|
fij[0] = my_sna.dedr(ii,jj,0);
|
||||||
|
fij[1] = my_sna.dedr(ii,jj,1);
|
||||||
|
fij[2] = my_sna.dedr(ii,jj,2);
|
||||||
|
|
||||||
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||||
a_f(i,0) += fij[0];
|
a_f(i,0) += fij[0];
|
||||||
|
@ -620,8 +623,8 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG
|
||||||
if (vflag_either) {
|
if (vflag_either) {
|
||||||
v_tally_xyz<NEIGHFLAG>(ev,i,j,
|
v_tally_xyz<NEIGHFLAG>(ev,i,j,
|
||||||
fij[0],fij[1],fij[2],
|
fij[0],fij[1],fij[2],
|
||||||
-my_sna.rij(jj,0),-my_sna.rij(jj,1),
|
-my_sna.rij(ii,jj,0),-my_sna.rij(ii,jj,1),
|
||||||
-my_sna.rij(jj,2));
|
-my_sna.rij(ii,jj,2));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -633,6 +636,11 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG
|
||||||
if (EVFLAG) {
|
if (EVFLAG) {
|
||||||
if (eflag_either) {
|
if (eflag_either) {
|
||||||
|
|
||||||
|
const int itype = type(i);
|
||||||
|
const int ielem = d_map[itype];
|
||||||
|
Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||||
|
d_coeffi(d_coeffelem,ielem,Kokkos::ALL);
|
||||||
|
|
||||||
Kokkos::single(Kokkos::PerTeam(team), [&] () {
|
Kokkos::single(Kokkos::PerTeam(team), [&] () {
|
||||||
|
|
||||||
// evdwl = energy of atom I, sum over coeffs_k * Bi_k
|
// evdwl = energy of atom I, sum over coeffs_k * Bi_k
|
||||||
|
@ -640,21 +648,21 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG
|
||||||
double evdwl = d_coeffi[0];
|
double evdwl = d_coeffi[0];
|
||||||
|
|
||||||
// E = beta.B + 0.5*B^t.alpha.B
|
// E = beta.B + 0.5*B^t.alpha.B
|
||||||
|
|
||||||
// linear contributions
|
// linear contributions
|
||||||
|
|
||||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
|
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
|
||||||
evdwl += d_coeffi[icoeff+1]*d_bispectrum(ii,icoeff);
|
evdwl += d_coeffi[icoeff+1]*my_sna.blist(ii,icoeff);
|
||||||
|
|
||||||
// quadratic contributions
|
// quadratic contributions
|
||||||
|
|
||||||
if (quadraticflag) {
|
if (quadraticflag) {
|
||||||
int k = ncoeff+1;
|
int k = ncoeff+1;
|
||||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||||
double bveci = d_bispectrum(ii,icoeff);
|
double bveci = my_sna.blist(ii,icoeff);
|
||||||
evdwl += 0.5*d_coeffi[k++]*bveci*bveci;
|
evdwl += 0.5*d_coeffi[k++]*bveci*bveci;
|
||||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||||
double bvecj = d_bispectrum(ii,jcoeff);
|
double bvecj = my_sna.blist(ii,jcoeff);
|
||||||
evdwl += d_coeffi[k++]*bveci*bvecj;
|
evdwl += d_coeffi[k++]*bveci*bvecj;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -671,9 +679,9 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG
|
||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
template<int NEIGHFLAG, int EVFLAG>
|
template<int NEIGHFLAG, int EVFLAG>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPCompute<NEIGHFLAG,EVFLAG> >::member_type& team) const {
|
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team) const {
|
||||||
EV_FLOAT ev;
|
EV_FLOAT ev;
|
||||||
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSNAPCompute<NEIGHFLAG,EVFLAG>(), team, ev);
|
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>(), team, ev);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
@ -738,4 +746,5 @@ double PairSNAPKokkos<DeviceType>::memory_usage()
|
||||||
bytes += snaKK.memory_usage();
|
bytes += snaKK.memory_usage();
|
||||||
return bytes;
|
return bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,6 +25,9 @@
|
||||||
|
|
||||||
namespace LAMMPS_NS {
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
|
typedef double SNAreal;
|
||||||
|
typedef struct { SNAreal re, im; } SNAcomplex;
|
||||||
|
|
||||||
struct SNAKK_ZINDICES {
|
struct SNAKK_ZINDICES {
|
||||||
int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju;
|
int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju;
|
||||||
};
|
};
|
||||||
|
@ -39,12 +42,22 @@ class SNAKokkos {
|
||||||
public:
|
public:
|
||||||
typedef Kokkos::View<int*, DeviceType> t_sna_1i;
|
typedef Kokkos::View<int*, DeviceType> t_sna_1i;
|
||||||
typedef Kokkos::View<double*, DeviceType> t_sna_1d;
|
typedef Kokkos::View<double*, DeviceType> t_sna_1d;
|
||||||
typedef Kokkos::View<double*, Kokkos::LayoutRight, DeviceType, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic;
|
typedef Kokkos::View<double*, DeviceType, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic;
|
||||||
typedef Kokkos::View<double**, Kokkos::LayoutRight, DeviceType> t_sna_2d;
|
typedef Kokkos::View<int**, DeviceType> t_sna_2i;
|
||||||
typedef Kokkos::View<double***, Kokkos::LayoutRight, DeviceType> t_sna_3d;
|
typedef Kokkos::View<double**, DeviceType> t_sna_2d;
|
||||||
typedef Kokkos::View<double***[3], Kokkos::LayoutRight, DeviceType> t_sna_4d;
|
typedef Kokkos::View<double***, DeviceType> t_sna_3d;
|
||||||
typedef Kokkos::View<double**[3], Kokkos::LayoutRight, DeviceType> t_sna_3d3;
|
typedef Kokkos::View<double***[3], DeviceType> t_sna_4d;
|
||||||
typedef Kokkos::View<double*****, Kokkos::LayoutRight, DeviceType> t_sna_5d;
|
typedef Kokkos::View<double**[3], DeviceType> t_sna_3d3;
|
||||||
|
typedef Kokkos::View<double*****, DeviceType> t_sna_5d;
|
||||||
|
|
||||||
|
typedef Kokkos::View<SNAcomplex*, DeviceType> t_sna_1c;
|
||||||
|
typedef Kokkos::View<SNAcomplex*, DeviceType, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1c_atomic;
|
||||||
|
typedef Kokkos::View<SNAcomplex**, DeviceType> t_sna_2c;
|
||||||
|
typedef Kokkos::View<SNAcomplex**, Kokkos::LayoutRight, DeviceType> t_sna_2c_cpu;
|
||||||
|
typedef Kokkos::View<SNAcomplex***, DeviceType> t_sna_3c;
|
||||||
|
typedef Kokkos::View<SNAcomplex***[3], DeviceType> t_sna_4c;
|
||||||
|
typedef Kokkos::View<SNAcomplex**[3], DeviceType> t_sna_3c3;
|
||||||
|
typedef Kokkos::View<SNAcomplex*****, DeviceType> t_sna_5c;
|
||||||
|
|
||||||
inline
|
inline
|
||||||
SNAKokkos() {};
|
SNAKokkos() {};
|
||||||
|
@ -63,36 +76,31 @@ inline
|
||||||
inline
|
inline
|
||||||
void init(); //
|
void init(); //
|
||||||
|
|
||||||
inline
|
|
||||||
T_INT size_team_scratch_arrays();
|
|
||||||
|
|
||||||
inline
|
|
||||||
T_INT size_thread_scratch_arrays();
|
|
||||||
|
|
||||||
double memory_usage();
|
double memory_usage();
|
||||||
|
|
||||||
int ncoeff;
|
int ncoeff;
|
||||||
|
|
||||||
// functions for bispectrum coefficients
|
// functions for bispectrum coefficients
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
void pre_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void compute_zi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); // ForceSNAP
|
void compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void compute_yi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,
|
void compute_ui_orig(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
|
||||||
const Kokkos::View<F_FLOAT**, DeviceType> &beta, const int ii); // ForceSNAP
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void compute_bi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); // ForceSNAP
|
void compute_zi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void compute_yi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int,
|
||||||
|
const Kokkos::View<F_FLOAT**, DeviceType> &beta); // ForceSNAP
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
void compute_bi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
||||||
|
|
||||||
// functions for derivatives
|
// functions for derivatives
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void compute_duidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double*, double, double, int); //ForceSNAP
|
void compute_duidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); //ForceSNAP
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void compute_dbidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); //ForceSNAP
|
void compute_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void compute_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double *); // ForceSNAP
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
double compute_sfac(double, double); // add_uarraytot, compute_duarray
|
double compute_sfac(double, double); // add_uarraytot, compute_duarray
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
@ -109,29 +117,26 @@ inline
|
||||||
|
|
||||||
|
|
||||||
// Per InFlight Particle
|
// Per InFlight Particle
|
||||||
t_sna_2d rij;
|
t_sna_3d rij;
|
||||||
t_sna_1i inside;
|
t_sna_2i inside;
|
||||||
t_sna_1d wj;
|
t_sna_2d wj;
|
||||||
t_sna_1d rcutij;
|
t_sna_2d rcutij;
|
||||||
int nmax;
|
t_sna_3d dedr;
|
||||||
|
int natom, nmax;
|
||||||
|
|
||||||
void grow_rij(int);
|
void grow_rij(int, int);
|
||||||
|
|
||||||
int twojmax, diagonalstyle;
|
int twojmax, diagonalstyle;
|
||||||
// Per InFlight Particle
|
|
||||||
t_sna_1d blist;
|
t_sna_2d blist;
|
||||||
t_sna_1d ulisttot_r, ulisttot_i;
|
t_sna_2c_cpu ulisttot;
|
||||||
t_sna_1d_atomic ulisttot_r_a, ulisttot_i_a;
|
t_sna_2c zlist;
|
||||||
t_sna_1d zlist_r, zlist_i;
|
|
||||||
t_sna_2d ulist_r_ij, ulist_i_ij;
|
|
||||||
|
|
||||||
// Per InFlight Interaction
|
t_sna_3c ulist;
|
||||||
t_sna_1d ulist_r, ulist_i;
|
t_sna_2c ylist;
|
||||||
t_sna_1d_atomic ylist_r, ylist_i;
|
|
||||||
|
|
||||||
// derivatives of data
|
// derivatives of data
|
||||||
t_sna_2d dulist_r, dulist_i;
|
t_sna_4c dulist;
|
||||||
t_sna_2d dblist;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
double rmin0, rfac0;
|
double rmin0, rfac0;
|
||||||
|
@ -168,14 +173,14 @@ inline
|
||||||
inline
|
inline
|
||||||
void init_rootpqarray(); // init()
|
void init_rootpqarray(); // init()
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void zero_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); // compute_ui
|
void zero_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // compute_ui
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void addself_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double); // compute_ui
|
void addself_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, double); // compute_ui
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double, double, double, int); // compute_ui
|
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, double, double, double); // compute_ui
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void compute_uarray(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,
|
void compute_uarray(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
|
||||||
double, double, double,
|
double, double, double,
|
||||||
double, double); // compute_ui
|
double, double); // compute_ui
|
||||||
inline
|
inline
|
||||||
|
@ -184,9 +189,9 @@ inline
|
||||||
inline
|
inline
|
||||||
int compute_ncoeff(); // SNAKokkos()
|
int compute_ncoeff(); // SNAKokkos()
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void compute_duarray(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,
|
void compute_duarray(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
|
||||||
double, double, double, // compute_duidrj
|
double, double, double, // compute_duidrj
|
||||||
double, double, double, double, double, int);
|
double, double, double, double, double);
|
||||||
|
|
||||||
// Sets the style for the switching function
|
// Sets the style for the switching function
|
||||||
// 0 = none
|
// 0 = none
|
||||||
|
@ -197,7 +202,7 @@ inline
|
||||||
double wself;
|
double wself;
|
||||||
|
|
||||||
int bzero_flag; // 1 if bzero subtracted from barray
|
int bzero_flag; // 1 if bzero subtracted from barray
|
||||||
Kokkos::View<double*, Kokkos::LayoutRight, DeviceType> bzero; // array of B values for isolated atoms
|
Kokkos::View<double*, DeviceType> bzero; // array of B values for isolated atoms
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue