forked from lijiext/lammps
Merge pull request #1571 from stanmoore1/kk_snap_opt
Add optimized version of Kokkos SNAP potential
This commit is contained in:
commit
d52540ea31
|
@ -31,10 +31,17 @@ PairStyle(snap/kk/host,PairSNAPKokkos<LMPHostType>)
|
|||
namespace LAMMPS_NS {
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
struct TagPairSNAPCompute{};
|
||||
struct TagPairSNAPComputeForce{};
|
||||
|
||||
struct TagPairSNAPBeta{};
|
||||
struct TagPairSNAPBispectrum{};
|
||||
struct TagPairSNAPComputeNeigh{};
|
||||
struct TagPairSNAPPreUi{};
|
||||
struct TagPairSNAPComputeUi{};
|
||||
struct TagPairSNAPComputeZi{};
|
||||
struct TagPairSNAPComputeBi{};
|
||||
struct TagPairSNAPComputeYi{};
|
||||
struct TagPairSNAPComputeDuidrj{};
|
||||
struct TagPairSNAPComputeDeidrj{};
|
||||
|
||||
template<class DeviceType>
|
||||
class PairSNAPKokkos : public PairSNAP {
|
||||
|
@ -56,18 +63,39 @@ public:
|
|||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<NEIGHFLAG,EVFLAG> >::member_type& team) const;
|
||||
void operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team) const;
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT&) const;
|
||||
void operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeigh>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPPreUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUi>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUi>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeZi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeZi>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeBi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeBi>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeYi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeYi>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeDuidrj,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDuidrj>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeDeidrj,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDeidrj>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPBeta,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBeta>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPBispectrum,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBispectrum>::member_type& team) const;
|
||||
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j,
|
||||
|
@ -90,15 +118,10 @@ protected:
|
|||
t_dbvec dbvec;
|
||||
SNAKokkos<DeviceType> snaKK;
|
||||
|
||||
// How much parallelism to use within an interaction
|
||||
int vector_length,team_size;
|
||||
int team_scratch_size;
|
||||
int thread_scratch_size;
|
||||
int inum,max_neighs,chunk_offset;
|
||||
|
||||
int eflag,vflag;
|
||||
|
||||
void compute_beta();
|
||||
void compute_bispectrum();
|
||||
void allocate();
|
||||
//void read_files(char *, char *);
|
||||
/*template<class DeviceType>
|
||||
|
@ -131,6 +154,7 @@ inline double dist2(double* x,double* y);
|
|||
Kokkos::View<F_FLOAT*, DeviceType> d_wjelem; // elements weights
|
||||
Kokkos::View<F_FLOAT**, Kokkos::LayoutRight, DeviceType> d_coeffelem; // element bispectrum coefficients
|
||||
Kokkos::View<T_INT*, DeviceType> d_map; // mapping from atom types to elements
|
||||
Kokkos::View<T_INT*, DeviceType> d_ninside; // ninside for all atoms in list
|
||||
Kokkos::View<F_FLOAT**, DeviceType> d_beta; // betas for all atoms in list
|
||||
Kokkos::View<F_FLOAT**, DeviceType> d_bispectrum; // bispectrum components for all atoms in list
|
||||
|
||||
|
|
|
@ -57,7 +57,6 @@ PairSNAPKokkos<DeviceType>::PairSNAPKokkos(LAMMPS *lmp) : PairSNAP(lmp)
|
|||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
|
||||
vector_length = 8;
|
||||
k_cutsq = tdual_fparams("PairSNAPKokkos::cutsq",atom->ntypes+1,atom->ntypes+1);
|
||||
auto d_cutsq = k_cutsq.template view<DeviceType>();
|
||||
rnd_cutsq = d_cutsq;
|
||||
|
@ -164,7 +163,7 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
d_numneigh = k_list->d_numneigh;
|
||||
d_neighbors = k_list->d_neighbors;
|
||||
d_ilist = k_list->d_ilist;
|
||||
int inum = list->inum;
|
||||
inum = list->inum;
|
||||
|
||||
need_dup = lmp->kokkos->need_dup<DeviceType>();
|
||||
if (need_dup) {
|
||||
|
@ -181,70 +180,112 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
const int num_neighs = neighs_i.get_num_neighs();
|
||||
if (max_neighs<num_neighs) max_neighs = num_neighs;
|
||||
}*/
|
||||
int max_neighs = 0;
|
||||
max_neighs = 0;
|
||||
Kokkos::parallel_reduce("PairSNAPKokkos::find_max_neighs",inum, FindMaxNumNeighs<DeviceType>(k_list), Kokkos::Experimental::Max<int>(max_neighs));
|
||||
|
||||
snaKK.nmax = max_neighs;
|
||||
|
||||
team_scratch_size = snaKK.size_team_scratch_arrays();
|
||||
thread_scratch_size = snaKK.size_thread_scratch_arrays();
|
||||
|
||||
//printf("Sizes: %i %i\n",team_scratch_size/1024,thread_scratch_size/1024);
|
||||
int vector_length = 1;
|
||||
int ui_vector_length = 1;
|
||||
int team_size = 1;
|
||||
int yi_team_size = 1;
|
||||
int team_size_max = Kokkos::TeamPolicy<DeviceType>::team_size_max(*this);
|
||||
vector_length = 8;
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
team_size = 32;//max_neighs;
|
||||
if (team_size*vector_length > team_size_max)
|
||||
team_size = team_size_max/vector_length;
|
||||
#else
|
||||
team_size = 1;
|
||||
|
||||
yi_team_size = 256;
|
||||
if (yi_team_size*vector_length > team_size_max)
|
||||
yi_team_size = team_size_max/vector_length;
|
||||
|
||||
ui_vector_length = 8;
|
||||
if (team_size*ui_vector_length > team_size_max)
|
||||
team_size = team_size_max/ui_vector_length;
|
||||
#endif
|
||||
|
||||
if (beta_max < list->inum) {
|
||||
d_beta = Kokkos::View<F_FLOAT**, DeviceType>("PairSNAPKokkos:beta",
|
||||
list->inum,ncoeff);
|
||||
d_bispectrum = Kokkos::View<F_FLOAT**, DeviceType>("PairSNAPKokkos:bispectrum",
|
||||
list->inum,ncoeff);
|
||||
beta_max = list->inum;
|
||||
if (beta_max < inum) {
|
||||
beta_max = inum;
|
||||
d_beta = Kokkos::View<F_FLOAT**, DeviceType>("PairSNAPKokkos:beta",inum,ncoeff);
|
||||
d_ninside = Kokkos::View<int*, DeviceType>("PairSNAPKokkos:ninside",inum);
|
||||
}
|
||||
|
||||
// compute dE_i/dB_i = beta_i for all i in list
|
||||
int chunk_size = MIN(2000,inum);
|
||||
chunk_offset = 0;
|
||||
|
||||
if (quadraticflag || eflag)
|
||||
compute_bispectrum();
|
||||
compute_beta();
|
||||
snaKK.grow_rij(chunk_size,max_neighs);
|
||||
|
||||
EV_FLOAT ev;
|
||||
|
||||
while (chunk_offset < inum) { // chunk up loop to prevent running out of memory
|
||||
|
||||
EV_FLOAT ev_tmp;
|
||||
|
||||
if (chunk_size > inum - chunk_offset)
|
||||
chunk_size = inum - chunk_offset;
|
||||
|
||||
//ComputeNeigh
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeigh> policy_neigh(chunk_size,team_size,vector_length);
|
||||
Kokkos::parallel_for("ComputeNeigh",policy_neigh,*this);
|
||||
|
||||
//PreUi
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUi> policy_preui(chunk_size,team_size,vector_length);
|
||||
Kokkos::parallel_for("PreUi",policy_preui,*this);
|
||||
|
||||
//ComputeUi
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUi> policy_ui(((inum+team_size-1)/team_size)*max_neighs,team_size,ui_vector_length);
|
||||
Kokkos::parallel_for("ComputeUi",policy_ui,*this);
|
||||
|
||||
//Compute bispectrum
|
||||
if (quadraticflag || eflag) {
|
||||
//ComputeZi
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeZi> policy_zi(chunk_size,team_size,vector_length);
|
||||
Kokkos::parallel_for("ComputeZi",policy_zi,*this);
|
||||
|
||||
//ComputeBi
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeBi> policy_bi(chunk_size,team_size,vector_length);
|
||||
Kokkos::parallel_for("ComputeBi",policy_bi,*this);
|
||||
}
|
||||
|
||||
//Compute beta = dE_i/dB_i for all i in list
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBeta> policy_beta(chunk_size,team_size,vector_length);
|
||||
Kokkos::parallel_for("ComputeBeta",policy_beta,*this);
|
||||
|
||||
//ComputeYi
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeYi> policy_yi(chunk_size,yi_team_size,vector_length);
|
||||
Kokkos::parallel_for("ComputeYi",policy_yi,*this);
|
||||
|
||||
//ComputeDuidrj
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDuidrj> policy_duidrj(((inum+team_size-1)/team_size)*max_neighs,team_size,vector_length);
|
||||
Kokkos::parallel_for("ComputeDuidrj",policy_duidrj,*this);
|
||||
|
||||
//ComputeDeidrj
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDeidrj> policy_deidrj(((inum+team_size-1)/team_size)*max_neighs,team_size,vector_length);
|
||||
Kokkos::parallel_for("ComputeDeidrj",policy_deidrj,*this);
|
||||
|
||||
//ComputeForce
|
||||
if (eflag) {
|
||||
if (neighflag == HALF) {
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<HALF,1> > policy(inum,team_size,vector_length);
|
||||
Kokkos::parallel_reduce(policy
|
||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
||||
,*this,ev);
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeForce<HALF,1> > policy_force(chunk_size,team_size,vector_length);
|
||||
Kokkos::parallel_reduce(policy_force
|
||||
,*this,ev_tmp);
|
||||
} else if (neighflag == HALFTHREAD) {
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<HALFTHREAD,1> > policy(inum,team_size,vector_length);
|
||||
Kokkos::parallel_reduce(policy
|
||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
||||
,*this,ev);
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeForce<HALFTHREAD,1> > policy_force(chunk_size,team_size,vector_length);
|
||||
Kokkos::parallel_reduce(policy_force
|
||||
,*this,ev_tmp);
|
||||
}
|
||||
} else {
|
||||
if (neighflag == HALF) {
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<HALF,0> > policy(inum,team_size,vector_length);
|
||||
Kokkos::parallel_for(policy
|
||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeForce<HALF,0> > policy_force(chunk_size,team_size,vector_length);
|
||||
Kokkos::parallel_for(policy_force
|
||||
,*this);
|
||||
} else if (neighflag == HALFTHREAD) {
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<HALFTHREAD,0> > policy(inum,team_size,vector_length);
|
||||
Kokkos::parallel_for(policy
|
||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeForce<HALFTHREAD,0> > policy_force(chunk_size,team_size,vector_length);
|
||||
Kokkos::parallel_for(policy_force
|
||||
,*this);
|
||||
}
|
||||
}
|
||||
ev += ev_tmp;
|
||||
chunk_offset += chunk_size;
|
||||
} // end while
|
||||
|
||||
if (need_dup)
|
||||
Kokkos::Experimental::contribute(f, dup_f);
|
||||
|
@ -284,32 +325,19 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute beta
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairSNAPKokkos<DeviceType>::compute_beta()
|
||||
{
|
||||
// TODO: use RangePolicy instead, or thread over ncoeff?
|
||||
int inum = list->inum;
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBeta> policy(inum,team_size,vector_length);
|
||||
Kokkos::parallel_for(policy
|
||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
||||
,*this);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPBeta,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBeta>::member_type& team) const {
|
||||
|
||||
const int ii = team.league_rank();
|
||||
const int i = d_ilist[ii];
|
||||
// TODO: use RangePolicy instead, or thread over ncoeff?
|
||||
int ii = team.league_rank();
|
||||
const int i = d_ilist[ii + chunk_offset];
|
||||
const int itype = type[i];
|
||||
const int ielem = d_map[itype];
|
||||
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||
|
||||
Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||
d_coeffi(d_coeffelem,ielem,Kokkos::ALL);
|
||||
|
||||
|
@ -319,11 +347,11 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPBeta,const typename Kokk
|
|||
if (quadraticflag) {
|
||||
int k = ncoeff+1;
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
double bveci = d_bispectrum(ii,icoeff);
|
||||
double bveci = my_sna.blist(ii,icoeff);
|
||||
d_beta(ii,icoeff) += d_coeffi[k]*bveci;
|
||||
k++;
|
||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||
double bvecj = d_bispectrum(ii,jcoeff);
|
||||
double bvecj = my_sna.blist(ii,jcoeff);
|
||||
d_beta(ii,icoeff) += d_coeffi[k]*bvecj;
|
||||
d_beta(ii,jcoeff) += d_coeffi[k]*bveci;
|
||||
k++;
|
||||
|
@ -332,105 +360,6 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPBeta,const typename Kokk
|
|||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute bispectrum
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairSNAPKokkos<DeviceType>::compute_bispectrum()
|
||||
{
|
||||
int inum = list->inum;
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBispectrum> policy(inum,team_size,vector_length);
|
||||
Kokkos::parallel_for(policy
|
||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
||||
,*this);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPBispectrum,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBispectrum>::member_type& team) const {
|
||||
|
||||
const int ii = team.league_rank();
|
||||
const int i = d_ilist[ii];
|
||||
SNAKokkos<DeviceType> my_sna(snaKK,team);
|
||||
const double xtmp = x(i,0);
|
||||
const double ytmp = x(i,1);
|
||||
const double ztmp = x(i,2);
|
||||
const int itype = type[i];
|
||||
const int ielem = d_map[itype];
|
||||
const double radi = d_radelem[ielem];
|
||||
|
||||
const int num_neighs = d_numneigh[i];
|
||||
|
||||
// rij[][3] = displacements between atom I and those neighbors
|
||||
// inside = indices of neighbors of I within cutoff
|
||||
// wj = weights for neighbors of I within cutoff
|
||||
// rcutij = cutoffs for neighbors of I within cutoff
|
||||
// note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi
|
||||
|
||||
int ninside = 0;
|
||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,num_neighs),
|
||||
[&] (const int jj, int& count) {
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
T_INT j = d_neighbors(i,jj);
|
||||
const F_FLOAT dx = x(j,0) - xtmp;
|
||||
const F_FLOAT dy = x(j,1) - ytmp;
|
||||
const F_FLOAT dz = x(j,2) - ztmp;
|
||||
|
||||
const int jtype = type(j);
|
||||
const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
|
||||
const int elem_j = d_map[jtype];
|
||||
|
||||
if ( rsq < rnd_cutsq(itype,jtype) )
|
||||
count++;
|
||||
});
|
||||
},ninside);
|
||||
|
||||
if (team.team_rank() == 0)
|
||||
Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,num_neighs),
|
||||
[&] (const int jj, int& offset, bool final) {
|
||||
//for (int jj = 0; jj < num_neighs; jj++) {
|
||||
T_INT j = d_neighbors(i,jj);
|
||||
const F_FLOAT dx = x(j,0) - xtmp;
|
||||
const F_FLOAT dy = x(j,1) - ytmp;
|
||||
const F_FLOAT dz = x(j,2) - ztmp;
|
||||
|
||||
const int jtype = type(j);
|
||||
const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
|
||||
const int elem_j = d_map[jtype];
|
||||
|
||||
if ( rsq < rnd_cutsq(itype,jtype) ) {
|
||||
if (final) {
|
||||
my_sna.rij(offset,0) = dx;
|
||||
my_sna.rij(offset,1) = dy;
|
||||
my_sna.rij(offset,2) = dz;
|
||||
my_sna.inside[offset] = j;
|
||||
my_sna.wj[offset] = d_wjelem[elem_j];
|
||||
my_sna.rcutij[offset] = (radi + d_radelem[elem_j])*rcutfac;
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
});
|
||||
team.team_barrier();
|
||||
|
||||
// compute Ui, Zi, and Bi for atom I
|
||||
|
||||
my_sna.compute_ui(team,ninside);
|
||||
team.team_barrier();
|
||||
|
||||
my_sna.compute_zi(team);
|
||||
team.team_barrier();
|
||||
|
||||
my_sna.compute_bi(team);
|
||||
team.team_barrier();
|
||||
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
|
||||
d_bispectrum(ii,icoeff) = my_sna.blist[icoeff];
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allocate all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
@ -496,30 +425,21 @@ void PairSNAPKokkos<DeviceType>::coeff(int narg, char **arg)
|
|||
Kokkos::deep_copy(d_coeffelem,h_coeffelem);
|
||||
Kokkos::deep_copy(d_map,h_map);
|
||||
|
||||
// allocate memory for per OpenMP thread data which
|
||||
// is wrapped into the sna class
|
||||
|
||||
snaKK = SNAKokkos<DeviceType>(rfac0,twojmax,
|
||||
rmin0,switchflag,bzeroflag);
|
||||
snaKK.grow_rij(0);
|
||||
snaKK.grow_rij(0,0);
|
||||
snaKK.init();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT& ev) const {
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeigh>::member_type& team) const {
|
||||
|
||||
// The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
||||
|
||||
auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
|
||||
auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
|
||||
|
||||
const int ii = team.league_rank();
|
||||
const int i = d_ilist[ii];
|
||||
SNAKokkos<DeviceType> my_sna(snaKK,team);
|
||||
int ii = team.league_rank();
|
||||
const int i = d_ilist[ii + chunk_offset];
|
||||
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||
const double xtmp = x(i,0);
|
||||
const double ytmp = x(i,1);
|
||||
const double ztmp = x(i,2);
|
||||
|
@ -553,6 +473,8 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG
|
|||
});
|
||||
},ninside);
|
||||
|
||||
d_ninside(ii) = ninside;
|
||||
|
||||
if (team.team_rank() == 0)
|
||||
Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,num_neighs),
|
||||
[&] (const int jj, int& offset, bool final) {
|
||||
|
@ -568,43 +490,124 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG
|
|||
|
||||
if ( rsq < rnd_cutsq(itype,jtype) ) {
|
||||
if (final) {
|
||||
my_sna.rij(offset,0) = dx;
|
||||
my_sna.rij(offset,1) = dy;
|
||||
my_sna.rij(offset,2) = dz;
|
||||
my_sna.inside[offset] = j;
|
||||
my_sna.wj[offset] = d_wjelem[elem_j];
|
||||
my_sna.rcutij[offset] = (radi + d_radelem[elem_j])*rcutfac;
|
||||
my_sna.rij(ii,offset,0) = dx;
|
||||
my_sna.rij(ii,offset,1) = dy;
|
||||
my_sna.rij(ii,offset,2) = dz;
|
||||
my_sna.inside(ii,offset) = j;
|
||||
my_sna.wj(ii,offset) = d_wjelem[elem_j];
|
||||
my_sna.rcutij(ii,offset) = (radi + d_radelem[elem_j])*rcutfac;
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
});
|
||||
team.team_barrier();
|
||||
}
|
||||
|
||||
// compute Ui, Yi for atom I
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPPreUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUi>::member_type& team) const {
|
||||
int ii = team.league_rank();
|
||||
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||
my_sna.pre_ui(team,ii);
|
||||
}
|
||||
|
||||
my_sna.compute_ui(team,ninside);
|
||||
team.team_barrier();
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUi>::member_type& team) const {
|
||||
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||
|
||||
// for neighbors of I within cutoff:
|
||||
// compute Fij = dEi/dRj = -dEi/dRi
|
||||
// add to Fi, subtract from Fj
|
||||
// Extract the atom number
|
||||
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((inum+team.team_size()-1)/team.team_size()));
|
||||
if (ii >= inum) return;
|
||||
|
||||
my_sna.compute_yi(team,d_beta,ii);
|
||||
team.team_barrier();
|
||||
// Extract the neighbor number
|
||||
const int jj = team.league_rank() / ((inum+team.team_size()-1)/team.team_size());
|
||||
const int ninside = d_ninside(ii);
|
||||
if (jj >= ninside) return;
|
||||
|
||||
Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||
d_coeffi(d_coeffelem,ielem,Kokkos::ALL);
|
||||
my_sna.compute_ui(team,ii,jj);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeYi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeYi>::member_type& team) const {
|
||||
int ii = team.league_rank();
|
||||
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||
my_sna.compute_yi(team,ii,d_beta);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeZi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeZi>::member_type& team) const {
|
||||
int ii = team.league_rank();
|
||||
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||
my_sna.compute_zi(team,ii);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeBi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeBi>::member_type& team) const {
|
||||
int ii = team.league_rank();
|
||||
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||
my_sna.compute_bi(team,ii);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeDuidrj,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDuidrj>::member_type& team) const {
|
||||
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||
|
||||
// Extract the atom number
|
||||
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((inum+team.team_size()-1)/team.team_size()));
|
||||
if (ii >= inum) return;
|
||||
|
||||
// Extract the neighbor number
|
||||
const int jj = team.league_rank() / ((inum+team.team_size()-1)/team.team_size());
|
||||
const int ninside = d_ninside(ii);
|
||||
if (jj >= ninside) return;
|
||||
|
||||
my_sna.compute_duidrj(team,ii,jj);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeDeidrj,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDeidrj>::member_type& team) const {
|
||||
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||
|
||||
// Extract the atom number
|
||||
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((inum+team.team_size()-1)/team.team_size()));
|
||||
if (ii >= inum) return;
|
||||
|
||||
// Extract the neighbor number
|
||||
const int jj = team.league_rank() / ((inum+team.team_size()-1)/team.team_size());
|
||||
const int ninside = d_ninside(ii);
|
||||
if (jj >= ninside) return;
|
||||
|
||||
my_sna.compute_deidrj(team,ii,jj);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT& ev) const {
|
||||
|
||||
// The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
||||
|
||||
auto v_f = ScatterViewHelper<NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
|
||||
auto a_f = v_f.template access<AtomicDup<NEIGHFLAG,DeviceType>::value>();
|
||||
|
||||
int ii = team.league_rank();
|
||||
const int i = d_ilist[ii + chunk_offset];
|
||||
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||
const int ninside = d_ninside(ii);
|
||||
|
||||
Kokkos::parallel_for (Kokkos::TeamThreadRange(team,ninside),
|
||||
[&] (const int jj) {
|
||||
//for (int jj = 0; jj < ninside; jj++) {
|
||||
int j = my_sna.inside[jj];
|
||||
|
||||
my_sna.compute_duidrj(team,&my_sna.rij(jj,0),
|
||||
my_sna.wj[jj],my_sna.rcutij[jj],jj);
|
||||
int j = my_sna.inside(ii,jj);
|
||||
|
||||
F_FLOAT fij[3];
|
||||
my_sna.compute_deidrj(team,fij);
|
||||
fij[0] = my_sna.dedr(ii,jj,0);
|
||||
fij[1] = my_sna.dedr(ii,jj,1);
|
||||
fij[2] = my_sna.dedr(ii,jj,2);
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
a_f(i,0) += fij[0];
|
||||
|
@ -620,8 +623,8 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG
|
|||
if (vflag_either) {
|
||||
v_tally_xyz<NEIGHFLAG>(ev,i,j,
|
||||
fij[0],fij[1],fij[2],
|
||||
-my_sna.rij(jj,0),-my_sna.rij(jj,1),
|
||||
-my_sna.rij(jj,2));
|
||||
-my_sna.rij(ii,jj,0),-my_sna.rij(ii,jj,1),
|
||||
-my_sna.rij(ii,jj,2));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -633,6 +636,11 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG
|
|||
if (EVFLAG) {
|
||||
if (eflag_either) {
|
||||
|
||||
const int itype = type(i);
|
||||
const int ielem = d_map[itype];
|
||||
Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||
d_coeffi(d_coeffelem,ielem,Kokkos::ALL);
|
||||
|
||||
Kokkos::single(Kokkos::PerTeam(team), [&] () {
|
||||
|
||||
// evdwl = energy of atom I, sum over coeffs_k * Bi_k
|
||||
|
@ -644,17 +652,17 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG
|
|||
// linear contributions
|
||||
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
|
||||
evdwl += d_coeffi[icoeff+1]*d_bispectrum(ii,icoeff);
|
||||
evdwl += d_coeffi[icoeff+1]*my_sna.blist(ii,icoeff);
|
||||
|
||||
// quadratic contributions
|
||||
|
||||
if (quadraticflag) {
|
||||
int k = ncoeff+1;
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
double bveci = d_bispectrum(ii,icoeff);
|
||||
double bveci = my_sna.blist(ii,icoeff);
|
||||
evdwl += 0.5*d_coeffi[k++]*bveci*bveci;
|
||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||
double bvecj = d_bispectrum(ii,jcoeff);
|
||||
double bvecj = my_sna.blist(ii,jcoeff);
|
||||
evdwl += d_coeffi[k++]*bveci*bvecj;
|
||||
}
|
||||
}
|
||||
|
@ -671,9 +679,9 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG
|
|||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPCompute<NEIGHFLAG,EVFLAG> >::member_type& team) const {
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team) const {
|
||||
EV_FLOAT ev;
|
||||
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSNAPCompute<NEIGHFLAG,EVFLAG>(), team, ev);
|
||||
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>(), team, ev);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -738,4 +746,5 @@ double PairSNAPKokkos<DeviceType>::memory_usage()
|
|||
bytes += snaKK.memory_usage();
|
||||
return bytes;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -25,6 +25,9 @@
|
|||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
typedef double SNAreal;
|
||||
typedef struct { SNAreal re, im; } SNAcomplex;
|
||||
|
||||
struct SNAKK_ZINDICES {
|
||||
int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju;
|
||||
};
|
||||
|
@ -39,12 +42,22 @@ class SNAKokkos {
|
|||
public:
|
||||
typedef Kokkos::View<int*, DeviceType> t_sna_1i;
|
||||
typedef Kokkos::View<double*, DeviceType> t_sna_1d;
|
||||
typedef Kokkos::View<double*, Kokkos::LayoutRight, DeviceType, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic;
|
||||
typedef Kokkos::View<double**, Kokkos::LayoutRight, DeviceType> t_sna_2d;
|
||||
typedef Kokkos::View<double***, Kokkos::LayoutRight, DeviceType> t_sna_3d;
|
||||
typedef Kokkos::View<double***[3], Kokkos::LayoutRight, DeviceType> t_sna_4d;
|
||||
typedef Kokkos::View<double**[3], Kokkos::LayoutRight, DeviceType> t_sna_3d3;
|
||||
typedef Kokkos::View<double*****, Kokkos::LayoutRight, DeviceType> t_sna_5d;
|
||||
typedef Kokkos::View<double*, DeviceType, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic;
|
||||
typedef Kokkos::View<int**, DeviceType> t_sna_2i;
|
||||
typedef Kokkos::View<double**, DeviceType> t_sna_2d;
|
||||
typedef Kokkos::View<double***, DeviceType> t_sna_3d;
|
||||
typedef Kokkos::View<double***[3], DeviceType> t_sna_4d;
|
||||
typedef Kokkos::View<double**[3], DeviceType> t_sna_3d3;
|
||||
typedef Kokkos::View<double*****, DeviceType> t_sna_5d;
|
||||
|
||||
typedef Kokkos::View<SNAcomplex*, DeviceType> t_sna_1c;
|
||||
typedef Kokkos::View<SNAcomplex*, DeviceType, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1c_atomic;
|
||||
typedef Kokkos::View<SNAcomplex**, DeviceType> t_sna_2c;
|
||||
typedef Kokkos::View<SNAcomplex**, Kokkos::LayoutRight, DeviceType> t_sna_2c_cpu;
|
||||
typedef Kokkos::View<SNAcomplex***, DeviceType> t_sna_3c;
|
||||
typedef Kokkos::View<SNAcomplex***[3], DeviceType> t_sna_4c;
|
||||
typedef Kokkos::View<SNAcomplex**[3], DeviceType> t_sna_3c3;
|
||||
typedef Kokkos::View<SNAcomplex*****, DeviceType> t_sna_5c;
|
||||
|
||||
inline
|
||||
SNAKokkos() {};
|
||||
|
@ -63,36 +76,31 @@ inline
|
|||
inline
|
||||
void init(); //
|
||||
|
||||
inline
|
||||
T_INT size_team_scratch_arrays();
|
||||
|
||||
inline
|
||||
T_INT size_thread_scratch_arrays();
|
||||
|
||||
double memory_usage();
|
||||
|
||||
int ncoeff;
|
||||
|
||||
// functions for bispectrum coefficients
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
||||
void pre_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_zi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); // ForceSNAP
|
||||
void compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_yi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,
|
||||
const Kokkos::View<F_FLOAT**, DeviceType> &beta, const int ii); // ForceSNAP
|
||||
void compute_ui_orig(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_bi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); // ForceSNAP
|
||||
void compute_zi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_yi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int,
|
||||
const Kokkos::View<F_FLOAT**, DeviceType> &beta); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_bi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
||||
|
||||
// functions for derivatives
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_duidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double*, double, double, int); //ForceSNAP
|
||||
void compute_duidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); //ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_dbidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); //ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double *); // ForceSNAP
|
||||
void compute_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double compute_sfac(double, double); // add_uarraytot, compute_duarray
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
@ -109,29 +117,26 @@ inline
|
|||
|
||||
|
||||
// Per InFlight Particle
|
||||
t_sna_2d rij;
|
||||
t_sna_1i inside;
|
||||
t_sna_1d wj;
|
||||
t_sna_1d rcutij;
|
||||
int nmax;
|
||||
t_sna_3d rij;
|
||||
t_sna_2i inside;
|
||||
t_sna_2d wj;
|
||||
t_sna_2d rcutij;
|
||||
t_sna_3d dedr;
|
||||
int natom, nmax;
|
||||
|
||||
void grow_rij(int);
|
||||
void grow_rij(int, int);
|
||||
|
||||
int twojmax, diagonalstyle;
|
||||
// Per InFlight Particle
|
||||
t_sna_1d blist;
|
||||
t_sna_1d ulisttot_r, ulisttot_i;
|
||||
t_sna_1d_atomic ulisttot_r_a, ulisttot_i_a;
|
||||
t_sna_1d zlist_r, zlist_i;
|
||||
t_sna_2d ulist_r_ij, ulist_i_ij;
|
||||
|
||||
// Per InFlight Interaction
|
||||
t_sna_1d ulist_r, ulist_i;
|
||||
t_sna_1d_atomic ylist_r, ylist_i;
|
||||
t_sna_2d blist;
|
||||
t_sna_2c_cpu ulisttot;
|
||||
t_sna_2c zlist;
|
||||
|
||||
t_sna_3c ulist;
|
||||
t_sna_2c ylist;
|
||||
|
||||
// derivatives of data
|
||||
t_sna_2d dulist_r, dulist_i;
|
||||
t_sna_2d dblist;
|
||||
t_sna_4c dulist;
|
||||
|
||||
private:
|
||||
double rmin0, rfac0;
|
||||
|
@ -168,14 +173,14 @@ inline
|
|||
inline
|
||||
void init_rootpqarray(); // init()
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void zero_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); // compute_ui
|
||||
void zero_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // compute_ui
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void addself_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double); // compute_ui
|
||||
void addself_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, double); // compute_ui
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double, double, double, int); // compute_ui
|
||||
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, double, double, double); // compute_ui
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_uarray(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,
|
||||
void compute_uarray(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
|
||||
double, double, double,
|
||||
double, double); // compute_ui
|
||||
inline
|
||||
|
@ -184,9 +189,9 @@ inline
|
|||
inline
|
||||
int compute_ncoeff(); // SNAKokkos()
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_duarray(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,
|
||||
void compute_duarray(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
|
||||
double, double, double, // compute_duidrj
|
||||
double, double, double, double, double, int);
|
||||
double, double, double, double, double);
|
||||
|
||||
// Sets the style for the switching function
|
||||
// 0 = none
|
||||
|
@ -197,7 +202,7 @@ inline
|
|||
double wself;
|
||||
|
||||
int bzero_flag; // 1 if bzero subtracted from barray
|
||||
Kokkos::View<double*, Kokkos::LayoutRight, DeviceType> bzero; // array of B values for isolated atoms
|
||||
Kokkos::View<double*, DeviceType> bzero; // array of B values for isolated atoms
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -41,8 +41,6 @@ SNAKokkos<DeviceType>::SNAKokkos(double rfac0_in,
|
|||
|
||||
ncoeff = compute_ncoeff();
|
||||
|
||||
//create_twojmax_arrays();
|
||||
|
||||
nmax = 0;
|
||||
|
||||
build_indexlist();
|
||||
|
@ -63,37 +61,6 @@ SNAKokkos<DeviceType>::SNAKokkos(double rfac0_in,
|
|||
}
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SNAKokkos<DeviceType>::SNAKokkos(const SNAKokkos<DeviceType>& sna, const typename Kokkos::TeamPolicy<DeviceType>::member_type& team) {
|
||||
wself = sna.wself;
|
||||
|
||||
rfac0 = sna.rfac0;
|
||||
rmin0 = sna.rmin0;
|
||||
switch_flag = sna.switch_flag;
|
||||
bzero_flag = sna.bzero_flag;
|
||||
|
||||
twojmax = sna.twojmax;
|
||||
|
||||
ncoeff = sna.ncoeff;
|
||||
nmax = sna.nmax;
|
||||
idxz = sna.idxz;
|
||||
idxb = sna.idxb;
|
||||
idxcg_max = sna.idxcg_max;
|
||||
idxu_max = sna.idxu_max;
|
||||
idxz_max = sna.idxz_max;
|
||||
idxb_max = sna.idxb_max;
|
||||
idxcg_block = sna.idxcg_block;
|
||||
idxu_block = sna.idxu_block;
|
||||
idxz_block = sna.idxz_block;
|
||||
idxb_block = sna.idxb_block;
|
||||
cglist = sna.cglist;
|
||||
rootpqarray = sna.rootpqarray;
|
||||
bzero = sna.bzero;
|
||||
create_team_scratch_arrays(team);
|
||||
create_thread_scratch_arrays(team);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
|
@ -245,18 +212,80 @@ void SNAKokkos<DeviceType>::init()
|
|||
|
||||
template<class DeviceType>
|
||||
inline
|
||||
void SNAKokkos<DeviceType>::grow_rij(int newnmax)
|
||||
void SNAKokkos<DeviceType>::grow_rij(int newnatom, int newnmax)
|
||||
{
|
||||
if(newnmax <= nmax) return;
|
||||
if(newnatom <= natom && newnmax <= nmax) return;
|
||||
natom = newnatom;
|
||||
nmax = newnmax;
|
||||
|
||||
rij = t_sna_3d("sna:rij",natom,nmax,3);
|
||||
inside = t_sna_2i("sna:inside",natom,nmax);
|
||||
wj = t_sna_2d("sna:wj",natom,nmax);
|
||||
rcutij = t_sna_2d("sna:rcutij",natom,nmax);
|
||||
dedr = t_sna_3d("sna:dedr",natom,nmax,3);
|
||||
|
||||
blist = t_sna_2d("sna:blist",natom,idxb_max);
|
||||
ulisttot = t_sna_2c_cpu("sna:ulisttot",natom,idxu_max);
|
||||
zlist = t_sna_2c("sna:zlist",natom,idxz_max);
|
||||
|
||||
ulist = t_sna_3c("sna:ulist",natom,nmax,idxu_max);
|
||||
ylist = t_sna_2c("sna:ylist",natom,idxu_max);
|
||||
|
||||
dulist = t_sna_4c("sna:dulist",natom,nmax,idxu_max);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
* compute Ui by summing over neighbors j
|
||||
* ------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::pre_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom)
|
||||
{
|
||||
if(team.team_rank() == 0) {
|
||||
zero_uarraytot(team,iatom);
|
||||
//Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
addself_uarraytot(team,iatom,wself);
|
||||
//});
|
||||
}
|
||||
team.team_barrier();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute Ui by summing over neighbors j
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int jnum)
|
||||
void SNAKokkos<DeviceType>::compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
||||
{
|
||||
double rsq, r, x, y, z, z0, theta0;
|
||||
|
||||
// utot(j,ma,mb) = 0 for all j,ma,ma
|
||||
// utot(j,ma,ma) = 1 for all j,ma
|
||||
// for j in neighbors of i:
|
||||
// compute r0 = (x,y,z,z0)
|
||||
// utot(j,ma,mb) += u(r0;j,ma,mb) for all j,ma,mb
|
||||
|
||||
x = rij(iatom,jnbor,0);
|
||||
y = rij(iatom,jnbor,1);
|
||||
z = rij(iatom,jnbor,2);
|
||||
rsq = x * x + y * y + z * z;
|
||||
r = sqrt(rsq);
|
||||
|
||||
theta0 = (r - rmin0) * rfac0 * MY_PI / (rcutij(iatom,jnbor) - rmin0);
|
||||
// theta0 = (r - rmin0) * rscale0;
|
||||
z0 = r / tan(theta0);
|
||||
|
||||
compute_uarray(team, iatom, jnbor, x, y, z, z0, r);
|
||||
//Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
add_uarraytot(team, iatom, jnbor, r, wj(iatom,jnbor), rcutij(iatom,jnbor));
|
||||
//});
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::compute_ui_orig(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnum)
|
||||
{
|
||||
double rsq, r, x, y, z, z0, theta0;
|
||||
|
||||
|
@ -267,9 +296,9 @@ void SNAKokkos<DeviceType>::compute_ui(const typename Kokkos::TeamPolicy<DeviceT
|
|||
// utot(j,ma,mb) += u(r0;j,ma,mb) for all j,ma,mb
|
||||
|
||||
if(team.team_rank() == 0) {
|
||||
zero_uarraytot(team);
|
||||
zero_uarraytot(team,iatom);
|
||||
//Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
addself_uarraytot(team,wself);
|
||||
addself_uarraytot(team,iatom,wself);
|
||||
//});
|
||||
}
|
||||
team.team_barrier();
|
||||
|
@ -277,19 +306,19 @@ void SNAKokkos<DeviceType>::compute_ui(const typename Kokkos::TeamPolicy<DeviceT
|
|||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,jnum),
|
||||
[&] (const int& j) {
|
||||
//for(int j = 0; j < jnum; j++) {
|
||||
x = rij(j,0);
|
||||
y = rij(j,1);
|
||||
z = rij(j,2);
|
||||
x = rij(iatom,j,0);
|
||||
y = rij(iatom,j,1);
|
||||
z = rij(iatom,j,2);
|
||||
rsq = x * x + y * y + z * z;
|
||||
r = sqrt(rsq);
|
||||
|
||||
theta0 = (r - rmin0) * rfac0 * MY_PI / (rcutij[j] - rmin0);
|
||||
theta0 = (r - rmin0) * rfac0 * MY_PI / (rcutij(iatom,j) - rmin0);
|
||||
// theta0 = (r - rmin0) * rscale0;
|
||||
z0 = r / tan(theta0);
|
||||
|
||||
compute_uarray(team,x, y, z, z0, r);
|
||||
compute_uarray(team, iatom, j, x, y, z, z0, r);
|
||||
//Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
add_uarraytot(team,r, wj[j], rcutij[j], j);
|
||||
add_uarraytot(team, iatom, j, r, wj(iatom,j), rcutij(iatom,j));
|
||||
//});
|
||||
});
|
||||
|
||||
|
@ -301,7 +330,7 @@ void SNAKokkos<DeviceType>::compute_ui(const typename Kokkos::TeamPolicy<DeviceT
|
|||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::compute_zi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team)
|
||||
void SNAKokkos<DeviceType>::compute_zi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom)
|
||||
{
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,idxz_max),
|
||||
[&] (const int& jjz) {
|
||||
|
@ -318,8 +347,8 @@ void SNAKokkos<DeviceType>::compute_zi(const typename Kokkos::TeamPolicy<DeviceT
|
|||
|
||||
const double* cgblock = cglist.data() + idxcg_block(j1,j2,j);
|
||||
|
||||
zlist_r[jjz] = 0.0;
|
||||
zlist_i[jjz] = 0.0;
|
||||
zlist(iatom,jjz).re = 0.0;
|
||||
zlist(iatom,jjz).im = 0.0;
|
||||
|
||||
int jju1 = idxu_block[j1] + (j1+1)*mb1min;
|
||||
int jju2 = idxu_block[j2] + (j2+1)*mb2max;
|
||||
|
@ -329,24 +358,19 @@ void SNAKokkos<DeviceType>::compute_zi(const typename Kokkos::TeamPolicy<DeviceT
|
|||
double suma1_r = 0.0;
|
||||
double suma1_i = 0.0;
|
||||
|
||||
const double* u1_r = ulisttot_r.data() + jju1;
|
||||
const double* u1_i = ulisttot_i.data() + jju1;
|
||||
const double* u2_r = ulisttot_r.data() + jju2;
|
||||
const double* u2_i = ulisttot_i.data() + jju2;
|
||||
|
||||
int ma1 = ma1min;
|
||||
int ma2 = ma2max;
|
||||
int icga = ma1min*(j2+1) + ma2max;
|
||||
for(int ia = 0; ia < na; ia++) {
|
||||
suma1_r += cgblock[icga] * (u1_r[ma1] * u2_r[ma2] - u1_i[ma1] * u2_i[ma2]);
|
||||
suma1_i += cgblock[icga] * (u1_r[ma1] * u2_i[ma2] + u1_i[ma1] * u2_r[ma2]);
|
||||
suma1_r += cgblock[icga] * (ulisttot(iatom,jju1+ma1).re * ulisttot(iatom,jju2+ma2).re - ulisttot(iatom,jju1+ma1).im * ulisttot(iatom,jju2+ma2).im);
|
||||
suma1_i += cgblock[icga] * (ulisttot(iatom,jju1+ma1).re * ulisttot(iatom,jju2+ma2).im + ulisttot(iatom,jju1+ma1).im * ulisttot(iatom,jju2+ma2).re);
|
||||
ma1++;
|
||||
ma2--;
|
||||
icga += j2;
|
||||
} // end loop over ia
|
||||
|
||||
zlist_r[jjz] += cgblock[icgb] * suma1_r;
|
||||
zlist_i[jjz] += cgblock[icgb] * suma1_i;
|
||||
zlist(iatom,jjz).re += cgblock[icgb] * suma1_r;
|
||||
zlist(iatom,jjz).im += cgblock[icgb] * suma1_i;
|
||||
|
||||
jju1 += j1+1;
|
||||
jju2 -= j2+1;
|
||||
|
@ -362,26 +386,22 @@ void SNAKokkos<DeviceType>::compute_zi(const typename Kokkos::TeamPolicy<DeviceT
|
|||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::compute_yi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,
|
||||
const Kokkos::View<F_FLOAT**, DeviceType> &beta, const int ii)
|
||||
void SNAKokkos<DeviceType>::compute_yi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom,
|
||||
const Kokkos::View<F_FLOAT**, DeviceType> &beta)
|
||||
{
|
||||
double betaj;
|
||||
const int ii = iatom;
|
||||
|
||||
{
|
||||
double* const ptr = ylist_r.data();
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,ylist_r.span()),
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,ylist.extent(1)),
|
||||
[&] (const int& i) {
|
||||
ptr[i] = 0.0;
|
||||
});
|
||||
}
|
||||
{
|
||||
double* const ptr = ylist_i.data();
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,ylist_i.span()),
|
||||
[&] (const int& i) {
|
||||
ptr[i] = 0.0;
|
||||
ylist(iatom,i).re = 0.0;
|
||||
ylist(iatom,i).im = 0.0;
|
||||
});
|
||||
}
|
||||
|
||||
//int flopsum = 0;
|
||||
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,idxz_max),
|
||||
[&] (const int& jjz) {
|
||||
//for(int jjz = 0; jjz < idxz_max; jjz++) {
|
||||
|
@ -410,18 +430,15 @@ void SNAKokkos<DeviceType>::compute_yi(const typename Kokkos::TeamPolicy<DeviceT
|
|||
double suma1_r = 0.0;
|
||||
double suma1_i = 0.0;
|
||||
|
||||
const double* u1_r = ulisttot_r.data() + jju1;
|
||||
const double* u1_i = ulisttot_i.data() + jju1;
|
||||
const double* u2_r = ulisttot_r.data() + jju2;
|
||||
const double* u2_i = ulisttot_i.data() + jju2;
|
||||
|
||||
int ma1 = ma1min;
|
||||
int ma2 = ma2max;
|
||||
int icga = ma1min*(j2+1) + ma2max;
|
||||
|
||||
for(int ia = 0; ia < na; ia++) {
|
||||
suma1_r += cgblock[icga] * (u1_r[ma1] * u2_r[ma2] - u1_i[ma1] * u2_i[ma2]);
|
||||
suma1_i += cgblock[icga] * (u1_r[ma1] * u2_i[ma2] + u1_i[ma1] * u2_r[ma2]); ma1++;
|
||||
suma1_r += cgblock[icga] * (ulisttot(iatom,jju1+ma1).re * ulisttot(iatom,jju2+ma2).re - ulisttot(iatom,jju1+ma1).im * ulisttot(iatom,jju2+ma2).im);
|
||||
suma1_i += cgblock[icga] * (ulisttot(iatom,jju1+ma1).re * ulisttot(iatom,jju2+ma2).im + ulisttot(iatom,jju1+ma1).im * ulisttot(iatom,jju2+ma2).re);
|
||||
//flopsum += 10;
|
||||
ma1++;
|
||||
ma2--;
|
||||
icga += j2;
|
||||
} // end loop over ia
|
||||
|
@ -458,11 +475,13 @@ void SNAKokkos<DeviceType>::compute_yi(const typename Kokkos::TeamPolicy<DeviceT
|
|||
}
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
||||
ylist_r[jju] += betaj*ztmp_r;
|
||||
ylist_i[jju] += betaj*ztmp_i;
|
||||
Kokkos::atomic_add(&(ylist(iatom,jju).re), betaj*ztmp_r);
|
||||
Kokkos::atomic_add(&(ylist(iatom,jju).im), betaj*ztmp_i);
|
||||
});
|
||||
|
||||
}); // end loop over jjz
|
||||
|
||||
//printf("sum %i\n",flopsum);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -471,7 +490,7 @@ void SNAKokkos<DeviceType>::compute_yi(const typename Kokkos::TeamPolicy<DeviceT
|
|||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::compute_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double* dedr)
|
||||
void SNAKokkos<DeviceType>::compute_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
||||
{
|
||||
t_scalar3<double> sum;
|
||||
|
||||
|
@ -482,9 +501,9 @@ void SNAKokkos<DeviceType>::compute_deidrj(const typename Kokkos::TeamPolicy<Dev
|
|||
|
||||
for(int mb = 0; 2*mb < j; mb++)
|
||||
for(int ma = 0; ma <= j; ma++) {
|
||||
sum_tmp.x += dulist_r(jju,0) * ylist_r[jju] + dulist_i(jju,0) * ylist_i[jju];
|
||||
sum_tmp.y += dulist_r(jju,1) * ylist_r[jju] + dulist_i(jju,1) * ylist_i[jju];
|
||||
sum_tmp.z += dulist_r(jju,2) * ylist_r[jju] + dulist_i(jju,2) * ylist_i[jju];
|
||||
sum_tmp.x += dulist(iatom,jnbor,jju,0).re * ylist(iatom,jju).re + dulist(iatom,jnbor,jju,0).im * ylist(iatom,jju).im;
|
||||
sum_tmp.y += dulist(iatom,jnbor,jju,1).re * ylist(iatom,jju).re + dulist(iatom,jnbor,jju,1).im * ylist(iatom,jju).im;
|
||||
sum_tmp.z += dulist(iatom,jnbor,jju,2).re * ylist(iatom,jju).re + dulist(iatom,jnbor,jju,2).im * ylist(iatom,jju).im;
|
||||
jju++;
|
||||
} //end loop over ma mb
|
||||
|
||||
|
@ -494,24 +513,25 @@ void SNAKokkos<DeviceType>::compute_deidrj(const typename Kokkos::TeamPolicy<Dev
|
|||
|
||||
int mb = j/2;
|
||||
for(int ma = 0; ma < mb; ma++) {
|
||||
sum_tmp.x += dulist_r(jju,0) * ylist_r[jju] + dulist_i(jju,0) * ylist_i[jju];
|
||||
sum_tmp.y += dulist_r(jju,1) * ylist_r[jju] + dulist_i(jju,1) * ylist_i[jju];
|
||||
sum_tmp.z += dulist_r(jju,2) * ylist_r[jju] + dulist_i(jju,2) * ylist_i[jju];
|
||||
sum_tmp.x += dulist(iatom,jnbor,jju,0).re * ylist(iatom,jju).re + dulist(iatom,jnbor,jju,0).im * ylist(iatom,jju).im;
|
||||
sum_tmp.y += dulist(iatom,jnbor,jju,1).re * ylist(iatom,jju).re + dulist(iatom,jnbor,jju,1).im * ylist(iatom,jju).im;
|
||||
sum_tmp.z += dulist(iatom,jnbor,jju,2).re * ylist(iatom,jju).re + dulist(iatom,jnbor,jju,2).im * ylist(iatom,jju).im;
|
||||
jju++;
|
||||
}
|
||||
|
||||
//int ma = mb;
|
||||
sum_tmp.x += (dulist_r(jju,0) * ylist_r[jju] + dulist_i(jju,0) * ylist_i[jju])*0.5;
|
||||
sum_tmp.y += (dulist_r(jju,1) * ylist_r[jju] + dulist_i(jju,1) * ylist_i[jju])*0.5;
|
||||
sum_tmp.z += (dulist_r(jju,2) * ylist_r[jju] + dulist_i(jju,2) * ylist_i[jju])*0.5;
|
||||
sum_tmp.x += (dulist(iatom,jnbor,jju,0).re * ylist(iatom,jju).re + dulist(iatom,jnbor,jju,0).im * ylist(iatom,jju).im)*0.5;
|
||||
sum_tmp.y += (dulist(iatom,jnbor,jju,1).re * ylist(iatom,jju).re + dulist(iatom,jnbor,jju,1).im * ylist(iatom,jju).im)*0.5;
|
||||
sum_tmp.z += (dulist(iatom,jnbor,jju,2).re * ylist(iatom,jju).re + dulist(iatom,jnbor,jju,2).im * ylist(iatom,jju).im)*0.5;
|
||||
} // end if jeven
|
||||
|
||||
},sum); // end loop over j
|
||||
//}
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
||||
dedr[0] = sum.x*2.0;
|
||||
dedr[1] = sum.y*2.0;
|
||||
dedr[2] = sum.z*2.0;
|
||||
dedr(iatom,jnbor,0) = sum.x*2.0;
|
||||
dedr(iatom,jnbor,1) = sum.y*2.0;
|
||||
dedr(iatom,jnbor,2) = sum.z*2.0;
|
||||
});
|
||||
|
||||
}
|
||||
|
@ -522,7 +542,7 @@ void SNAKokkos<DeviceType>::compute_deidrj(const typename Kokkos::TeamPolicy<Dev
|
|||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::compute_bi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team)
|
||||
void SNAKokkos<DeviceType>::compute_bi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom)
|
||||
{
|
||||
// for j1 = 0,...,twojmax
|
||||
// for j2 = 0,twojmax
|
||||
|
@ -555,8 +575,8 @@ void SNAKokkos<DeviceType>::compute_bi(const typename Kokkos::TeamPolicy<DeviceT
|
|||
const int jjz_index = jjz+mb*(j+1)+ma;
|
||||
if (2*mb == j) return;
|
||||
sum +=
|
||||
ulisttot_r(jju_index) * zlist_r(jjz_index) +
|
||||
ulisttot_i(jju_index) * zlist_i(jjz_index);
|
||||
ulisttot(iatom,jju_index).re * zlist(iatom,jjz_index).re +
|
||||
ulisttot(iatom,jju_index).im * zlist(iatom,jjz_index).im;
|
||||
},sumzu_temp); // end loop over ma, mb
|
||||
sumzu += sumzu_temp;
|
||||
|
||||
|
@ -570,8 +590,8 @@ void SNAKokkos<DeviceType>::compute_bi(const typename Kokkos::TeamPolicy<DeviceT
|
|||
const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma;
|
||||
const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma;
|
||||
sum +=
|
||||
ulisttot_r(jju_index) * zlist_r(jjz_index) +
|
||||
ulisttot_i(jju_index) * zlist_i(jjz_index);
|
||||
ulisttot(iatom,jju_index).re * zlist(iatom,jjz_index).re +
|
||||
ulisttot(iatom,jju_index).im * zlist(iatom,jjz_index).im;
|
||||
},sumzu_temp); // end loop over ma
|
||||
sumzu += sumzu_temp;
|
||||
|
||||
|
@ -579,8 +599,8 @@ void SNAKokkos<DeviceType>::compute_bi(const typename Kokkos::TeamPolicy<DeviceT
|
|||
const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma;
|
||||
const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma;
|
||||
sumzu += 0.5*
|
||||
(ulisttot_r(jju_index) * zlist_r(jjz_index) +
|
||||
ulisttot_i(jju_index) * zlist_i(jjz_index));
|
||||
(ulisttot(iatom,jju_index).re * zlist(iatom,jjz_index).re +
|
||||
ulisttot(iatom,jju_index).im * zlist(iatom,jjz_index).im);
|
||||
} // end if jeven
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
||||
|
@ -591,229 +611,50 @@ void SNAKokkos<DeviceType>::compute_bi(const typename Kokkos::TeamPolicy<DeviceT
|
|||
if (bzero_flag)
|
||||
sumzu -= bzero[j];
|
||||
|
||||
blist(jjb) = sumzu;
|
||||
blist(iatom,jjb) = sumzu;
|
||||
});
|
||||
});
|
||||
//} // end loop over j
|
||||
//} // end loop over j1, j2
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
calculate derivative of Bi w.r.t. atom j
|
||||
variant using indexlist for j1,j2,j
|
||||
variant using symmetry relation
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::compute_dbidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team)
|
||||
{
|
||||
// for j1 = 0,...,twojmax
|
||||
// for j2 = 0,twojmax
|
||||
// for j = |j1-j2|,Min(twojmax,j1+j2),2
|
||||
// zdb = 0
|
||||
// for mb = 0,...,jmid
|
||||
// for ma = 0,...,j
|
||||
// zdb +=
|
||||
// Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb)
|
||||
// dbdr(j1,j2,j) += 2*zdb
|
||||
// zdb = 0
|
||||
// for mb1 = 0,...,j1mid
|
||||
// for ma1 = 0,...,j1
|
||||
// zdb +=
|
||||
// Conj(dudr(j1,ma1,mb1))*z(j,j2,j1,ma1,mb1)
|
||||
// dbdr(j1,j2,j) += 2*zdb*(j+1)/(j1+1)
|
||||
// zdb = 0
|
||||
// for mb2 = 0,...,j2mid
|
||||
// for ma2 = 0,...,j2
|
||||
// zdb +=
|
||||
// Conj(dudr(j2,ma2,mb2))*z(j1,j,j2,ma2,mb2)
|
||||
// dbdr(j1,j2,j) += 2*zdb*(j+1)/(j2+1)
|
||||
|
||||
|
||||
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,idxb_max),
|
||||
[&] (const int& jjb) {
|
||||
//for(int jjb = 0; jjb < idxb_max; jjb++) {
|
||||
const int j1 = idxb[jjb].j1;
|
||||
const int j2 = idxb[jjb].j2;
|
||||
const int j = idxb[jjb].j;
|
||||
|
||||
// dbdr = dblist(jjb);
|
||||
// dbdr[0] = 0.0;
|
||||
// dbdr[1] = 0.0;
|
||||
// dbdr[2] = 0.0;
|
||||
|
||||
t_scalar3<double> dbdr,sumzdu_r;
|
||||
// Sum terms Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb)
|
||||
|
||||
int jjz = idxz_block(j1,j2,j);
|
||||
int jju = idxu_block[j];
|
||||
|
||||
for(int mb = 0; 2*mb < j; mb++)
|
||||
for(int ma = 0; ma <= j; ma++) {
|
||||
const int jju_index = jju+mb*(j+1)+ma;
|
||||
const int jjz_index = jjz+mb*(j+1)+ma;
|
||||
sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz_index] + dulist_i(jju_index,0) * zlist_i[jjz_index]);
|
||||
sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz_index] + dulist_i(jju_index,1) * zlist_i[jjz_index]);
|
||||
sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz_index] + dulist_i(jju_index,2) * zlist_i[jjz_index]);
|
||||
} //end loop over ma mb
|
||||
|
||||
// For j even, handle middle column
|
||||
|
||||
if (j%2 == 0) {
|
||||
int mb = j/2;
|
||||
for(int ma = 0; ma <= mb; ma++) {
|
||||
const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma;
|
||||
const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma;
|
||||
sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz_index] + dulist_i(jju_index,0) * zlist_i[jjz_index]);
|
||||
sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz_index] + dulist_i(jju_index,1) * zlist_i[jjz_index]);
|
||||
sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz_index] + dulist_i(jju_index,2) * zlist_i[jjz_index]);
|
||||
}
|
||||
int ma = mb;
|
||||
const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma;
|
||||
const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma;
|
||||
for(int k = 0; k < 3; k++) {
|
||||
sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz] + dulist_i(jju_index,0) * zlist_i[jjz_index])*0.5;
|
||||
sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz] + dulist_i(jju_index,1) * zlist_i[jjz_index])*0.5;
|
||||
sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz] + dulist_i(jju_index,2) * zlist_i[jjz_index])*0.5;
|
||||
}
|
||||
} // end if jeven
|
||||
|
||||
dbdr += 2.0*sumzdu_r;
|
||||
|
||||
// Sum over Conj(dudr(j1,ma1,mb1))*z(j,j2,j1,ma1,mb1)
|
||||
|
||||
double j1fac = (j+1)/(j1+1.0);
|
||||
|
||||
jjz = idxz_block(j,j2,j1);
|
||||
jju = idxu_block[j1];
|
||||
|
||||
sumzdu_r.x = 0.0; sumzdu_r.y = 0.0; sumzdu_r.z = 0.0;
|
||||
|
||||
for(int mb = 0; 2*mb < j1; mb++)
|
||||
for(int ma = 0; ma <= j1; ma++) {
|
||||
const int jju_index = jju+mb*(j1+1)+ma;
|
||||
const int jjz_index = jjz+mb*(j1+1)+ma;
|
||||
sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz_index] + dulist_i(jju_index,0) * zlist_i[jjz_index]);
|
||||
sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz_index] + dulist_i(jju_index,1) * zlist_i[jjz_index]);
|
||||
sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz_index] + dulist_i(jju_index,2) * zlist_i[jjz_index]);
|
||||
} //end loop over ma1 mb1
|
||||
|
||||
// For j1 even, handle middle column
|
||||
|
||||
if (j1%2 == 0) {
|
||||
const int mb = j1/2;
|
||||
for(int ma = 0; ma <= mb; ma++) {
|
||||
const int jju_index = jju+(mb-1)*(j1+1)+(j1+1)+ma;
|
||||
const int jjz_index = jjz+(mb-1)*(j1+1)+(j1+1)+ma;
|
||||
sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz_index] + dulist_i(jju_index,0) * zlist_i[jjz_index]);
|
||||
sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz_index] + dulist_i(jju_index,1) * zlist_i[jjz_index]);
|
||||
sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz_index] + dulist_i(jju_index,2) * zlist_i[jjz_index]);
|
||||
}
|
||||
int ma = mb;
|
||||
const int jju_index = jju+(mb-1)*(j1+1)+(j1+1)+ma;
|
||||
const int jjz_index = jjz+(mb-1)*(j1+1)+(j1+1)+ma;
|
||||
for(int k = 0; k < 3; k++) {
|
||||
sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz] + dulist_i(jju_index,0) * zlist_i[jjz_index])*0.5;
|
||||
sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz] + dulist_i(jju_index,1) * zlist_i[jjz_index])*0.5;
|
||||
sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz] + dulist_i(jju_index,2) * zlist_i[jjz_index])*0.5;
|
||||
}
|
||||
} // end if j1even
|
||||
|
||||
dbdr += 2.0*sumzdu_r*j1fac;
|
||||
|
||||
// Sum over Conj(dudr(j2,ma2,mb2))*z(j1,j,j2,ma2,mb2)
|
||||
|
||||
double j2fac = (j+1)/(j2+1.0);
|
||||
|
||||
jjz = idxz_block(j,j1,j2);
|
||||
jju = idxu_block[j2];
|
||||
|
||||
sumzdu_r.x = 0.0; sumzdu_r.y = 0.0; sumzdu_r.z = 0.0;
|
||||
|
||||
for(int mb = 0; 2*mb < j2; mb++)
|
||||
for(int ma = 0; ma <= j2; ma++) {
|
||||
const int jju_index = jju+mb*(j2+1)+ma;
|
||||
const int jjz_index = jjz+mb*(j2+1)+ma;
|
||||
sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz_index] + dulist_i(jju_index,0) * zlist_i[jjz_index]);
|
||||
sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz_index] + dulist_i(jju_index,1) * zlist_i[jjz_index]);
|
||||
sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz_index] + dulist_i(jju_index,2) * zlist_i[jjz_index]);
|
||||
} //end loop over ma2 mb2
|
||||
|
||||
// For j2 even, handle middle column
|
||||
|
||||
if (j2%2 == 0) {
|
||||
const int mb = j2/2;
|
||||
for(int ma = 0; ma <= mb; ma++) {
|
||||
const int jju_index = jju+(mb-1)*(j2+1)+(j2+1)+ma;
|
||||
const int jjz_index = jjz+(mb-1)*(j2+1)+(j2+1)+ma;
|
||||
sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz_index] + dulist_i(jju_index,0) * zlist_i[jjz_index]);
|
||||
sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz_index] + dulist_i(jju_index,1) * zlist_i[jjz_index]);
|
||||
sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz_index] + dulist_i(jju_index,2) * zlist_i[jjz_index]);
|
||||
}
|
||||
int ma = mb;
|
||||
const int jju_index = jju+(mb-1)*(j2+1)+(j2+1)+ma;
|
||||
const int jjz_index = jjz+(mb-1)*(j2+1)+(j2+1)+ma;
|
||||
for(int k = 0; k < 3; k++) {
|
||||
sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz] + dulist_i(jju_index,0) * zlist_i[jjz_index])*0.5;
|
||||
sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz] + dulist_i(jju_index,1) * zlist_i[jjz_index])*0.5;
|
||||
sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz] + dulist_i(jju_index,2) * zlist_i[jjz_index])*0.5;
|
||||
}
|
||||
} // end if j2even
|
||||
|
||||
dbdr += 2.0*sumzdu_r*j2fac;
|
||||
dblist(jjb,0) = dbdr.x;
|
||||
dblist(jjb,1) = dbdr.y;
|
||||
dblist(jjb,2) = dbdr.z;
|
||||
|
||||
}); //end loop over j1 j2 j
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
calculate derivative of Ui w.r.t. atom j
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::compute_duidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,
|
||||
double* rij, double wj, double rcut, int jj)
|
||||
void SNAKokkos<DeviceType>::compute_duidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
||||
{
|
||||
double rsq, r, x, y, z, z0, theta0, cs, sn;
|
||||
double dz0dr;
|
||||
|
||||
x = rij[0];
|
||||
y = rij[1];
|
||||
z = rij[2];
|
||||
x = rij(iatom,jnbor,0);
|
||||
y = rij(iatom,jnbor,1);
|
||||
z = rij(iatom,jnbor,2);
|
||||
rsq = x * x + y * y + z * z;
|
||||
r = sqrt(rsq);
|
||||
double rscale0 = rfac0 * MY_PI / (rcut - rmin0);
|
||||
double rscale0 = rfac0 * MY_PI / (rcutij(iatom,jnbor) - rmin0);
|
||||
theta0 = (r - rmin0) * rscale0;
|
||||
cs = cos(theta0);
|
||||
sn = sin(theta0);
|
||||
z0 = r * cs / sn;
|
||||
dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq;
|
||||
|
||||
compute_duarray(team, x, y, z, z0, r, dz0dr, wj, rcut, jj);
|
||||
compute_duarray(team, iatom, jnbor, x, y, z, z0, r, dz0dr, wj(iatom,jnbor), rcutij(iatom,jnbor));
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::zero_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team)
|
||||
void SNAKokkos<DeviceType>::zero_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom)
|
||||
{
|
||||
{
|
||||
double* const ptr = ulisttot_r.data();
|
||||
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,ulisttot_r.span()),
|
||||
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,ulisttot.extent(1)),
|
||||
[&] (const int& i) {
|
||||
ptr[i] = 0.0;
|
||||
});
|
||||
}
|
||||
{
|
||||
double* const ptr = ulisttot_i.data();
|
||||
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,ulisttot_i.span()),
|
||||
[&] (const int& i) {
|
||||
ptr[i] = 0.0;
|
||||
ulisttot(iatom,i).re = 0.0;
|
||||
ulisttot(iatom,i).im = 0.0;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -822,15 +663,15 @@ void SNAKokkos<DeviceType>::zero_uarraytot(const typename Kokkos::TeamPolicy<Dev
|
|||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::addself_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double wself_in)
|
||||
void SNAKokkos<DeviceType>::addself_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, double wself_in)
|
||||
{
|
||||
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,twojmax+1),
|
||||
[&] (const int& j) {
|
||||
//for (int j = 0; j <= twojmax; j++)
|
||||
int jju = idxu_block[j];
|
||||
for (int ma = 0; ma <= j; ma++) {
|
||||
ulisttot_r[jju] = wself_in;
|
||||
ulisttot_i[jju] = 0.0;
|
||||
ulisttot(iatom,jju).re = wself_in;
|
||||
ulisttot(iatom,jju).im = 0.0;
|
||||
jju += j+2;
|
||||
}
|
||||
});
|
||||
|
@ -842,28 +683,15 @@ void SNAKokkos<DeviceType>::addself_uarraytot(const typename Kokkos::TeamPolicy<
|
|||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,
|
||||
double r, double wj, double rcut, int j)
|
||||
void SNAKokkos<DeviceType>::add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
|
||||
double r, double wj, double rcut)
|
||||
{
|
||||
const double sfac = compute_sfac(r, rcut) * wj;
|
||||
|
||||
const double* const ptr_r = ulist_r.data();
|
||||
const double* const ptr_i = ulist_i.data();
|
||||
double* const ptrtot_r = ulisttot_r.data();
|
||||
double* const ptrtot_i = ulisttot_i.data();
|
||||
|
||||
Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||
ulist_r_j(ulist_r_ij,j,Kokkos::ALL);
|
||||
Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||
ulist_i_j(ulist_i_ij,j,Kokkos::ALL);
|
||||
|
||||
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,ulisttot_r.span()),
|
||||
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,ulisttot.extent(1)),
|
||||
[&] (const int& i) {
|
||||
Kokkos::atomic_add(ptrtot_r+i, sfac * ptr_r[i]);
|
||||
Kokkos::atomic_add(ptrtot_i+i, sfac * ptr_i[i]);
|
||||
|
||||
ulist_r_j(i) = ulist_r(i);
|
||||
ulist_i_j(i) = ulist_i(i);
|
||||
Kokkos::atomic_add(&(ulisttot(iatom,i).re), sfac * ulist(iatom,jnbor,i).re);
|
||||
Kokkos::atomic_add(&(ulisttot(iatom,i).im), sfac * ulist(iatom,jnbor,i).im);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -873,7 +701,7 @@ void SNAKokkos<DeviceType>::add_uarraytot(const typename Kokkos::TeamPolicy<Devi
|
|||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::compute_uarray(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,
|
||||
void SNAKokkos<DeviceType>::compute_uarray(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
|
||||
double x, double y, double z,
|
||||
double z0, double r)
|
||||
{
|
||||
|
@ -891,8 +719,8 @@ void SNAKokkos<DeviceType>::compute_uarray(const typename Kokkos::TeamPolicy<Dev
|
|||
|
||||
// VMK Section 4.8.2
|
||||
|
||||
ulist_r[0] = 1.0;
|
||||
ulist_i[0] = 0.0;
|
||||
ulist(iatom,jnbor,0).re = 1.0;
|
||||
ulist(iatom,jnbor,0).im = 0.0;
|
||||
|
||||
for (int j = 1; j <= twojmax; j++) {
|
||||
int jju = idxu_block[j];
|
||||
|
@ -904,31 +732,31 @@ void SNAKokkos<DeviceType>::compute_uarray(const typename Kokkos::TeamPolicy<Dev
|
|||
[&] (const int& mb) {
|
||||
//for (int mb = 0; 2*mb <= j; mb++) {
|
||||
const int jju_index = jju+mb+mb*j;
|
||||
ulist_r[jju_index] = 0.0;
|
||||
ulist_i[jju_index] = 0.0;
|
||||
ulist(iatom,jnbor,jju_index).re = 0.0;
|
||||
ulist(iatom,jnbor,jju_index).im = 0.0;
|
||||
|
||||
for (int ma = 0; ma < j; ma++) {
|
||||
const int jju_index = jju+mb+mb*j+ma;
|
||||
const int jjup_index = jjup+mb*j+ma;
|
||||
rootpq = rootpqarray(j - ma,j - mb);
|
||||
ulist_r[jju_index] +=
|
||||
ulist(iatom,jnbor,jju_index).re +=
|
||||
rootpq *
|
||||
(a_r * ulist_r[jjup_index] +
|
||||
a_i * ulist_i[jjup_index]);
|
||||
ulist_i[jju_index] +=
|
||||
(a_r * ulist(iatom,jnbor,jjup_index).re +
|
||||
a_i * ulist(iatom,jnbor,jjup_index).im);
|
||||
ulist(iatom,jnbor,jju_index).im +=
|
||||
rootpq *
|
||||
(a_r * ulist_i[jjup_index] -
|
||||
a_i * ulist_r[jjup_index]);
|
||||
(a_r * ulist(iatom,jnbor,jjup_index).im -
|
||||
a_i * ulist(iatom,jnbor,jjup_index).re);
|
||||
|
||||
rootpq = rootpqarray(ma + 1,j - mb);
|
||||
ulist_r[jju_index+1] =
|
||||
ulist(iatom,jnbor,jju_index+1).re =
|
||||
-rootpq *
|
||||
(b_r * ulist_r[jjup_index] +
|
||||
b_i * ulist_i[jjup_index]);
|
||||
ulist_i[jju_index+1] =
|
||||
(b_r * ulist(iatom,jnbor,jjup_index).re +
|
||||
b_i * ulist(iatom,jnbor,jjup_index).im);
|
||||
ulist(iatom,jnbor,jju_index+1).im =
|
||||
-rootpq *
|
||||
(b_r * ulist_i[jjup_index] -
|
||||
b_i * ulist_r[jjup_index]);
|
||||
(b_r * ulist(iatom,jnbor,jjup_index).im -
|
||||
b_i * ulist(iatom,jnbor,jjup_index).re);
|
||||
}
|
||||
});
|
||||
|
||||
|
@ -946,11 +774,11 @@ void SNAKokkos<DeviceType>::compute_uarray(const typename Kokkos::TeamPolicy<Dev
|
|||
const int jju_index = jju+mb*(j+1)+ma;
|
||||
const int jjup_index = jjup-mb*(j+1)-ma;
|
||||
if (mapar == 1) {
|
||||
ulist_r[jjup_index] = ulist_r[jju_index];
|
||||
ulist_i[jjup_index] = -ulist_i[jju_index];
|
||||
ulist(iatom,jnbor,jjup_index).re = ulist(iatom,jnbor,jju_index).re;
|
||||
ulist(iatom,jnbor,jjup_index).im = -ulist(iatom,jnbor,jju_index).im;
|
||||
} else {
|
||||
ulist_r[jjup_index] = -ulist_r[jju_index];
|
||||
ulist_i[jjup_index] = ulist_i[jju_index];
|
||||
ulist(iatom,jnbor,jjup_index).re = -ulist(iatom,jnbor,jju_index).re;
|
||||
ulist(iatom,jnbor,jjup_index).im = ulist(iatom,jnbor,jju_index).im;
|
||||
}
|
||||
mapar = -mapar;
|
||||
}
|
||||
|
@ -965,10 +793,10 @@ void SNAKokkos<DeviceType>::compute_uarray(const typename Kokkos::TeamPolicy<Dev
|
|||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::compute_duarray(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,
|
||||
void SNAKokkos<DeviceType>::compute_duarray(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
|
||||
double x, double y, double z,
|
||||
double z0, double r, double dz0dr,
|
||||
double wj, double rcut, int jj)
|
||||
double wj, double rcut)
|
||||
{
|
||||
double r0inv;
|
||||
double a_r, a_i, b_r, b_i;
|
||||
|
@ -1012,17 +840,12 @@ void SNAKokkos<DeviceType>::compute_duarray(const typename Kokkos::TeamPolicy<De
|
|||
db_i[0] += -r0inv;
|
||||
db_r[1] += r0inv;
|
||||
|
||||
Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||
ulist_r(ulist_r_ij,jj,Kokkos::ALL);
|
||||
Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||
ulist_i(ulist_i_ij,jj,Kokkos::ALL);
|
||||
|
||||
dulist_r(0,0) = 0.0;
|
||||
dulist_r(0,1) = 0.0;
|
||||
dulist_r(0,2) = 0.0;
|
||||
dulist_i(0,0) = 0.0;
|
||||
dulist_i(0,1) = 0.0;
|
||||
dulist_i(0,2) = 0.0;
|
||||
dulist(iatom,jnbor,0,0).re = 0.0;
|
||||
dulist(iatom,jnbor,0,1).re = 0.0;
|
||||
dulist(iatom,jnbor,0,2).re = 0.0;
|
||||
dulist(iatom,jnbor,0,0).im = 0.0;
|
||||
dulist(iatom,jnbor,0,1).im = 0.0;
|
||||
dulist(iatom,jnbor,0,2).im = 0.0;
|
||||
|
||||
for (int j = 1; j <= twojmax; j++) {
|
||||
int jju = idxu_block[j];
|
||||
|
@ -1031,42 +854,42 @@ void SNAKokkos<DeviceType>::compute_duarray(const typename Kokkos::TeamPolicy<De
|
|||
[&] (const int& mb) {
|
||||
//for (int mb = 0; 2*mb <= j; mb++) {
|
||||
const int jju_index = jju+mb+mb*j;
|
||||
dulist_r(jju_index,0) = 0.0;
|
||||
dulist_r(jju_index,1) = 0.0;
|
||||
dulist_r(jju_index,2) = 0.0;
|
||||
dulist_i(jju_index,0) = 0.0;
|
||||
dulist_i(jju_index,1) = 0.0;
|
||||
dulist_i(jju_index,2) = 0.0;
|
||||
dulist(iatom,jnbor,jju_index,0).re = 0.0;
|
||||
dulist(iatom,jnbor,jju_index,1).re = 0.0;
|
||||
dulist(iatom,jnbor,jju_index,2).re = 0.0;
|
||||
dulist(iatom,jnbor,jju_index,0).im = 0.0;
|
||||
dulist(iatom,jnbor,jju_index,1).im = 0.0;
|
||||
dulist(iatom,jnbor,jju_index,2).im = 0.0;
|
||||
|
||||
for (int ma = 0; ma < j; ma++) {
|
||||
const int jju_index = jju+mb+mb*j+ma;
|
||||
const int jjup_index = jjup+mb*j+ma;
|
||||
rootpq = rootpqarray(j - ma,j - mb);
|
||||
for (int k = 0; k < 3; k++) {
|
||||
dulist_r(jju_index,k) +=
|
||||
rootpq * (da_r[k] * ulist_r[jjup_index] +
|
||||
da_i[k] * ulist_i[jjup_index] +
|
||||
a_r * dulist_r(jjup_index,k) +
|
||||
a_i * dulist_i(jjup_index,k));
|
||||
dulist_i(jju_index,k) +=
|
||||
rootpq * (da_r[k] * ulist_i[jjup_index] -
|
||||
da_i[k] * ulist_r[jjup_index] +
|
||||
a_r * dulist_i(jjup_index,k) -
|
||||
a_i * dulist_r(jjup_index,k));
|
||||
dulist(iatom,jnbor,jju_index,k).re +=
|
||||
rootpq * (da_r[k] * ulist(iatom,jnbor,jjup_index).re +
|
||||
da_i[k] * ulist(iatom,jnbor,jjup_index).im +
|
||||
a_r * dulist(iatom,jnbor,jjup_index,k).re +
|
||||
a_i * dulist(iatom,jnbor,jjup_index,k).im);
|
||||
dulist(iatom,jnbor,jju_index,k).im +=
|
||||
rootpq * (da_r[k] * ulist(iatom,jnbor,jjup_index).im -
|
||||
da_i[k] * ulist(iatom,jnbor,jjup_index).re +
|
||||
a_r * dulist(iatom,jnbor,jjup_index,k).im -
|
||||
a_i * dulist(iatom,jnbor,jjup_index,k).re);
|
||||
}
|
||||
|
||||
rootpq = rootpqarray(ma + 1,j - mb);
|
||||
for (int k = 0; k < 3; k++) {
|
||||
dulist_r(jju_index+1,k) =
|
||||
-rootpq * (db_r[k] * ulist_r[jjup_index] +
|
||||
db_i[k] * ulist_i[jjup_index] +
|
||||
b_r * dulist_r(jjup_index,k) +
|
||||
b_i * dulist_i(jjup_index,k));
|
||||
dulist_i(jju_index+1,k) =
|
||||
-rootpq * (db_r[k] * ulist_i[jjup_index] -
|
||||
db_i[k] * ulist_r[jjup_index] +
|
||||
b_r * dulist_i(jjup_index,k) -
|
||||
b_i * dulist_r(jjup_index,k));
|
||||
dulist(iatom,jnbor,jju_index+1,k).re =
|
||||
-rootpq * (db_r[k] * ulist(iatom,jnbor,jjup_index).re +
|
||||
db_i[k] * ulist(iatom,jnbor,jjup_index).im +
|
||||
b_r * dulist(iatom,jnbor,jjup_index,k).re +
|
||||
b_i * dulist(iatom,jnbor,jjup_index,k).im);
|
||||
dulist(iatom,jnbor,jju_index+1,k).im =
|
||||
-rootpq * (db_r[k] * ulist(iatom,jnbor,jjup_index).im -
|
||||
db_i[k] * ulist(iatom,jnbor,jjup_index).re +
|
||||
b_r * dulist(iatom,jnbor,jjup_index,k).im -
|
||||
b_i * dulist(iatom,jnbor,jjup_index,k).re);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
@ -1086,13 +909,13 @@ void SNAKokkos<DeviceType>::compute_duarray(const typename Kokkos::TeamPolicy<De
|
|||
const int jjup_index = jjup-mb*(j+1)-ma;
|
||||
if (mapar == 1) {
|
||||
for (int k = 0; k < 3; k++) {
|
||||
dulist_r(jjup_index,k) = dulist_r(jju_index,k);
|
||||
dulist_i(jjup_index,k) = -dulist_i(jju_index,k);
|
||||
dulist(iatom,jnbor,jjup_index,k).re = dulist(iatom,jnbor,jju_index,k).re;
|
||||
dulist(iatom,jnbor,jjup_index,k).im = -dulist(iatom,jnbor,jju_index,k).im;
|
||||
}
|
||||
} else {
|
||||
for (int k = 0; k < 3; k++) {
|
||||
dulist_r(jjup_index,k) = -dulist_r(jju_index,k);
|
||||
dulist_i(jjup_index,k) = dulist_i(jju_index,k);
|
||||
dulist(iatom,jnbor,jjup_index,k).re = -dulist(iatom,jnbor,jju_index,k).re;
|
||||
dulist(iatom,jnbor,jjup_index,k).im = dulist(iatom,jnbor,jju_index,k).im;
|
||||
}
|
||||
}
|
||||
mapar = -mapar;
|
||||
|
@ -1110,89 +933,24 @@ void SNAKokkos<DeviceType>::compute_duarray(const typename Kokkos::TeamPolicy<De
|
|||
int jju = idxu_block[j];
|
||||
for (int mb = 0; 2*mb <= j; mb++)
|
||||
for (int ma = 0; ma <= j; ma++) {
|
||||
dulist_r(jju,0) = dsfac * ulist_r[jju] * ux +
|
||||
sfac * dulist_r(jju,0);
|
||||
dulist_i(jju,0) = dsfac * ulist_i[jju] * ux +
|
||||
sfac * dulist_i(jju,0);
|
||||
dulist_r(jju,1) = dsfac * ulist_r[jju] * uy +
|
||||
sfac * dulist_r(jju,1);
|
||||
dulist_i(jju,1) = dsfac * ulist_i[jju] * uy +
|
||||
sfac * dulist_i(jju,1);
|
||||
dulist_r(jju,2) = dsfac * ulist_r[jju] * uz +
|
||||
sfac * dulist_r(jju,2);
|
||||
dulist_i(jju,2) = dsfac * ulist_i[jju] * uz +
|
||||
sfac * dulist_i(jju,2);
|
||||
dulist(iatom,jnbor,jju,0).re = dsfac * ulist(iatom,jnbor,jju).re * ux +
|
||||
sfac * dulist(iatom,jnbor,jju,0).re;
|
||||
dulist(iatom,jnbor,jju,0).im = dsfac * ulist(iatom,jnbor,jju).im * ux +
|
||||
sfac * dulist(iatom,jnbor,jju,0).im;
|
||||
dulist(iatom,jnbor,jju,1).re = dsfac * ulist(iatom,jnbor,jju).re * uy +
|
||||
sfac * dulist(iatom,jnbor,jju,1).re;
|
||||
dulist(iatom,jnbor,jju,1).im = dsfac * ulist(iatom,jnbor,jju).im * uy +
|
||||
sfac * dulist(iatom,jnbor,jju,1).im;
|
||||
dulist(iatom,jnbor,jju,2).re = dsfac * ulist(iatom,jnbor,jju).re * uz +
|
||||
sfac * dulist(iatom,jnbor,jju,2).re;
|
||||
dulist(iatom,jnbor,jju,2).im = dsfac * ulist(iatom,jnbor,jju).im * uz +
|
||||
sfac * dulist(iatom,jnbor,jju,2).im;
|
||||
|
||||
jju++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::create_team_scratch_arrays(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team)
|
||||
{
|
||||
ulisttot_r_a = ulisttot_r = t_sna_1d(team.team_scratch(1),idxu_max);
|
||||
ulisttot_i_a = ulisttot_i = t_sna_1d(team.team_scratch(1),idxu_max);
|
||||
ylist_r = t_sna_1d(team.team_scratch(1),idxu_max);
|
||||
ylist_i = t_sna_1d(team.team_scratch(1),idxu_max);
|
||||
zlist_r = t_sna_1d(team.team_scratch(1),idxz_max);
|
||||
zlist_i = t_sna_1d(team.team_scratch(1),idxz_max);
|
||||
blist = t_sna_1d(team.team_scratch(1),idxb_max);
|
||||
|
||||
rij = t_sna_2d(team.team_scratch(1),nmax,3);
|
||||
rcutij = t_sna_1d(team.team_scratch(1),nmax);
|
||||
wj = t_sna_1d(team.team_scratch(1),nmax);
|
||||
inside = t_sna_1i(team.team_scratch(1),nmax);
|
||||
ulist_r_ij = t_sna_2d(team.team_scratch(1),nmax,idxu_max);
|
||||
ulist_i_ij = t_sna_2d(team.team_scratch(1),nmax,idxu_max);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
inline
|
||||
T_INT SNAKokkos<DeviceType>::size_team_scratch_arrays() {
|
||||
T_INT size = 0;
|
||||
|
||||
size += t_sna_1d::shmem_size(idxu_max)*2; // ulisttot
|
||||
size += t_sna_1d::shmem_size(idxu_max)*2; // ylist
|
||||
size += t_sna_1d::shmem_size(idxz_max)*2; // zlist
|
||||
size += t_sna_1d::shmem_size(idxb_max); // blist
|
||||
|
||||
size += t_sna_2d::shmem_size(nmax,3); // rij
|
||||
size += t_sna_1d::shmem_size(nmax); // rcutij
|
||||
size += t_sna_1d::shmem_size(nmax); // wj
|
||||
size += t_sna_1i::shmem_size(nmax); // inside
|
||||
size += t_sna_2d::shmem_size(nmax,idxu_max)*2; // ulist_ij
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::create_thread_scratch_arrays(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team)
|
||||
{
|
||||
dblist = t_sna_2d(team.thread_scratch(1),idxb_max,3);
|
||||
ulist_r = t_sna_1d(team.thread_scratch(1),idxu_max);
|
||||
ulist_i = t_sna_1d(team.thread_scratch(1),idxu_max);
|
||||
dulist_r = t_sna_2d(team.thread_scratch(1),idxu_max,3);
|
||||
dulist_i = t_sna_2d(team.thread_scratch(1),idxu_max,3);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
inline
|
||||
T_INT SNAKokkos<DeviceType>::size_thread_scratch_arrays() {
|
||||
T_INT size = 0;
|
||||
|
||||
size += t_sna_2d::shmem_size(idxb_max,3); // dblist
|
||||
size += t_sna_1d::shmem_size(idxu_max)*2; // ulist
|
||||
size += t_sna_2d::shmem_size(idxu_max,3)*2; // dulist
|
||||
return size;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
factorial n, wrapper for precomputed table
|
||||
------------------------------------------------------------------------- */
|
||||
|
@ -1558,14 +1316,13 @@ double SNAKokkos<DeviceType>::memory_usage()
|
|||
bytes += jdimpq*jdimpq * sizeof(double); // pqarray
|
||||
bytes += idxcg_max * sizeof(double); // cglist
|
||||
|
||||
bytes += idxu_max * sizeof(double) * 2; // ulist
|
||||
bytes += idxu_max * sizeof(double) * 2; // ulisttot
|
||||
bytes += idxu_max * 3 * sizeof(double) * 2; // dulist
|
||||
bytes += natom * idxu_max * sizeof(double) * 2; // ulist
|
||||
bytes += natom * idxu_max * sizeof(double) * 2; // ulisttot
|
||||
bytes += natom * idxu_max * 3 * sizeof(double) * 2; // dulist
|
||||
|
||||
bytes += idxz_max * sizeof(double) * 2; // zlist
|
||||
bytes += idxb_max * sizeof(double); // blist
|
||||
bytes += idxb_max * 3 * sizeof(double); // dblist
|
||||
bytes += idxu_max * sizeof(double) * 2; // ylist
|
||||
bytes += natom * idxz_max * sizeof(double) * 2; // zlist
|
||||
bytes += natom * idxb_max * sizeof(double); // blist
|
||||
bytes += natom * idxu_max * sizeof(double) * 2; // ylist
|
||||
|
||||
bytes += jdim * jdim * jdim * sizeof(int); // idxcg_block
|
||||
bytes += jdim * sizeof(int); // idxu_block
|
||||
|
@ -1577,11 +1334,11 @@ double SNAKokkos<DeviceType>::memory_usage()
|
|||
|
||||
bytes += jdim * sizeof(double); // bzero
|
||||
|
||||
bytes += nmax * 3 * sizeof(double); // rij
|
||||
bytes += nmax * sizeof(int); // inside
|
||||
bytes += nmax * sizeof(double); // wj
|
||||
bytes += nmax * sizeof(double); // rcutij
|
||||
bytes += nmax * idxu_max * sizeof(double) * 2; // ulist_ij
|
||||
bytes += natom * nmax * 3 * sizeof(double); // rij
|
||||
bytes += natom * nmax * sizeof(int); // inside
|
||||
bytes += natom * nmax * sizeof(double); // wj
|
||||
bytes += natom * nmax * sizeof(double); // rcutij
|
||||
bytes += natom * nmax * idxu_max * sizeof(double) * 2; // ulist_ij
|
||||
|
||||
return bytes;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue