forked from lijiext/lammps
Add loop chunking option to compute_orientorder_atom_kokkos
This commit is contained in:
parent
21f278f47f
commit
72a9ce0f32
|
@ -130,9 +130,6 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::compute_peratom()
|
|||
d_neighbors = k_list->d_neighbors;
|
||||
d_ilist = k_list->d_ilist;
|
||||
|
||||
maxneigh = 0;
|
||||
Kokkos::parallel_reduce("ComputeOrientOrderAtomKokkos::find_max_neighs",inum, FindMaxNumNeighs<DeviceType>(k_list), Kokkos::Experimental::Max<int>(maxneigh));
|
||||
|
||||
// grow order parameter array if necessary
|
||||
|
||||
if (atom->nmax > nmax) {
|
||||
|
@ -141,13 +138,22 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::compute_peratom()
|
|||
memoryKK->create_kokkos(k_qnarray,qnarray,nmax,ncol,"orientorder/atom:qnarray");
|
||||
array_atom = qnarray;
|
||||
d_qnarray = k_qnarray.template view<DeviceType>();
|
||||
}
|
||||
|
||||
d_qnm = t_sna_3c("orientorder/atom:qnm",nmax,nqlist,2*qmax+1);
|
||||
d_ncount = t_sna_1i("orientorder/atom:ncount",nmax);
|
||||
chunk_size = MIN(chunksize,inum); // "chunksize" variable is set by user
|
||||
chunk_offset = 0;
|
||||
|
||||
if (chunk_size > d_ncount.extent(0)) {
|
||||
d_qnm = t_sna_3c("orientorder/atom:qnm",chunk_size,nqlist,2*qmax+1);
|
||||
d_ncount = t_sna_1i("orientorder/atom:ncount",chunk_size);
|
||||
}
|
||||
|
||||
// insure distsq and nearest arrays are long enough
|
||||
|
||||
if (maxneigh > d_distsq.extent(1)) {
|
||||
maxneigh = 0;
|
||||
Kokkos::parallel_reduce("ComputeOrientOrderAtomKokkos::find_max_neighs",inum, FindMaxNumNeighs<DeviceType>(k_list), Kokkos::Experimental::Max<int>(maxneigh));
|
||||
|
||||
if (chunk_size > d_distsq.extent(0) || maxneigh > d_distsq.extent(1)) {
|
||||
d_distsq = t_sna_2d_lr("orientorder/atom:distsq",nmax,maxneigh);
|
||||
d_nearest = t_sna_2i_lr("orientorder/atom:nearest",nmax,maxneigh);
|
||||
d_rlist = t_sna_3d_lr("orientorder/atom:rlist",nmax,maxneigh,3);
|
||||
|
@ -156,7 +162,6 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::compute_peratom()
|
|||
d_rlist_um = d_rlist;
|
||||
d_nearest_um = d_nearest;
|
||||
}
|
||||
}
|
||||
|
||||
// compute order parameter for each atom in group
|
||||
// use full neighbor list to count atoms less than cutoff
|
||||
|
@ -178,22 +183,30 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::compute_peratom()
|
|||
|
||||
copymode = 1;
|
||||
|
||||
while (chunk_offset < inum) { // chunk up loop to prevent running out of memory
|
||||
|
||||
if (chunk_size > inum - chunk_offset)
|
||||
chunk_size = inum - chunk_offset;
|
||||
|
||||
//Neigh
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagComputeOrientOrderAtomNeigh> policy_neigh(inum,team_size,vector_length);
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagComputeOrientOrderAtomNeigh> policy_neigh(chunk_size,team_size,vector_length);
|
||||
Kokkos::parallel_for("ComputeOrientOrderAtomNeigh",policy_neigh,*this);
|
||||
|
||||
//Select3
|
||||
typename Kokkos::RangePolicy<DeviceType, TagComputeOrientOrderAtomSelect3> policy_select3(0,inum);
|
||||
typename Kokkos::RangePolicy<DeviceType, TagComputeOrientOrderAtomSelect3> policy_select3(0,chunk_size);
|
||||
Kokkos::parallel_for("ComputeOrientOrderAtomSelect3",policy_select3,*this);
|
||||
|
||||
//BOOP1
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagComputeOrientOrderAtomBOOP1> policy_boop1(((inum+team_size-1)/team_size)*maxneigh,team_size,vector_length);
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagComputeOrientOrderAtomBOOP1> policy_boop1(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length);
|
||||
Kokkos::parallel_for("ComputeOrientOrderAtomBOOP1",policy_boop1,*this);
|
||||
|
||||
//BOOP2
|
||||
typename Kokkos::RangePolicy<DeviceType, TagComputeOrientOrderAtomBOOP2> policy_boop2(0,inum);
|
||||
typename Kokkos::RangePolicy<DeviceType, TagComputeOrientOrderAtomBOOP2> policy_boop2(0,chunk_size);
|
||||
Kokkos::parallel_for("ComputeOrientOrderAtomBOOP2",policy_boop2,*this);
|
||||
|
||||
chunk_offset += chunk_size;
|
||||
} // end while
|
||||
|
||||
copymode = 0;
|
||||
|
||||
k_qnarray.template modify<DeviceType>();
|
||||
|
@ -207,7 +220,7 @@ KOKKOS_INLINE_FUNCTION
|
|||
void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrderAtomNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagComputeOrientOrderAtomNeigh>::member_type& team) const
|
||||
{
|
||||
const int ii = team.league_rank();
|
||||
const int i = d_ilist[ii];
|
||||
const int i = d_ilist[ii + chunk_offset];
|
||||
if (mask[i] & groupbit) {
|
||||
const X_FLOAT xtmp = x(i,0);
|
||||
const X_FLOAT ytmp = x(i,1);
|
||||
|
@ -263,7 +276,7 @@ template<class DeviceType>
|
|||
KOKKOS_INLINE_FUNCTION
|
||||
void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrderAtomSelect3,const int& ii) const {
|
||||
|
||||
const int i = d_ilist[ii];
|
||||
const int i = d_ilist[ii + chunk_offset];
|
||||
const int ncount = d_ncount(ii);
|
||||
|
||||
// if not nnn neighbors, order parameter = 0;
|
||||
|
@ -287,11 +300,12 @@ KOKKOS_INLINE_FUNCTION
|
|||
void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrderAtomBOOP1,const typename Kokkos::TeamPolicy<DeviceType, TagComputeOrientOrderAtomBOOP1>::member_type& team) const {
|
||||
|
||||
// Extract the atom number
|
||||
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((inum+team.team_size()-1)/team.team_size()));
|
||||
if (ii >= inum) return;
|
||||
int ii = team.team_rank() + team.team_size() * (team.league_rank() %
|
||||
((chunk_size+team.team_size()-1)/team.team_size()));
|
||||
if (ii >= chunk_size) return;
|
||||
|
||||
// Extract the neighbor number
|
||||
const int jj = team.league_rank() / ((inum+team.team_size()-1)/team.team_size());
|
||||
const int jj = team.league_rank() / ((chunk_size+team.team_size()-1)/team.team_size());
|
||||
const int ncount = d_ncount(ii);
|
||||
if (jj >= ncount) return;
|
||||
|
||||
|
@ -306,7 +320,6 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrder
|
|||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrderAtomBOOP2,const int& ii) const {
|
||||
const int i = d_ilist[ii];
|
||||
const int ncount = d_ncount(ii);
|
||||
calc_boop2(ncount, ii);
|
||||
}
|
||||
|
@ -338,14 +351,14 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrder
|
|||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ComputeOrientOrderAtomKokkos<DeviceType>::select3(int k, int n, int iatom) const
|
||||
void ComputeOrientOrderAtomKokkos<DeviceType>::select3(int k, int n, int ii) const
|
||||
{
|
||||
int i,ir,j,l,mid,ia,itmp;
|
||||
double a,tmp,a3[3];
|
||||
|
||||
auto arr = Kokkos::subview(d_distsq_um, iatom, Kokkos::ALL);
|
||||
auto iarr = Kokkos::subview(d_nearest_um, iatom, Kokkos::ALL);
|
||||
auto arr3 = Kokkos::subview(d_rlist_um, iatom, Kokkos::ALL, Kokkos::ALL);
|
||||
auto arr = Kokkos::subview(d_distsq_um, ii, Kokkos::ALL);
|
||||
auto iarr = Kokkos::subview(d_nearest_um, ii, Kokkos::ALL);
|
||||
auto arr3 = Kokkos::subview(d_rlist_um, ii, Kokkos::ALL, Kokkos::ALL);
|
||||
|
||||
l = 0;
|
||||
ir = n-1;
|
||||
|
@ -414,11 +427,13 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::select3(int k, int n, int iatom)
|
|||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop1(int ncount, int iatom, int ineigh) const
|
||||
void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop1(int ncount, int ii, int ineigh) const
|
||||
{
|
||||
const double r0 = d_rlist(iatom,ineigh,0);
|
||||
const double r1 = d_rlist(iatom,ineigh,1);
|
||||
const double r2 = d_rlist(iatom,ineigh,2);
|
||||
const int i = d_ilist[ii + chunk_offset];
|
||||
|
||||
const double r0 = d_rlist(ii,ineigh,0);
|
||||
const double r1 = d_rlist(ii,ineigh,1);
|
||||
const double r2 = d_rlist(ii,ineigh,2);
|
||||
const double rmag = sqrt(r0*r0 + r1*r1 + r2*r2);
|
||||
if(rmag <= MY_EPSILON) {
|
||||
return;
|
||||
|
@ -439,27 +454,27 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop1(int ncount, int iatom,
|
|||
for (int il = 0; il < nqlist; il++) {
|
||||
const int l = d_qlist[il];
|
||||
|
||||
//d_qnm(iatom,il,l).re += polar_prefactor(l, 0, costheta);
|
||||
//d_qnm(ii,il,l).re += polar_prefactor(l, 0, costheta);
|
||||
const double polar_pf = polar_prefactor(l, 0, costheta);
|
||||
Kokkos::atomic_add(&(d_qnm(iatom,il,l).re), polar_pf);
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,l).re), polar_pf);
|
||||
SNAcomplex expphim = {expphi.re,expphi.im};
|
||||
for(int m = 1; m <= +l; m++) {
|
||||
const double prefactor = polar_prefactor(l, m, costheta);
|
||||
SNAcomplex c = {prefactor * expphim.re, prefactor * expphim.im};
|
||||
//d_qnm(iatom,il,m+l).re += c.re;
|
||||
//d_qnm(iatom,il,m+l).im += c.im;
|
||||
Kokkos::atomic_add(&(d_qnm(iatom,il,m+l).re), c.re);
|
||||
Kokkos::atomic_add(&(d_qnm(iatom,il,m+l).im), c.im);
|
||||
//d_qnm(ii,il,m+l).re += c.re;
|
||||
//d_qnm(ii,il,m+l).im += c.im;
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,m+l).re), c.re);
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,m+l).im), c.im);
|
||||
if(m & 1) {
|
||||
//d_qnm(iatom,il,-m+l).re -= c.re;
|
||||
//d_qnm(iatom,il,-m+l).im += c.im;
|
||||
Kokkos::atomic_add(&(d_qnm(iatom,il,-m+l).re), -c.re);
|
||||
Kokkos::atomic_add(&(d_qnm(iatom,il,-m+l).im), c.im);
|
||||
//d_qnm(ii,il,-m+l).re -= c.re;
|
||||
//d_qnm(ii,il,-m+l).im += c.im;
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).re), -c.re);
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).im), c.im);
|
||||
} else {
|
||||
//d_qnm(iatom,il,-m+l).re += c.re;
|
||||
//d_qnm(iatom,il,-m+l).im -= c.im;
|
||||
Kokkos::atomic_add(&(d_qnm(iatom,il,-m+l).re), c.re);
|
||||
Kokkos::atomic_add(&(d_qnm(iatom,il,-m+l).im), -c.im);
|
||||
//d_qnm(ii,il,-m+l).re += c.re;
|
||||
//d_qnm(ii,il,-m+l).im -= c.im;
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).re), c.re);
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).im), -c.im);
|
||||
}
|
||||
SNAcomplex tmp;
|
||||
tmp.re = expphim.re*expphi.re - expphim.im*expphi.im;
|
||||
|
@ -476,16 +491,18 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop1(int ncount, int iatom,
|
|||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int iatom) const
|
||||
void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) const
|
||||
{
|
||||
const int i = d_ilist[ii + chunk_offset];
|
||||
|
||||
// convert sums to averages
|
||||
|
||||
double facn = 1.0 / ncount;
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = d_qlist[il];
|
||||
for(int m = 0; m < 2*l+1; m++) {
|
||||
d_qnm(iatom,il,m).re *= facn;
|
||||
d_qnm(iatom,il,m).im *= facn;
|
||||
d_qnm(ii,il,m).re *= facn;
|
||||
d_qnm(ii,il,m).im *= facn;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -498,8 +515,8 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int iatom)
|
|||
double qnormfac = sqrt(MY_4PI/(2*l+1));
|
||||
double qm_sum = 0.0;
|
||||
for(int m = 0; m < 2*l+1; m++)
|
||||
qm_sum += d_qnm(iatom,il,m).re*d_qnm(iatom,il,m).re + d_qnm(iatom,il,m).im*d_qnm(iatom,il,m).im;
|
||||
d_qnarray(iatom,jj++) = qnormfac * sqrt(qm_sum);
|
||||
qm_sum += d_qnm(ii,il,m).re*d_qnm(ii,il,m).re + d_qnm(ii,il,m).im*d_qnm(ii,il,m).im;
|
||||
d_qnarray(i,jj++) = qnormfac * sqrt(qm_sum);
|
||||
}
|
||||
|
||||
// calculate W_l
|
||||
|
@ -513,13 +530,13 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int iatom)
|
|||
for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
|
||||
int m = m1 + m2 - l;
|
||||
SNAcomplex qm1qm2;
|
||||
qm1qm2.re = d_qnm(iatom,il,m1).re*d_qnm(iatom,il,m2).re - d_qnm(iatom,il,m1).im*d_qnm(iatom,il,m2).im;
|
||||
qm1qm2.im = d_qnm(iatom,il,m1).re*d_qnm(iatom,il,m2).im + d_qnm(iatom,il,m1).im*d_qnm(iatom,il,m2).re;
|
||||
wlsum += (qm1qm2.re*d_qnm(iatom,il,m).re + qm1qm2.im*d_qnm(iatom,il,m).im)*d_cglist[idxcg_count];
|
||||
qm1qm2.re = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).re - d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).im;
|
||||
qm1qm2.im = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).im + d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).re;
|
||||
wlsum += (qm1qm2.re*d_qnm(ii,il,m).re + qm1qm2.im*d_qnm(ii,il,m).im)*d_cglist[idxcg_count];
|
||||
idxcg_count++;
|
||||
}
|
||||
}
|
||||
d_qnarray(iatom,jj++) = wlsum/sqrt(2.0*l+1.0);
|
||||
d_qnarray(i,jj++) = wlsum/sqrt(2.0*l+1.0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -534,19 +551,19 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int iatom)
|
|||
for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
|
||||
const int m = m1 + m2 - l;
|
||||
SNAcomplex qm1qm2;
|
||||
qm1qm2.re = d_qnm(iatom,il,m1).re*d_qnm(iatom,il,m2).re - d_qnm(iatom,il,m1).im*d_qnm(iatom,il,m2).im;
|
||||
qm1qm2.im = d_qnm(iatom,il,m1).re*d_qnm(iatom,il,m2).im + d_qnm(iatom,il,m1).im*d_qnm(iatom,il,m2).re;
|
||||
wlsum += (qm1qm2.re*d_qnm(iatom,il,m).re + qm1qm2.im*d_qnm(iatom,il,m).im)*d_cglist[idxcg_count];
|
||||
qm1qm2.re = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).re - d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).im;
|
||||
qm1qm2.im = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).im + d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).re;
|
||||
wlsum += (qm1qm2.re*d_qnm(ii,il,m).re + qm1qm2.im*d_qnm(ii,il,m).im)*d_cglist[idxcg_count];
|
||||
idxcg_count++;
|
||||
}
|
||||
}
|
||||
// Whats = [w/(q/np.sqrt(np.pi * 4 / (2 * l + 1)))**3 if abs(q) > 1.0e-6 else 0.0 for l,q,w in zip(range(1,max_l+1),Qs,Ws)]
|
||||
if (d_qnarray(iatom,il) < QEPSILON)
|
||||
d_qnarray(iatom,jj++) = 0.0;
|
||||
if (d_qnarray(i,il) < QEPSILON)
|
||||
d_qnarray(i,jj++) = 0.0;
|
||||
else {
|
||||
const double qnormfac = sqrt(MY_4PI/(2*l+1));
|
||||
const double qnfac = qnormfac/d_qnarray(iatom,il);
|
||||
d_qnarray(iatom,jj++) = wlsum/sqrt(2.0*l+1.0)*(qnfac*qnfac*qnfac);
|
||||
const double qnfac = qnormfac/d_qnarray(i,il);
|
||||
d_qnarray(i,jj++) = wlsum/sqrt(2.0*l+1.0)*(qnfac*qnfac*qnfac);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -556,17 +573,17 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int iatom)
|
|||
if (qlcompflag) {
|
||||
const int il = iqlcomp;
|
||||
const int l = qlcomp;
|
||||
if (d_qnarray(iatom,il) < QEPSILON)
|
||||
if (d_qnarray(i,il) < QEPSILON)
|
||||
for(int m = 0; m < 2*l+1; m++) {
|
||||
d_qnarray(iatom,jj++) = 0.0;
|
||||
d_qnarray(iatom,jj++) = 0.0;
|
||||
d_qnarray(i,jj++) = 0.0;
|
||||
d_qnarray(i,jj++) = 0.0;
|
||||
}
|
||||
else {
|
||||
const double qnormfac = sqrt(MY_4PI/(2*l+1));
|
||||
const double qnfac = qnormfac/d_qnarray(iatom,il);
|
||||
const double qnfac = qnormfac/d_qnarray(i,il);
|
||||
for(int m = 0; m < 2*l+1; m++) {
|
||||
d_qnarray(iatom,jj++) = d_qnm(iatom,il,m).re * qnfac;
|
||||
d_qnarray(iatom,jj++) = d_qnm(iatom,il,m).im * qnfac;
|
||||
d_qnarray(i,jj++) = d_qnm(ii,il,m).re * qnfac;
|
||||
d_qnarray(i,jj++) = d_qnm(ii,il,m).im * qnfac;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -87,7 +87,7 @@ class ComputeOrientOrderAtomKokkos : public ComputeOrientOrderAtom {
|
|||
typename AT::t_float_2d d_qnarray;
|
||||
|
||||
private:
|
||||
int inum;
|
||||
int inum,chunk_size,chunk_offset;
|
||||
|
||||
typename AT::t_x_array_randomread x;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d mask;
|
||||
|
|
|
@ -61,6 +61,7 @@ ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg
|
|||
wlflag = 0;
|
||||
wlhatflag = 0;
|
||||
qlcompflag = 0;
|
||||
chunksize = 16384;
|
||||
|
||||
// specify which orders to request
|
||||
|
||||
|
@ -143,6 +144,13 @@ ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg
|
|||
error->all(FLERR,"Illegal compute orientorder/atom command");
|
||||
cutsq = cutoff*cutoff;
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"chunksize") == 0) {
|
||||
if (iarg+2 > narg)
|
||||
error->all(FLERR,"Illegal compute orientorder/atom command");
|
||||
chunksize = force->numeric(FLERR,arg[iarg+1]);
|
||||
if (chunksize <= 0)
|
||||
error->all(FLERR,"Illegal compute orientorder/atom command");
|
||||
iarg += 2;
|
||||
} else error->all(FLERR,"Illegal compute orientorder/atom command");
|
||||
}
|
||||
|
||||
|
|
|
@ -62,6 +62,7 @@ class ComputeOrientOrderAtom : public Compute {
|
|||
virtual void init_clebsch_gordan();
|
||||
double *cglist; // Clebsch-Gordan coeffs
|
||||
int idxcg_max;
|
||||
int chunksize;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue