Updates to Kokkos files

This commit is contained in:
Stan Moore 2016-12-15 11:18:50 -07:00
parent a9d26b3f4a
commit c0d6cbbdd3
10 changed files with 533 additions and 199 deletions

View File

@ -1801,6 +1801,15 @@ void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask)
if (mask & TYPE_MASK) atomKK->k_type.sync<LMPDeviceType>();
if (mask & MASK_MASK) atomKK->k_mask.sync<LMPDeviceType>();
if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPDeviceType>();
if (mask & DPDRHO_MASK) atomKK->k_rho.sync<LMPDeviceType>();
if (mask & DPDTHETA_MASK) atomKK->k_dpdTheta.sync<LMPDeviceType>();
if (mask & UCOND_MASK) atomKK->k_uCond.sync<LMPDeviceType>();
if (mask & UMECH_MASK) atomKK->k_uMech.sync<LMPDeviceType>();
if (mask & UCHEM_MASK) atomKK->k_uChem.sync<LMPDeviceType>();
if (mask & UCG_MASK) atomKK->k_uCG.sync<LMPDeviceType>();
if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync<LMPDeviceType>();
if (mask & DUCHEM_MASK) atomKK->k_duChem.sync<LMPDeviceType>();
if (mask & DVECTOR_MASK) atomKK->k_dvector.sync<LMPDeviceType>();
} else {
if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
@ -1809,6 +1818,15 @@ void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask)
if (mask & TYPE_MASK) atomKK->k_type.sync<LMPHostType>();
if (mask & MASK_MASK) atomKK->k_mask.sync<LMPHostType>();
if (mask & IMAGE_MASK) atomKK->k_image.sync<LMPHostType>();
if (mask & DPDRHO_MASK) atomKK->k_rho.sync<LMPHostType>();
if (mask & DPDTHETA_MASK) atomKK->k_dpdTheta.sync<LMPHostType>();
if (mask & UCOND_MASK) atomKK->k_uCond.sync<LMPHostType>();
if (mask & UMECH_MASK) atomKK->k_uMech.sync<LMPHostType>();
if (mask & UCHEM_MASK) atomKK->k_uChem.sync<LMPHostType>();
if (mask & UCG_MASK) atomKK->k_uCG.sync<LMPHostType>();
if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync<LMPHostType>();
if (mask & DUCHEM_MASK) atomKK->k_duChem.sync<LMPHostType>();
if (mask & DVECTOR_MASK) atomKK->k_dvector.sync<LMPHostType>();
}
}
@ -1831,6 +1849,24 @@ void AtomVecDPDKokkos::sync_overlapping_device(ExecutionSpace space, unsigned in
perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>())
perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
if ((mask & DPDRHO_MASK) && atomKK->k_rho.need_sync<LMPDeviceType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_rho,space);
if ((mask & DPDTHETA_MASK) && atomKK->k_dpdTheta.need_sync<LMPDeviceType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_dpdTheta,space);
if ((mask & UCOND_MASK) && atomKK->k_uCond.need_sync<LMPDeviceType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCond,space);
if ((mask & UMECH_MASK) && atomKK->k_uMech.need_sync<LMPDeviceType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uMech,space);
if ((mask & UCHEM_MASK) && atomKK->k_uChem.need_sync<LMPDeviceType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uChem,space);
if ((mask & UCG_MASK) && atomKK->k_uCG.need_sync<LMPDeviceType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCG,space);
if ((mask & UCGNEW_MASK) && atomKK->k_uCGnew.need_sync<LMPDeviceType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCGnew,space);
if ((mask & DUCHEM_MASK) && atomKK->k_duChem.need_sync<LMPDeviceType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_duChem,space);
if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync<LMPDeviceType>())
perform_async_copy<DAT::tdual_float_2d>(atomKK->k_dvector,space);
} else {
if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>())
perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
@ -1846,6 +1882,24 @@ void AtomVecDPDKokkos::sync_overlapping_device(ExecutionSpace space, unsigned in
perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>())
perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
if ((mask & DPDRHO_MASK) && atomKK->k_rho.need_sync<LMPHostType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_rho,space);
if ((mask & DPDTHETA_MASK) && atomKK->k_dpdTheta.need_sync<LMPHostType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_dpdTheta,space);
if ((mask & UCOND_MASK) && atomKK->k_uCond.need_sync<LMPHostType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCond,space);
if ((mask & UMECH_MASK) && atomKK->k_uMech.need_sync<LMPHostType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uMech,space);
if ((mask & UCHEM_MASK) && atomKK->k_uChem.need_sync<LMPHostType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uChem,space);
if ((mask & UCG_MASK) && atomKK->k_uCG.need_sync<LMPHostType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCG,space);
if ((mask & UCGNEW_MASK) && atomKK->k_uCGnew.need_sync<LMPHostType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCGnew,space);
if ((mask & DUCHEM_MASK) && atomKK->k_duChem.need_sync<LMPHostType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_duChem,space);
if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync<LMPHostType>())
perform_async_copy<DAT::tdual_float_2d>(atomKK->k_dvector,space);
}
}
@ -1861,6 +1915,15 @@ void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask)
if (mask & TYPE_MASK) atomKK->k_type.modify<LMPDeviceType>();
if (mask & MASK_MASK) atomKK->k_mask.modify<LMPDeviceType>();
if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPDeviceType>();
if (mask & DPDRHO_MASK) atomKK->k_rho.modify<LMPDeviceType>();
if (mask & DPDTHETA_MASK) atomKK->k_dpdTheta.modify<LMPDeviceType>();
if (mask & UCOND_MASK) atomKK->k_uCond.modify<LMPDeviceType>();
if (mask & UMECH_MASK) atomKK->k_uMech.modify<LMPDeviceType>();
if (mask & UCHEM_MASK) atomKK->k_uChem.modify<LMPDeviceType>();
if (mask & UCG_MASK) atomKK->k_uCG.modify<LMPDeviceType>();
if (mask & UCGNEW_MASK) atomKK->k_uCGnew.modify<LMPDeviceType>();
if (mask & DUCHEM_MASK) atomKK->k_duChem.modify<LMPDeviceType>();
if (mask & DVECTOR_MASK) atomKK->k_dvector.modify<LMPDeviceType>();
} else {
if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>();
if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>();
@ -1869,6 +1932,15 @@ void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask)
if (mask & TYPE_MASK) atomKK->k_type.modify<LMPHostType>();
if (mask & MASK_MASK) atomKK->k_mask.modify<LMPHostType>();
if (mask & IMAGE_MASK) atomKK->k_image.modify<LMPHostType>();
if (mask & DPDRHO_MASK) atomKK->k_rho.modify<LMPHostType>();
if (mask & DPDTHETA_MASK) atomKK->k_dpdTheta.modify<LMPHostType>();
if (mask & UCOND_MASK) atomKK->k_uCond.modify<LMPHostType>();
if (mask & UMECH_MASK) atomKK->k_uMech.modify<LMPHostType>();
if (mask & UCHEM_MASK) atomKK->k_uChem.modify<LMPHostType>();
if (mask & UCG_MASK) atomKK->k_uCG.modify<LMPHostType>();
if (mask & UCGNEW_MASK) atomKK->k_uCGnew.modify<LMPHostType>();
if (mask & DUCHEM_MASK) atomKK->k_duChem.modify<LMPHostType>();
if (mask & DVECTOR_MASK) atomKK->k_dvector.modify<LMPHostType>();
}
}

View File

@ -52,7 +52,7 @@ FixEOStableRXKokkos<DeviceType>::FixEOStableRXKokkos(LAMMPS *lmp, int narg, char
template<class DeviceType>
FixEOStableRXKokkos<DeviceType>::~FixEOStableRXKokkos()
{
if (copymode) return;
}
/* ---------------------------------------------------------------------- */

View File

@ -52,6 +52,8 @@ PairDPDfdtEnergyKokkos<DeviceType>::PairDPDfdtEnergyKokkos(LAMMPS *lmp) : PairDP
{
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = X_MASK | F_MASK | TYPE_MASK | TAG_MASK | ENERGY_MASK | VIRIAL_MASK;
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
cutsq = NULL;
}
@ -357,6 +359,7 @@ double PairDPDfdtEnergyKokkos<DeviceType>::init_one(int i, int j)
m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
}
k_cutsq.h_view(i,j) = cutone*cutone;
k_cutsq.h_view(j,i) = k_cutsq.h_view(i,j);
k_cutsq.template modify<LMPHostType>();
return cutone;

View File

@ -54,8 +54,8 @@ PairExp6rxKokkos<DeviceType>::PairExp6rxKokkos(LAMMPS *lmp) : PairExp6rx(lmp)
{
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK;
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
datamask_read = EMPTY_MASK;
datamask_modify = EMPTY_MASK;
k_error_flag = DAT::tdual_int_scalar("pair:error_flag");
}
@ -104,6 +104,8 @@ void PairExp6rxKokkos<DeviceType>::init_style()
template<class DeviceType>
void PairExp6rxKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
{
copymode = 1;
eflag = eflag_in;
vflag = vflag_in;
@ -141,7 +143,9 @@ void PairExp6rxKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
special_coul[3] = force->special_coul[3];
newton_pair = force->newton_pair;
copymode = 1;
atomKK->sync(execution_space,X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK);
if (evflag) atomKK->modified(execution_space,F_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK);
else atomKK->modified(execution_space,F_MASK | UCG_MASK | UCGNEW_MASK);
// Initialize the Exp6 parameter data for both the local
// and ghost atoms. Make the parameter data persistent
@ -185,10 +189,22 @@ void PairExp6rxKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
EV_FLOAT ev;
if (evflag) {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALF,1,1> >(0,inum),*this,ev);
} else {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALF,1,0> >(0,inum),*this);
if (neighflag == HALF) {
if (newton_pair) {
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALF,1,1> >(0,inum),*this,ev);
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALF,1,0> >(0,inum),*this);
} else {
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALF,0,1> >(0,inum),*this,ev);
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALF,0,0> >(0,inum),*this);
}
} else if (neighflag == HALFTHREAD) {
if (newton_pair) {
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALFTHREAD,1,1> >(0,inum),*this,ev);
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALFTHREAD,1,0> >(0,inum),*this);
} else {
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALFTHREAD,0,1> >(0,inum),*this,ev);
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairExp6rxCompute<HALFTHREAD,0,0> >(0,inum),*this);
}
}
k_error_flag.template modify<DeviceType>();
@ -246,6 +262,12 @@ template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
// These arrays are atomic for Half/Thread neighbor style
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_uCG = uCG;
Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_uCGnew = uCGnew;
int i,j,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair;
double rsq,r2inv,r6inv,forceExp6,factor_lj;
@ -287,6 +309,12 @@ void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxCompute<NEIGHFLAG,NEW
itype = type[i];
jnum = d_numneigh[i];
double fx_i = 0.0;
double fy_i = 0.0;
double fz_i = 0.0;
double uCG_i = 0.0;
double uCGnew_i = 0.0;
{
epsilon1_i = PairExp6ParamData.epsilon1[i];
alpha1_i = PairExp6ParamData.alpha1[i];
@ -457,9 +485,9 @@ void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxCompute<NEIGHFLAG,NEW
evdwlOld *= factor_lj;
uCG[i] += 0.5*evdwlOld;
uCG_i += 0.5*evdwlOld;
if (newton_pair || j < nlocal)
uCG[j] += 0.5*evdwlOld;
a_uCG[j] += 0.5*evdwlOld;
}
if(rm12_ij!=0.0 && rm21_ij!=0.0){
@ -537,28 +565,34 @@ void PairExp6rxKokkos<DeviceType>::operator()(TagPairExp6rxCompute<NEIGHFLAG,NEW
if (isite1 == isite2) fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpairOldEXP6_12;
else fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpairOldEXP6_12 + sqrt(fractionOld2_i*fractionOld1_j)*fpairOldEXP6_21;
f(i,0) += delx*fpair;
f(i,1) += dely*fpair;
f(i,2) += delz*fpair;
fx_i += delx*fpair;
fy_i += dely*fpair;
fz_i += delz*fpair;
if (newton_pair || j < nlocal) {
f(j,0) -= delx*fpair;
f(j,1) -= dely*fpair;
f(j,2) -= delz*fpair;
a_f(j,0) -= delx*fpair;
a_f(j,1) -= dely*fpair;
a_f(j,2) -= delz*fpair;
}
if (isite1 == isite2) evdwl = sqrt(fraction1_i*fraction2_j)*evdwlEXP6_12;
else evdwl = sqrt(fraction1_i*fraction2_j)*evdwlEXP6_12 + sqrt(fraction2_i*fraction1_j)*evdwlEXP6_21;
evdwl *= factor_lj;
uCGnew[i] += 0.5*evdwl;
uCGnew_i += 0.5*evdwl;
if (newton_pair || j < nlocal)
uCGnew[j] += 0.5*evdwl;
a_uCGnew[j] += 0.5*evdwl;
evdwl = evdwlOld;
//if (vflag_either || eflag_atom)
if (EVFLAG) this->template ev_tally<NEIGHFLAG,NEWTON_PAIR>(ev,i,j,evdwl,fpair,delx,dely,delz);
}
}
}
a_f(i,0) += fx_i;
a_f(i,1) += fy_i;
a_f(i,2) += fz_i;
a_uCG[i] += uCG_i;
a_uCGnew[i] += uCGnew_i;
}
template<class DeviceType>

View File

@ -68,8 +68,14 @@ PairMultiLucyRXKokkos<DeviceType>::PairMultiLucyRXKokkos(LAMMPS *lmp) : PairMult
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK;
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
datamask_read = EMPTY_MASK;
datamask_modify = EMPTY_MASK;
update_table = 0;
ntables = 0;
tables = NULL;
h_table = new TableHost();
d_table = new TableDevice();
k_error_flag = DAT::tdual_int_scalar("pair:error_flag");
}
@ -79,7 +85,10 @@ PairMultiLucyRXKokkos<DeviceType>::PairMultiLucyRXKokkos(LAMMPS *lmp) : PairMult
template<class DeviceType>
PairMultiLucyRXKokkos<DeviceType>::~PairMultiLucyRXKokkos()
{
if (copymode) return;
delete h_table;
delete d_table;
}
/* ---------------------------------------------------------------------- */
@ -109,7 +118,7 @@ void PairMultiLucyRXKokkos<DeviceType>::init_style()
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->ghost = 1;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk");
error->all(FLERR,"Cannot use chosen neighbor list style with multi/lucy/rx/kk");
}
}
@ -118,6 +127,23 @@ void PairMultiLucyRXKokkos<DeviceType>::init_style()
template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
{
if (update_table)
create_kokkos_tables();
if (tabstyle == LOOKUP)
compute_style<LOOKUP>(eflag_in,vflag_in);
else if(tabstyle == LINEAR)
compute_style<LINEAR>(eflag_in,vflag_in);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int TABSTYLE>
void PairMultiLucyRXKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
{
copymode = 1;
eflag = eflag_in;
vflag = vflag_in;
@ -145,10 +171,14 @@ void PairMultiLucyRXKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
x = atomKK->k_x.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
type = atomKK->k_type.view<DeviceType>();
rho = atomKK->k_rho.view<DeviceType>();
uCG = atomKK->k_uCG.view<DeviceType>();
uCGnew = atomKK->k_uCGnew.view<DeviceType>();
dvector = atomKK->k_dvector.view<DeviceType>();
rho = atomKK->k_rho.view<DeviceType>();
atomKK->sync(execution_space,X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | DPDRHO_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK);
if (evflag) atomKK->modified(execution_space,F_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK);
else atomKK->modified(execution_space,F_MASK | UCG_MASK | UCGNEW_MASK);
nlocal = atom->nlocal;
int nghost = atom->nghost;
@ -176,10 +206,22 @@ void PairMultiLucyRXKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
EV_FLOAT ev;
if (evflag) {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,1,1> >(0,inum),*this,ev);
} else {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,1,0> >(0,inum),*this);
if (neighflag == HALF) {
if (newton_pair) {
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,1,1,TABSTYLE> >(0,inum),*this,ev);
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,1,0,TABSTYLE> >(0,inum),*this);
} else {
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,0,1,TABSTYLE> >(0,inum),*this,ev);
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,0,0,TABSTYLE> >(0,inum),*this);
}
} else if (neighflag == HALFTHREAD) {
if (newton_pair) {
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALFTHREAD,1,1,TABSTYLE> >(0,inum),*this,ev);
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALFTHREAD,1,0,TABSTYLE> >(0,inum),*this);
} else {
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALFTHREAD,0,1,TABSTYLE> >(0,inum),*this,ev);
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALFTHREAD,0,0,TABSTYLE> >(0,inum),*this);
}
}
k_error_flag.template modify<DeviceType>();
@ -223,9 +265,13 @@ void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXgetParams,
}
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE>
KOKKOS_INLINE_FUNCTION
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>, const int &ii, EV_FLOAT& ev) const {
// The f array is atomic for Half/Thread neighbor style
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
int i,j,jj,inum,jnum,itype,jtype,itable;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair;
double rsq;
@ -239,8 +285,6 @@ void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEI
double fraction_i,fraction_j;
int jtable;
Table *tb;
int tlm1 = tablength - 1;
i = d_ilist[ii];
@ -274,26 +318,34 @@ void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEI
fractionOld1_j = d_fractionOld1[j];
fractionOld2_j = d_fractionOld2[j];
tb = &tables[tabindex[itype][jtype]];
if (rho[i]*rho[i] < tb->innersq || rho[j]*rho[j] < tb->innersq){
//tb = &tables[tabindex[itype][jtype]];
const int tidx = d_table_const.tabindex(itype,jtype);
//if (rho[i]*rho[i] < tb->innersq || rho[j]*rho[j] < tb->innersq){
if (rho[i]*rho[i] < d_table_const.innersq(tidx) || rho[j]*rho[j] < d_table_const.innersq(tidx)){
k_error_flag.d_view() = 1;
}
if (tabstyle == LOOKUP) {
itable = static_cast<int> (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta);
jtable = static_cast<int> (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta);
if (TABSTYLE == LOOKUP) {
//itable = static_cast<int> (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta);
itable = static_cast<int> (((rho[i]*rho[i]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
//jtable = static_cast<int> (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta);
jtable = static_cast<int> (((rho[j]*rho[j]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
if (itable >= tlm1 || jtable >= tlm1){
k_error_flag.d_view() = 2;
}
A_i = tb->f[itable];
A_j = tb->f[jtable];
//A_i = tb->f[itable];
A_i = d_table_const.f(tidx,itable);
//A_j = tb->f[jtable];
A_j = d_table_const.f(tidx,jtable);
const double rfactor = 1.0-sqrt(rsq/d_cutsq(itype,jtype));
fpair = 0.5*(A_i + A_j)*(4.0-3.0*rfactor)*rfactor*rfactor*rfactor;
fpair /= sqrt(rsq);
} else if (tabstyle == LINEAR) {
itable = static_cast<int> ((rho[i]*rho[i] - tb->innersq) * tb->invdelta);
jtable = static_cast<int> (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta);
} else if (TABSTYLE == LINEAR) {
//itable = static_cast<int> ((rho[i]*rho[i] - tb->innersq) * tb->invdelta);
itable = static_cast<int> ((rho[i]*rho[i] - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
//jtable = static_cast<int> (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta);
jtable = static_cast<int> ((rho[j]*rho[j] - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
if (itable >= tlm1 || jtable >= tlm1){
k_error_flag.d_view() = 2;
}
@ -302,15 +354,19 @@ void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEI
if(jtable<0) jtable=0;
if(jtable>=tlm1)jtable=tlm1;
fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta);
fraction_j = (((rho[j]*rho[j]) - tb->rsq[jtable]) * tb->invdelta);
//fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta);
fraction_i = (((rho[i]*rho[i]) - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx));
//fraction_j = (((rho[j]*rho[j]) - tb->rsq[jtable]) * tb->invdelta);
fraction_j = (((rho[j]*rho[j]) - d_table_const.rsq(tidx,jtable)) * d_table_const.invdelta(tidx));
if(itable==0) fraction_i=0.0;
if(itable==tlm1) fraction_i=0.0;
if(jtable==0) fraction_j=0.0;
if(jtable==tlm1) fraction_j=0.0;
A_i = tb->f[itable] + fraction_i*tb->df[itable];
A_j = tb->f[jtable] + fraction_j*tb->df[jtable];
//A_i = tb->f[itable] + fraction_i*tb->df[itable];
A_i = d_table_const.f(tidx,itable) + fraction_i*d_table_const.df(tidx,itable);
//A_j = tb->f[jtable] + fraction_j*tb->df[jtable];
A_j = d_table_const.f(tidx,jtable) + fraction_j*d_table_const.df(tidx,jtable);
const double rfactor = 1.0-sqrt(rsq/d_cutsq(itype,jtype));
fpair = 0.5*(A_i + A_j)*(4.0-3.0*rfactor)*rfactor*rfactor*rfactor;
@ -325,29 +381,34 @@ void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEI
fy_i += dely*fpair;
fz_i += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
f(j,0) -= delx*fpair;
f(j,1) -= dely*fpair;
f(j,2) -= delz*fpair;
a_f(j,0) -= delx*fpair;
a_f(j,1) -= dely*fpair;
a_f(j,2) -= delz*fpair;
}
//if (evflag) ev_tally(i,j,nlocal,newton_pair,0.0,0.0,fpair,delx,dely,delz);
if (EVFLAG) this->template ev_tally<NEIGHFLAG,NEWTON_PAIR>(ev,i,j,0.0,fpair,delx,dely,delz);
}
}
f(i,0) += fx_i;
f(i,1) += fy_i;
f(i,2) += fz_i;
a_f(i,0) += fx_i;
a_f(i,1) += fy_i;
a_f(i,2) += fz_i;
tb = &tables[tabindex[itype][itype]];
itable = static_cast<int> (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta);
if (tabstyle == LOOKUP) evdwl = tb->e[itable];
else if (tabstyle == LINEAR){
//tb = &tables[tabindex[itype][itype]];
const int tidx = d_table_const.tabindex(itype,itype);
//itable = static_cast<int> (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta);
itable = static_cast<int> (((rho[i]*rho[i]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
//if (TABSTYLE == LOOKUP) evdwl = tb->e[itable];
if (TABSTYLE == LOOKUP) evdwl = d_table_const.e(tidx,itable);
else if (TABSTYLE == LINEAR){
if (itable >= tlm1){
k_error_flag.d_view() = 2;
}
if(itable==0) fraction_i=0.0;
else fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta);
evdwl = tb->e[itable] + fraction_i*tb->de[itable];
//else fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta);
else fraction_i = (((rho[i]*rho[i]) - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx));
//evdwl = tb->e[itable] + fraction_i*tb->de[itable];
evdwl = d_table_const.e(tidx,itable); + fraction_i*d_table_const.de(tidx,itable);
} else k_error_flag.d_view() = 3;
evdwl *=(pi*d_cutsq(itype,itype)*d_cutsq(itype,itype))/84.0;
@ -364,121 +425,11 @@ void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEI
}
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE>
KOKKOS_INLINE_FUNCTION
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii) const {
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>, const int &ii) const {
EV_FLOAT ev;
this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(), ii, ev);
}
/* ----------------------------------------------------------------------
set coeffs for one or more type pairs
------------------------------------------------------------------------- */
template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::coeff(int narg, char **arg)
{
if (narg != 6 && narg != 7) error->all(FLERR,"Illegal pair_coeff command");
bool rx_flag = false;
for (int i = 0; i < modify->nfix; i++)
if (strncmp(modify->fix[i]->style,"rx",2) == 0) rx_flag = true;
if (!rx_flag) error->all(FLERR,"PairMultiLucyRXKokkos<DeviceType> requires a fix rx command.");
if (!allocated) allocate();
int ilo,ihi,jlo,jhi;
force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi);
force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi);
int me;
MPI_Comm_rank(world,&me);
tables = (Table *)
memory->srealloc(tables,(ntables+1)*sizeof(Table),"pair:tables");
Table *tb = &tables[ntables];
null_table(tb);
if (me == 0) read_table(tb,arg[2],arg[3]);
bcast_table(tb);
nspecies = atom->nspecies_dpd;
int n;
n = strlen(arg[3]) + 1;
site1 = new char[n];
strcpy(site1,arg[4]);
n = strlen(arg[4]) + 1;
site2 = new char[n];
strcpy(site2,arg[5]);
// set table cutoff
if (narg == 7) tb->cut = force->numeric(FLERR,arg[6]);
else if (tb->rflag) tb->cut = tb->rhi;
else tb->cut = tb->rfile[tb->ninput-1];
// error check on table parameters
// insure cutoff is within table
if (tb->ninput <= 1) error->one(FLERR,"Invalid pair table length");
if (tb->rflag == 0) {
rho_0 = tb->rfile[0];
} else {
rho_0 = tb->rlo;
}
tb->match = 0;
if (tabstyle == LINEAR && tb->ninput == tablength &&
tb->rflag == RSQ) tb->match = 1;
// spline read-in values and compute r,e,f vectors within table
if (tb->match == 0) spline_table(tb);
compute_table(tb);
// store ptr to table in tabindex
int count = 0;
for (int i = ilo; i <= ihi; i++) {
for (int j = MAX(jlo,i); j <= jhi; j++) {
tabindex[i][j] = ntables;
setflag[i][j] = 1;
count++;
}
}
if (count == 0) error->all(FLERR,"Illegal pair_coeff command");
ntables++;
// Match site* to isite values.
if (strcmp(site1, "1fluid") == 0)
isite1 = oneFluidParameter;
else {
isite1 = nspecies;
for (int ispecies = 0; ispecies < nspecies; ++ispecies)
if (strcmp(site1, atom->dname[ispecies]) == 0){
isite1 = ispecies;
break;
}
if (isite1 == nspecies)
error->all(FLERR,"Pair_multi_lucy_rx site1 is invalid.");
}
if (strcmp(site2, "1fluid") == 0)
isite2 = oneFluidParameter;
else {
isite2 = nspecies;
for (int ispecies = 0; ispecies < nspecies; ++ispecies)
if (strcmp(site2, atom->dname[ispecies]) == 0){
isite2 = ispecies;
break;
}
if (isite2 == nspecies)
error->all(FLERR,"Pair_multi_lucy_rx site2 is invalid.");
}
this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>(), ii, ev);
}
/* ---------------------------------------------------------------------- */
@ -486,12 +437,16 @@ void PairMultiLucyRXKokkos<DeviceType>::coeff(int narg, char **arg)
template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::computeLocalDensity()
{
copymode = 1;
x = atomKK->k_x.view<DeviceType>();
type = atomKK->k_type.view<DeviceType>();
rho = atomKK->k_rho.view<DeviceType>();
h_rho = atomKK->k_rho.h_view;
nlocal = atom->nlocal;
//sync
atomKK->sync(execution_space,X_MASK | TYPE_MASK | DPDRHO_MASK);
atomKK->modified(execution_space,DPDRHO_MASK);
const int inum = list->inum;
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
@ -514,16 +469,34 @@ void PairMultiLucyRXKokkos<DeviceType>::computeLocalDensity()
if (newton_pair) m += atom->nghost;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXZero>(0,m),*this);
// rho = density at each atom
// loop over neighbors of my atoms
if (newton_pair)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALF,1> >(0,inum),*this);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALF,0> >(0,inum),*this);
// rho = density at each atom
// loop over neighbors of my atoms
if (newton_pair) comm->reverse_comm_pair(this);
if (neighflag == HALF) {
if (newton_pair)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALF,1> >(0,inum),*this);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALF,0> >(0,inum),*this);
} else if (neighflag == HALFTHREAD) {
if (newton_pair)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALFTHREAD,1> >(0,inum),*this);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALFTHREAD,0> >(0,inum),*this);
}
// communicate and sum densities (on the host)
if (newton_pair) {
atomKK->modified(execution_space,DPDRHO_MASK);
atomKK->sync(Host,DPDRHO_MASK);
comm->reverse_comm_pair(this);
atomKK->modified(Host,DPDRHO_MASK);
atomKK->sync(execution_space,DPDRHO_MASK);
}
comm->forward_comm_pair(this);
copymode = 0;
}
template<class DeviceType>
@ -536,6 +509,10 @@ template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR>
KOKKOS_INLINE_FUNCTION
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXComputeLocalDensity<NEIGHFLAG,NEWTON_PAIR>, const int &ii) const {
// The rho array is atomic for Half/Thread neighbor style
Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_rho = rho;
const int i = d_ilist[ii];
const double xtmp = x(i,0);
@ -567,7 +544,7 @@ void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXComputeLoca
const double factor = factor_type11*(1.0 + 1.5*r_over_rcut)*tmpFactor4;
rho_i += factor;
if (NEWTON_PAIR || j < nlocal)
rho[j] += factor;
a_rho[j] += factor;
} else if (rsq < d_cutsq(itype,jtype)) {
const double rcut = sqrt(d_cutsq(itype,jtype));
const double tmpFactor = 1.0-sqrt(rsq)/rcut;
@ -575,12 +552,12 @@ void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXComputeLoca
const double factor = (84.0/(5.0*pi*rcut*rcut*rcut))*(1.0+3.0*sqrt(rsq)/(2.0*rcut))*tmpFactor4;
rho_i += factor;
if (NEWTON_PAIR || j < nlocal)
rho[j] += factor;
a_rho[j] += factor;
}
}
}
rho[i] = rho_i;
a_rho[i] = rho_i;
}
/* ---------------------------------------------------------------------- */
@ -630,16 +607,53 @@ void PairMultiLucyRXKokkos<DeviceType>::getParams(int id, double &fractionOld1,
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int PairMultiLucyRXKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf,
int pbc_flag, int *pbc)
{
d_sendlist = k_sendlist.view<DeviceType>();
iswap = iswap_in;
v_buf = buf.view<DeviceType>();
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairMultiLucyRXPackForwardComm>(0,n),*this);
DeviceType::fence();
return n;
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXPackForwardComm, const int &i) const {
int j = d_sendlist(iswap, i);
v_buf[i] = rho[j];
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf)
{
first = first_in;
v_buf = buf.view<DeviceType>();
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairMultiLucyRXUnpackForwardComm>(0,n),*this);
DeviceType::fence();
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXUnpackForwardComm, const int &i) const {
rho[i + first] = v_buf[i];
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int PairMultiLucyRXKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc)
{
int i,j,m;
rho = atomKK->k_rho.view<DeviceType>();
m = 0;
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = rho[j];
buf[m++] = h_rho[j];
}
return m;
}
@ -650,11 +664,10 @@ template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf)
{
int i,m,last;
rho = atomKK->k_rho.view<DeviceType>();
m = 0;
last = first + n;
for (i = first; i < last; i++) rho[i] = buf[m++];
for (i = first; i < last; i++) h_rho[i] = buf[m++];
}
/* ---------------------------------------------------------------------- */
@ -663,11 +676,10 @@ template<class DeviceType>
int PairMultiLucyRXKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
{
int i,m,last;
rho = atomKK->k_rho.view<DeviceType>();
m = 0;
last = first + n;
for (i = first; i < last; i++) buf[m++] = rho[i];
for (i = first; i < last; i++) buf[m++] = h_rho[i];
return m;
}
@ -677,12 +689,11 @@ template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *buf)
{
int i,j,m;
rho = atomKK->k_rho.view<DeviceType>();
m = 0;
for (i = 0; i < n; i++) {
j = list[i];
rho[j] += buf[m++];
h_rho[j] += buf[m++];
}
}
@ -782,6 +793,145 @@ void PairMultiLucyRXKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, con
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::create_kokkos_tables()
{
const int tlm1 = tablength-1;
memory->create_kokkos(d_table->innersq,h_table->innersq,ntables,"Table::innersq");
memory->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta");
memory->create_kokkos(d_table->deltasq6,h_table->deltasq6,ntables,"Table::deltasq6");
if(tabstyle == LOOKUP) {
memory->create_kokkos(d_table->e,h_table->e,ntables,tlm1,"Table::e");
memory->create_kokkos(d_table->f,h_table->f,ntables,tlm1,"Table::f");
}
if(tabstyle == LINEAR) {
memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq");
memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e");
memory->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f");
memory->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de");
memory->create_kokkos(d_table->df,h_table->df,ntables,tlm1,"Table::df");
}
for(int i=0; i < ntables; i++) {
Table* tb = &tables[i];
h_table->innersq[i] = tb->innersq;
h_table->invdelta[i] = tb->invdelta;
h_table->deltasq6[i] = tb->deltasq6;
for(int j = 0; j<h_table->rsq.dimension_1(); j++)
h_table->rsq(i,j) = tb->rsq[j];
for(int j = 0; j<h_table->drsq.dimension_1(); j++)
h_table->drsq(i,j) = tb->drsq[j];
for(int j = 0; j<h_table->e.dimension_1(); j++)
h_table->e(i,j) = tb->e[j];
for(int j = 0; j<h_table->de.dimension_1(); j++)
h_table->de(i,j) = tb->de[j];
for(int j = 0; j<h_table->f.dimension_1(); j++)
h_table->f(i,j) = tb->f[j];
for(int j = 0; j<h_table->df.dimension_1(); j++)
h_table->df(i,j) = tb->df[j];
for(int j = 0; j<h_table->e2.dimension_1(); j++)
h_table->e2(i,j) = tb->e2[j];
for(int j = 0; j<h_table->f2.dimension_1(); j++)
h_table->f2(i,j) = tb->f2[j];
}
Kokkos::deep_copy(d_table->innersq,h_table->innersq);
Kokkos::deep_copy(d_table->invdelta,h_table->invdelta);
Kokkos::deep_copy(d_table->deltasq6,h_table->deltasq6);
Kokkos::deep_copy(d_table->rsq,h_table->rsq);
Kokkos::deep_copy(d_table->drsq,h_table->drsq);
Kokkos::deep_copy(d_table->e,h_table->e);
Kokkos::deep_copy(d_table->de,h_table->de);
Kokkos::deep_copy(d_table->f,h_table->f);
Kokkos::deep_copy(d_table->df,h_table->df);
Kokkos::deep_copy(d_table->e2,h_table->e2);
Kokkos::deep_copy(d_table->f2,h_table->f2);
Kokkos::deep_copy(d_table->tabindex,h_table->tabindex);
d_table_const.innersq = d_table->innersq;
d_table_const.invdelta = d_table->invdelta;
d_table_const.deltasq6 = d_table->deltasq6;
d_table_const.rsq = d_table->rsq;
d_table_const.drsq = d_table->drsq;
d_table_const.e = d_table->e;
d_table_const.de = d_table->de;
d_table_const.f = d_table->f;
d_table_const.df = d_table->df;
d_table_const.e2 = d_table->e2;
d_table_const.f2 = d_table->f2;
Kokkos::deep_copy(d_table->cutsq,h_table->cutsq);
update_table = 0;
}
/* ----------------------------------------------------------------------
allocate all arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::allocate()
{
allocated = 1;
const int nt = atom->ntypes + 1;
memory->create(setflag,nt,nt,"pair:setflag");
memory->create_kokkos(d_table->cutsq,h_table->cutsq,cutsq,nt,nt,"pair:cutsq");
memory->create_kokkos(d_table->tabindex,h_table->tabindex,tabindex,nt,nt,"pair:tabindex");
d_table_const.cutsq = d_table->cutsq;
d_table_const.tabindex = d_table->tabindex;
memset(&setflag[0][0],0,nt*nt*sizeof(int));
memset(&cutsq[0][0],0,nt*nt*sizeof(double));
memset(&tabindex[0][0],0,nt*nt*sizeof(int));
}
/* ----------------------------------------------------------------------
global settings
------------------------------------------------------------------------- */
template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::settings(int narg, char **arg)
{
if (narg < 2) error->all(FLERR,"Illegal pair_style command");
// new settings
if (strcmp(arg[0],"lookup") == 0) tabstyle = LOOKUP;
else if (strcmp(arg[0],"linear") == 0) tabstyle = LINEAR;
else error->all(FLERR,"Unknown table style in pair_style command");
tablength = force->inumeric(FLERR,arg[1]);
if (tablength < 2) error->all(FLERR,"Illegal number of pair table entries");
// delete old tables, since cannot just change settings
for (int m = 0; m < ntables; m++) free_table(&tables[m]);
memory->sfree(tables);
if (allocated) {
memory->destroy(setflag);
d_table_const.tabindex = d_table->tabindex = typename ArrayTypes<DeviceType>::t_int_2d();
h_table->tabindex = typename ArrayTypes<LMPHostType>::t_int_2d();
d_table_const.cutsq = d_table->cutsq = typename ArrayTypes<DeviceType>::t_ffloat_2d();
h_table->cutsq = typename ArrayTypes<LMPHostType>::t_ffloat_2d();
}
allocated = 0;
ntables = 0;
tables = NULL;
}
/* ---------------------------------------------------------------------- */
namespace LAMMPS_NS {
template class PairMultiLucyRXKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA

View File

@ -29,9 +29,12 @@ PairStyle(multi/lucy/rx/kk/host,PairMultiLucyRXKokkos<LMPHostType>)
namespace LAMMPS_NS {
struct TagPairMultiLucyRXPackForwardComm{};
struct TagPairMultiLucyRXUnpackForwardComm{};
struct TagPairMultiLucyRXgetParams{};
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE>
struct TagPairMultiLucyRXCompute{};
struct TagPairMultiLucyRXZero{};
@ -50,24 +53,37 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX {
virtual ~PairMultiLucyRXKokkos();
void compute(int, int);
void settings(int, char **);
template<int TABSTYLE>
void compute_style(int, int);
void init_style();
void coeff(int, char **);
int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&,
int, int *);
void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&);
int pack_forward_comm(int, int *, double *, int, int *);
void unpack_forward_comm(int, int, double *);
int pack_reverse_comm(int, int, double *);
void unpack_reverse_comm(int, int *, double *);
void computeLocalDensity();
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMultiLucyRXPackForwardComm, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMultiLucyRXUnpackForwardComm, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMultiLucyRXgetParams, const int&) const;
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE>
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&, EV_FLOAT&) const;
void operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>, const int&, EV_FLOAT&) const;
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE>
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&) const;
void operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMultiLucyRXZero, const int&) const;
@ -92,6 +108,8 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX {
double rcut_type11;
double factor_type11;
enum{LOOKUP,LINEAR,SPLINE,BITMAP};
//struct Table {
// int ninput,rflag,fpflag,match;
// double rlo,rhi,fplo,fphi,cut;
@ -100,14 +118,47 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX {
// double innersq,delta,invdelta,deltasq6;
// double *rsq,*drsq,*e,*de,*f,*df,*e2,*f2;
//};
//Table *tables;
int tabstyle,tablength;
/*struct TableDeviceConst {
typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread cutsq;
typename ArrayTypes<DeviceType>::t_int_2d_randomread tabindex;
typename ArrayTypes<DeviceType>::t_ffloat_1d_randomread innersq,invdelta,deltasq6;
typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2;
};*/
//Its faster not to use texture fetch if the number of tables is less than 32!
struct TableDeviceConst {
typename ArrayTypes<DeviceType>::t_ffloat_2d cutsq;
typename ArrayTypes<DeviceType>::t_int_2d tabindex;
typename ArrayTypes<DeviceType>::t_ffloat_1d innersq,invdelta,deltasq6;
typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2;
};
struct TableDevice {
typename ArrayTypes<DeviceType>::t_ffloat_2d cutsq;
typename ArrayTypes<DeviceType>::t_int_2d tabindex;
typename ArrayTypes<DeviceType>::t_ffloat_1d innersq,invdelta,deltasq6;
typename ArrayTypes<DeviceType>::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2;
};
struct TableHost {
typename ArrayTypes<LMPHostType>::t_ffloat_2d cutsq;
typename ArrayTypes<LMPHostType>::t_int_2d tabindex;
typename ArrayTypes<LMPHostType>::t_ffloat_1d innersq,invdelta,deltasq6;
typename ArrayTypes<LMPHostType>::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2;
};
TableDeviceConst d_table_const;
TableDevice* d_table;
TableHost* h_table;
int **tabindex;
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
//void read_table(Table *, char *, char *);
//void param_extract(Table *, char *);
char *site1, *site2;
void allocate();
int update_table;
void create_kokkos_tables();
void cleanup_copy();
KOKKOS_INLINE_FUNCTION
void getParams(int, double &, double &, double &, double &) const;
@ -118,6 +169,7 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX {
typename AT::t_f_array f;
typename AT::t_int_1d_randomread type;
typename AT::t_efloat_1d rho;
typename HAT::t_efloat_1d h_rho;
typename AT::t_efloat_1d uCG, uCGnew;
typename AT::t_float_2d dvector;
@ -135,6 +187,11 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX {
typename AT::tdual_ffloat_2d k_cutsq;
typename AT::t_ffloat_2d d_cutsq;
int iswap;
int first;
typename AT::t_int_2d d_sendlist;
typename AT::t_xfloat_1d_um v_buf;
friend void pair_virial_fdotr_compute<PairMultiLucyRXKokkos>(PairMultiLucyRXKokkos*);
};

View File

@ -127,6 +127,8 @@ FixEOStableRX::FixEOStableRX(LAMMPS *lmp, int narg, char **arg) :
FixEOStableRX::~FixEOStableRX()
{
if (copymode) return;
for (int m = 0; m < ntables; m++) {
free_table(&tables[m]);
free_table(&tables2[m]);

View File

@ -78,6 +78,8 @@ PairMultiLucyRX::PairMultiLucyRX(LAMMPS *lmp) : Pair(lmp),
PairMultiLucyRX::~PairMultiLucyRX()
{
if (copymode) return;
for (int m = 0; m < ntables; m++) free_table(&tables[m]);
memory->sfree(tables);

View File

@ -50,6 +50,8 @@ PairTableRX::PairTableRX(LAMMPS *lmp) : Pair(lmp)
PairTableRX::~PairTableRX()
{
if (copymode) return;
for (int m = 0; m < ntables; m++) free_table(&tables[m]);
memory->sfree(tables);

View File

@ -42,6 +42,18 @@
#define ENERGY_MASK 0x00010000
#define VIRIAL_MASK 0x00020000
// DPD
#define DPDRHO_MASK 0x00040000
#define DPDTHETA_MASK 0x00080000
#define UCOND_MASK 0x00100000
#define UMECH_MASK 0x00200000
#define UCHEM_MASK 0x00400000
#define UCG_MASK 0x00800000
#define UCGNEW_MASK 0x01000000
#define DUCHEM_MASK 0x02000000
#define DVECTOR_MASK 0x04000000
// granular
#define RADIUS_MASK 0x00100000