Restore original compute_h in fix_qeq_reax_kokkos

This commit is contained in:
Stan Moore 2019-06-07 16:31:31 -06:00
parent 73fa8d4055
commit 85999fc4a7
2 changed files with 100 additions and 19 deletions

View File

@ -227,26 +227,32 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag)
// compute_H
Kokkos::deep_copy(d_mfill_offset,0);
if (lmp->kokkos->ngpus == 0) { // CPU
if (neighflag == FULL) {
FixQEqReaxKokkosComputeHFunctor<DeviceType, FULL> computeH_functor(this);
Kokkos::parallel_scan(inum,computeH_functor);
} else { // HALF and HALFTHREAD are the same
FixQEqReaxKokkosComputeHFunctor<DeviceType, HALF> computeH_functor(this);
Kokkos::parallel_scan(inum,computeH_functor);
}
} else { // GPU, use teams
Kokkos::deep_copy(d_mfill_offset,0);
int vector_length = 32;
int atoms_per_team = 4;
int num_teams = inum / atoms_per_team + (inum % atoms_per_team ? 1 : 0);
int vector_length = 32;
int atoms_per_team = 4;
int num_teams = inum / atoms_per_team + (inum % atoms_per_team ? 1 : 0);
Kokkos::TeamPolicy<DeviceType> policy(num_teams, atoms_per_team,
vector_length);
if (neighflag == FULL) {
FixQEqReaxKokkosComputeHFunctor<DeviceType, FULL> computeH_functor(
this, atoms_per_team, vector_length);
Kokkos::parallel_for(policy, computeH_functor);
} else if (neighflag == HALF) {
FixQEqReaxKokkosComputeHFunctor<DeviceType, HALF> computeH_functor(
this, atoms_per_team, vector_length);
Kokkos::parallel_for(policy, computeH_functor);
} else {
FixQEqReaxKokkosComputeHFunctor<DeviceType, HALFTHREAD> computeH_functor(
this, atoms_per_team, vector_length);
Kokkos::parallel_for(policy, computeH_functor);
Kokkos::TeamPolicy<DeviceType> policy(num_teams, atoms_per_team,
vector_length);
if (neighflag == FULL) {
FixQEqReaxKokkosComputeHFunctor<DeviceType, FULL> computeH_functor(
this, atoms_per_team, vector_length);
Kokkos::parallel_for(policy, computeH_functor);
} else { // HALF and HALFTHREAD are the same
FixQEqReaxKokkosComputeHFunctor<DeviceType, HALF> computeH_functor(
this, atoms_per_team, vector_length);
Kokkos::parallel_for(policy, computeH_functor);
}
}
// init_matvec
@ -401,6 +407,68 @@ void FixQEqReaxKokkos<DeviceType>::zero_item(int ii) const
/* ---------------------------------------------------------------------- */
// Scan body that builds one row of the sparse H matrix for list entry ii.
// It is called through FixQEqReaxKokkosComputeHFunctor::operator()(ii, m_fill, final),
// which is driven by Kokkos::parallel_scan.
//   ii     - index into d_ilist selecting the owning atom i
//   m_fill - running count of stored entries; incremented once per accepted neighbor
//   final  - when true, d_firstnbr/d_numnbrs/d_jlist/d_val are written;
//            when false, only m_fill is advanced (counting only)
template<class DeviceType>
template <int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxKokkos<DeviceType>::compute_h_item(int ii, int &m_fill, const bool &final) const
{
  const int i = d_ilist[ii];

  // atoms outside the fix group get no row at all
  if (!(mask[i] & groupbit)) return;

  const X_FLOAT xtmp = x(i,0);
  const X_FLOAT ytmp = x(i,1);
  const X_FLOAT ztmp = x(i,2);
  const int itype = type(i);
  const tagint itag = tag(i);
  const int jnum = d_numneigh[i];

  // the row of atom i begins at the current fill position
  if (final) d_firstnbr[i] = m_fill;

  for (int jj = 0; jj < jnum; ++jj) {
    const int j = d_neighbors(i,jj) & NEIGHMASK;
    const int jtype = type(j);

    if (NEIGHFLAG != FULL && j >= nlocal) {
      // For neighbors with index >= nlocal, keep only one of the two
      // (i,j)/(j,i) pairs: decide by tag parity, and by coordinate
      // ordering (z, then y, then x) when the tags are equal.
      const tagint jtag = tag(j);
      bool skip_pair;
      if (itag > jtag) {
        skip_pair = ((itag+jtag) % 2 == 0);
      } else if (itag < jtag) {
        skip_pair = ((itag+jtag) % 2 == 1);
      } else {
        skip_pair = (x(j,2) < ztmp) ||
                    (x(j,2) == ztmp && x(j,1) < ytmp) ||
                    (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp);
      }
      if (skip_pair) continue;
    }

    const X_FLOAT delx = x(j,0) - xtmp;
    const X_FLOAT dely = x(j,1) - ytmp;
    const X_FLOAT delz = x(j,2) - ztmp;
    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;

    // neighbors beyond the cutoff are not stored
    if (rsq > cutsq) continue;

    if (final) {
      // store the column index and the matrix value for this entry
      const F_FLOAT shldij = d_shield(itype,jtype);
      const F_FLOAT r = sqrt(rsq);
      d_val(m_fill) = calculate_H_k(r,shldij);
      d_jlist(m_fill) = j;
    }

    ++m_fill;
  }

  // row length = entries appended since the row start recorded above
  if (final) d_numnbrs[i] = m_fill - d_firstnbr[i];
}
/* ---------------------------------------------------------------------- */
// Calculate the QEq matrix H: a sparse matrix in which H[i][j] holds the electrostatic interaction coefficient between atom i and atom j
// d_val - contains the non-zero entries of sparse matrix H
// d_numnbrs - d_numnbrs[i] contains the # of non-zero entries in the i-th row of H (which also represents the # of neighbor atoms with electrostatic interaction coefficients with atom-i)

View File

@ -53,6 +53,10 @@ class FixQEqReaxKokkos : public FixQEqReax {
// Per-item kernel body taking a single int index.
// NOTE(review): the definition is outside this view; presumably it zeroes
// per-atom work data for that index -- confirm against the .cpp.
KOKKOS_INLINE_FUNCTION
void zero_item(int) const;
// Scan body that builds one row of the sparse H matrix.
// Arguments: (ii, m_fill, final) -- list index of the atom, running count of
// stored entries (incremented per accepted neighbor), and a flag that enables
// the actual writes to d_firstnbr/d_numnbrs/d_jlist/d_val.
// NEIGHFLAG selects whether half of the pairs involving atoms with
// index >= nlocal are skipped (any value other than FULL) or not (FULL).
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void compute_h_item(int, int &, const bool &) const;
// Team-level variant of the H-matrix build, taking a Kokkos::TeamPolicy
// member handle plus two ints.
// NOTE(review): the definition is outside this view; the two ints are
// presumably atoms_per_team and vector_length -- confirm against the .cpp.
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void compute_h_team(const typename Kokkos::TeamPolicy <DeviceType> ::member_type &team, int, int) const;
@ -151,7 +155,6 @@ class FixQEqReaxKokkos : public FixQEqReax {
int allocated_flag;
int need_dup;
DAT::tdual_int_scalar k_mfill_offset;
typename AT::t_int_scalar d_mfill_offset;
typedef Kokkos::DualView<int***,DeviceType> tdual_int_1d;
@ -254,9 +257,14 @@ struct FixQEqReaxKokkosMatVecFunctor {
template <class DeviceType, int NEIGHFLAG>
struct FixQEqReaxKokkosComputeHFunctor {
int atoms_per_team, vector_length;
typedef int value_type;
typedef Kokkos::ScratchMemorySpace<DeviceType> scratch_space;
FixQEqReaxKokkos<DeviceType> c;
FixQEqReaxKokkosComputeHFunctor(FixQEqReaxKokkos<DeviceType>* c_ptr):c(*c_ptr) {
c.cleanup_copy();
};
FixQEqReaxKokkosComputeHFunctor(FixQEqReaxKokkos<DeviceType> *c_ptr,
int _atoms_per_team, int _vector_length)
: c(*c_ptr), atoms_per_team(_atoms_per_team),
@ -264,6 +272,11 @@ struct FixQEqReaxKokkosComputeHFunctor {
c.cleanup_copy();
};
KOKKOS_INLINE_FUNCTION
void operator()(const int ii, int &m_fill, const bool &final) const {
c.template compute_h_item<NEIGHFLAG>(ii,m_fill,final);
}
KOKKOS_INLINE_FUNCTION
void operator()(
const typename Kokkos::TeamPolicy<DeviceType>::member_type &team) const {