diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 4cc6d1f219..1351c2138d 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -227,26 +227,32 @@ void FixQEqReaxKokkos::pre_force(int vflag) // compute_H - Kokkos::deep_copy(d_mfill_offset,0); + if (lmp->kokkos->ngpus == 0) { // CPU + if (neighflag == FULL) { + FixQEqReaxKokkosComputeHFunctor computeH_functor(this); + Kokkos::parallel_scan(inum,computeH_functor); + } else { // HALF and HALFTHREAD are the same + FixQEqReaxKokkosComputeHFunctor computeH_functor(this); + Kokkos::parallel_scan(inum,computeH_functor); + } + } else { // GPU, use teams + Kokkos::deep_copy(d_mfill_offset,0); - int vector_length = 32; - int atoms_per_team = 4; - int num_teams = inum / atoms_per_team + (inum % atoms_per_team ? 1 : 0); + int vector_length = 32; + int atoms_per_team = 4; + int num_teams = inum / atoms_per_team + (inum % atoms_per_team ? 1 : 0); - Kokkos::TeamPolicy policy(num_teams, atoms_per_team, - vector_length); - if (neighflag == FULL) { - FixQEqReaxKokkosComputeHFunctor computeH_functor( - this, atoms_per_team, vector_length); - Kokkos::parallel_for(policy, computeH_functor); - } else if (neighflag == HALF) { - FixQEqReaxKokkosComputeHFunctor computeH_functor( - this, atoms_per_team, vector_length); - Kokkos::parallel_for(policy, computeH_functor); - } else { - FixQEqReaxKokkosComputeHFunctor computeH_functor( - this, atoms_per_team, vector_length); - Kokkos::parallel_for(policy, computeH_functor); + Kokkos::TeamPolicy policy(num_teams, atoms_per_team, + vector_length); + if (neighflag == FULL) { + FixQEqReaxKokkosComputeHFunctor computeH_functor( + this, atoms_per_team, vector_length); + Kokkos::parallel_for(policy, computeH_functor); + } else { // HALF and HALFTHREAD are the same + FixQEqReaxKokkosComputeHFunctor computeH_functor( + this, atoms_per_team, vector_length); + Kokkos::parallel_for(policy, computeH_functor); + } } // init_matvec @@ -401,6 +407,68 @@ void FixQEqReaxKokkos::zero_item(int ii) const /* ---------------------------------------------------------------------- */ +template +template +KOKKOS_INLINE_FUNCTION +void FixQEqReaxKokkos::compute_h_item(int ii, int &m_fill, const bool &final) const +{ + const int i = d_ilist[ii]; + int j,jj,jtype; + + if (mask[i] & groupbit) { + + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + const tagint itag = tag(i); + const int jnum = d_numneigh[i]; + if (final) + d_firstnbr[i] = m_fill; + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + jtype = type(j); + + const X_FLOAT delx = x(j,0) - xtmp; + const X_FLOAT dely = x(j,1) - ytmp; + const X_FLOAT delz = x(j,2) - ztmp; + + if (NEIGHFLAG != FULL) { + // skip half of the interactions + const tagint jtag = tag(j); + if (j >= nlocal) { + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x(j,2) < ztmp) continue; + if (x(j,2) == ztmp && x(j,1) < ytmp) continue; + if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue; + } + } + } + + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + if (rsq > cutsq) continue; + + if (final) { + const F_FLOAT r = sqrt(rsq); + d_jlist(m_fill) = j; + const F_FLOAT shldij = d_shield(itype,jtype); + d_val(m_fill) = calculate_H_k(r,shldij); + } + m_fill++; + } + if (final) + d_numnbrs[i] = m_fill - d_firstnbr[i]; + } +} + +/* ---------------------------------------------------------------------- */ + // Calculate Qeq matrix H where H is a sparse matrix and H[i][j] represents the electrostatic interaction coefficients on atom-i with atom-j // d_val - contains the non-zero entries of sparse matrix H // d_numnbrs - d_numnbrs[i] contains the # of non-zero entries in the i-th row of H (which also represents the # of neighbor atoms with electrostatic interaction coefficients with atom-i) diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.h b/src/KOKKOS/fix_qeq_reax_kokkos.h index d4132b47d2..cd69aa9283 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.h +++ b/src/KOKKOS/fix_qeq_reax_kokkos.h @@ -53,6 +53,10 @@ class FixQEqReaxKokkos : public FixQEqReax { KOKKOS_INLINE_FUNCTION void zero_item(int) const; + template + KOKKOS_INLINE_FUNCTION + void compute_h_item(int, int &, const bool &) const; + template KOKKOS_INLINE_FUNCTION void compute_h_team(const typename Kokkos::TeamPolicy ::member_type &team, int, int) const; @@ -151,7 +155,6 @@ class FixQEqReaxKokkos : public FixQEqReax { int allocated_flag; int need_dup; - DAT::tdual_int_scalar k_mfill_offset; typename AT::t_int_scalar d_mfill_offset; typedef Kokkos::DualView tdual_int_1d; @@ -254,9 +257,14 @@ struct FixQEqReaxKokkosMatVecFunctor { template struct FixQEqReaxKokkosComputeHFunctor { int atoms_per_team, vector_length; + typedef int value_type; typedef Kokkos::ScratchMemorySpace scratch_space; FixQEqReaxKokkos c; + FixQEqReaxKokkosComputeHFunctor(FixQEqReaxKokkos* c_ptr):c(*c_ptr) { + c.cleanup_copy(); + }; + FixQEqReaxKokkosComputeHFunctor(FixQEqReaxKokkos *c_ptr, int _atoms_per_team, int _vector_length) : c(*c_ptr), atoms_per_team(_atoms_per_team), @@ -264,6 +272,11 @@ struct FixQEqReaxKokkosComputeHFunctor { c.cleanup_copy(); }; + KOKKOS_INLINE_FUNCTION + void operator()(const int ii, int &m_fill, const bool &final) const { + c.template compute_h_item(ii,m_fill,final); + } + KOKKOS_INLINE_FUNCTION void operator()( const typename Kokkos::TeamPolicy::member_type &team) const {