From f7026491f19a9ac2f2b6dd700b2b9671f0f24a93 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 6 Jun 2019 09:59:41 -0600 Subject: [PATCH] Code reformat --- src/KOKKOS/fix_qeq_reax_kokkos.cpp | 348 ++++++++++++++++------------- src/KOKKOS/fix_qeq_reax_kokkos.h | 55 +++-- 2 files changed, 219 insertions(+), 184 deletions(-) diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index fb61898f6d..c7c109d8dd 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -231,24 +231,22 @@ void FixQEqReaxKokkos::pre_force(int vflag) int vector_length = 32; int atoms_per_team = 4; - int num_teams = inum/atoms_per_team + (inum%atoms_per_team?1:0); + int num_teams = inum / atoms_per_team + (inum % atoms_per_team ? 1 : 0); - Kokkos::TeamPolicy policy(num_teams, atoms_per_team, vector_length); - if (neighflag == FULL){ - FixQEqReaxKokkosComputeHFunctor computeH_functor(this, - atoms_per_team, - vector_length); - Kokkos::parallel_for( policy, computeH_functor ); - }else if (neighflag == HALF){ - FixQEqReaxKokkosComputeHFunctor computeH_functor(this, - atoms_per_team, - vector_length); - Kokkos::parallel_for( policy, computeH_functor ); - }else { - FixQEqReaxKokkosComputeHFunctor computeH_functor(this, - atoms_per_team, - vector_length); - Kokkos::parallel_for( policy, computeH_functor ); + Kokkos::TeamPolicy policy(num_teams, atoms_per_team, + vector_length); + if (neighflag == FULL) { + FixQEqReaxKokkosComputeHFunctor computeH_functor( + this, atoms_per_team, vector_length); + Kokkos::parallel_for(policy, computeH_functor); + } else if (neighflag == HALF) { + FixQEqReaxKokkosComputeHFunctor computeH_functor( + this, atoms_per_team, vector_length); + Kokkos::parallel_for(policy, computeH_functor); + } else { + FixQEqReaxKokkosComputeHFunctor computeH_functor( + this, atoms_per_team, vector_length); + Kokkos::parallel_for(policy, computeH_functor); } // init_matvec @@ -403,180 +401,208 @@ void FixQEqReaxKokkos::zero_item(int ii) const // d_numnbrs - d_numnbrs[i] contains the # of non-zero entries in the i-th row of H (which also represents the # of neighbor atoms with electrostatic interaction coefficients with atom-i) // d_firstnbr- d_firstnbr[i] contains the beginning index from where the H matrix entries corresponding to row-i is stored in d_val // d_jlist - contains the column index corresponding to each entry in d_val -template -template -void -FixQEqReaxKokkos::compute_h_team(const typename Kokkos::TeamPolicy ::member_type &team, - int atoms_per_team, - int vector_length) const{ - // scratch space setup - Kokkos::View< int*, Kokkos::ScratchMemorySpace, Kokkos::MemoryTraits > s_ilist(team.team_shmem(), atoms_per_team); - Kokkos::View< int*, Kokkos::ScratchMemorySpace, Kokkos::MemoryTraits > s_numnbrs(team.team_shmem(), atoms_per_team); - Kokkos::View< int*, Kokkos::ScratchMemorySpace, Kokkos::MemoryTraits > s_firstnbr(team.team_shmem(), atoms_per_team); +template +template +void FixQEqReaxKokkos::compute_h_team( + const typename Kokkos::TeamPolicy::member_type &team, + int atoms_per_team, int vector_length) const { - Kokkos::View< int**, Kokkos::ScratchMemorySpace, Kokkos::MemoryTraits > s_jtype(team.team_shmem(), atoms_per_team, vector_length); - Kokkos::View< int**, Kokkos::ScratchMemorySpace, Kokkos::MemoryTraits > s_jlist(team.team_shmem(), atoms_per_team, vector_length); - Kokkos::View< F_FLOAT**, Kokkos::ScratchMemorySpace, Kokkos::MemoryTraits > s_r(team.team_shmem(), atoms_per_team, vector_length); + // scratch space setup + Kokkos::View, + Kokkos::MemoryTraits> + s_ilist(team.team_shmem(), atoms_per_team); + Kokkos::View, + Kokkos::MemoryTraits> + s_numnbrs(team.team_shmem(), atoms_per_team); + Kokkos::View, + Kokkos::MemoryTraits> + s_firstnbr(team.team_shmem(), atoms_per_team); - // team of threads work on atoms with index in [firstatom, lastatom) - int firstatom = team.league_rank() * atoms_per_team; - int lastatom = ( firstatom + atoms_per_team < inum ) ? ( firstatom + atoms_per_team ) : inum; + Kokkos::View, + Kokkos::MemoryTraits> + s_jtype(team.team_shmem(), atoms_per_team, vector_length); + Kokkos::View, + Kokkos::MemoryTraits> + s_jlist(team.team_shmem(), atoms_per_team, vector_length); + Kokkos::View, + Kokkos::MemoryTraits> + s_r(team.team_shmem(), atoms_per_team, vector_length); - // kokkos-thread-0 is used to load info from global memory into scratch space - if(team.team_rank() == 0){ + // team of threads work on atoms with index in [firstatom, lastatom) + int firstatom = team.league_rank() * atoms_per_team; + int lastatom = + (firstatom + atoms_per_team < inum) ? (firstatom + atoms_per_team) : inum; - // copy atom indices from d_ilist[firstatom:lastatom] to scratch space s_ilist[0:atoms_per_team] - // copy # of neighbor atoms for all the atoms with indices in d_ilist[firstatom:lastatom] from d_numneigh to scratch space s_numneigh[0:atoms_per_team] - // calculate total number of neighbor atoms for all atoms assigned to the current team of threads (Note - Total # of neighbor atoms here provides the - // upper bound space requirement to store the H matrix values corresponding to the atoms with indices in d_ilist[firstatom:lastatom]) + // kokkos-thread-0 is used to load info from global memory into scratch space + if (team.team_rank() == 0) { - Kokkos::parallel_scan( Kokkos::ThreadVectorRange(team, atoms_per_team), [&](const int &idx, int &totalnbrs, bool final) { - int ii = firstatom + idx; + // copy atom indices from d_ilist[firstatom:lastatom] to scratch space s_ilist[0:atoms_per_team] + // copy # of neighbor atoms for all the atoms with indices in d_ilist[firstatom:lastatom] from d_numneigh to scratch space s_numneigh[0:atoms_per_team] + // calculate total number of neighbor atoms for all atoms assigned to the current team of threads (Note - Total # of neighbor atoms here provides the + // upper bound space requirement to store the H matrix values corresponding to the atoms with indices in d_ilist[firstatom:lastatom]) - if(ii < inum){ - const int i = d_ilist[ii]; - int jnum = d_numneigh[i]; + Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team, atoms_per_team), + [&](const int &idx, int &totalnbrs, bool final) { + int ii = firstatom + idx; - if(final){ - s_ilist[idx] = i; - s_numnbrs[idx] = jnum; - s_firstnbr[idx] = totalnbrs; - } - totalnbrs += jnum; - }else{ - s_numnbrs[idx] = 0; - } - }); - } + if (ii < inum) { + const int i = d_ilist[ii]; + int jnum = d_numneigh[i]; + if (final) { + s_ilist[idx] = i; + s_numnbrs[idx] = jnum; + s_firstnbr[idx] = totalnbrs; + } + totalnbrs += jnum; + } else { + s_numnbrs[idx] = 0; + } + }); + } - // barrier ensures that the data moved to scratch space is visible to all the threads of the corresponding team - team.team_barrier(); + // barrier ensures that the data moved to scratch space is visible to all the + // threads of the corresponding team + team.team_barrier(); - // calculate the global memory offset from where the H matrix values to be calculated by the current team will be stored in d_val - int team_firstnbr_idx = 0; - Kokkos::single (Kokkos::PerTeam (team), [=] (int &val) { - int totalnbrs = s_firstnbr[lastatom - firstatom - 1] + s_numnbrs[lastatom - firstatom - 1]; - val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs); - }, team_firstnbr_idx); + // calculate the global memory offset from where the H matrix values to be + // calculated by the current team will be stored in d_val + int team_firstnbr_idx = 0; + Kokkos::single(Kokkos::PerTeam(team), + [=](int &val) { + int totalnbrs = s_firstnbr[lastatom - firstatom - 1] + + s_numnbrs[lastatom - firstatom - 1]; + val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs); + }, + team_firstnbr_idx); + // map the H matrix computation of each atom to kokkos-thread (one atom per + // kokkos-thread) neighbor computation for each atom is assigned to vector + // lanes of the corresponding thread + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, atoms_per_team), [&](const int &idx) { + int ii = firstatom + idx; - // map the H matrix computation of each atom to kokkos-thread (one atom per kokkos-thread) - // neighbor computation for each atom is assigned to vector lanes of the corresponding thread - Kokkos::parallel_for( Kokkos::TeamThreadRange(team, atoms_per_team), [&] (const int &idx) { - int ii = firstatom + idx; + if (ii < inum) { + const int i = s_ilist[idx]; - if(ii < inum){ - const int i = s_ilist[idx]; + if (mask[i] & groupbit) { + const X_FLOAT xtmp = x(i, 0); + const X_FLOAT ytmp = x(i, 1); + const X_FLOAT ztmp = x(i, 2); + const int itype = type(i); + const tagint itag = tag(i); + const int jnum = s_numnbrs[idx]; - if (mask[i] & groupbit) { - const X_FLOAT xtmp = x(i,0); - const X_FLOAT ytmp = x(i,1); - const X_FLOAT ztmp = x(i,2); - const int itype = type(i); - const tagint itag = tag(i); - const int jnum = s_numnbrs[idx]; + // calculate the write-offset for atom-i's first neighbor + int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx]; + Kokkos::single(Kokkos::PerThread(team), + [&]() { d_firstnbr[i] = atomi_firstnbr_idx; }); - // calculate the write-offset for atom-i's first neighbor - int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx]; - Kokkos::single (Kokkos::PerThread (team), [&] () { - d_firstnbr[i] = atomi_firstnbr_idx; - }); + // current # of neighbor atoms with non-zero electrostatic + // interaction coefficients with atom-i which represents the # of + // non-zero elements in row-i of H matrix + int atomi_nbrs_inH = 0; + // calculate H matrix values corresponding to atom-i where neighbors + // are processed in batches and the batch size is vector_length + for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) { - // current # of neighbor atoms with non-zero electrostatic interaction coefficients with atom-i - // which represents the # of non-zero elements in row-i of H matrix - int atomi_nbrs_inH = 0; + int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH; - // calculate H matrix values corresponding to atom-i where neighbors are processed in batches and the batch size is vector_length - for(int jj_start = 0; jj_start < jnum; jj_start += vector_length){ + // count the # of neighbor atoms with non-zero electrostatic + // interaction coefficients with atom-i in the current batch + int atomi_nbrs_curbatch = 0; - int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH; + // compute rsq, jtype, j and store in scratch space which is + // reused later + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(team, vector_length), + [&](const int &idx, int &m_fill) { + const int jj = jj_start + idx; - // count the # of neighbor atoms with non-zero electrostatic interaction coefficients with atom-i in the current batch - int atomi_nbrs_curbatch = 0; + // initialize: -1 represents no interaction with atom-j + // where j = d_neighbors(i,jj) + s_jlist(team.team_rank(), idx) = -1; - // compute rsq, jtype, j and store in scratch space which is reused later - Kokkos::parallel_reduce( Kokkos::ThreadVectorRange(team, vector_length), [&](const int &idx, int &m_fill) { - const int jj = jj_start + idx; + if (jj < jnum) { + int j = d_neighbors(i, jj); + j &= NEIGHMASK; + const int jtype = type(j); - // initialize: -1 represents no interaction with atom-j where j = d_neighbors(i,jj) - s_jlist(team.team_rank(), idx) = -1; + const X_FLOAT delx = x(j, 0) - xtmp; + const X_FLOAT dely = x(j, 1) - ytmp; + const X_FLOAT delz = x(j, 2) - ztmp; - if(jj < jnum){ - int j = d_neighbors(i,jj); - j &= NEIGHMASK; - const int jtype = type(j); + // valid nbr interaction + bool valid = true; + if (NEIGHFLAG != FULL) { + // skip half of the interactions + const tagint jtag = tag(j); + if (j >= nlocal) { + if (itag > jtag) { + if ((itag + jtag) % 2 == 0) + valid = false; + } else if (itag < jtag) { + if ((itag + jtag) % 2 == 1) + valid = false; + } else { + if (x(j, 2) < ztmp) + valid = false; + if (x(j, 2) == ztmp && x(j, 1) < ytmp) + valid = false; + if (x(j, 2) == ztmp && x(j, 1) == ytmp && + x(j, 0) < xtmp) + valid = false; + } + } + } - const X_FLOAT delx = x(j,0) - xtmp; - const X_FLOAT dely = x(j,1) - ytmp; - const X_FLOAT delz = x(j,2) - ztmp; + const F_FLOAT rsq = + delx * delx + dely * dely + delz * delz; + if (rsq > cutsq) + valid = false; - // valid nbr interaction - bool valid = true; - if (NEIGHFLAG != FULL) { - // skip half of the interactions - const tagint jtag = tag(j); - if (j >= nlocal) { - if (itag > jtag) { - if ((itag+jtag) % 2 == 0) - valid = false; - } else if (itag < jtag) { - if ((itag+jtag) % 2 == 1) - valid = false; - } else { - if (x(j,2) < ztmp) - valid = false; - if (x(j,2) == ztmp && x(j,1) < ytmp) - valid = false; - if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) - valid = false; - } - } - } + if (valid) { + s_jlist(team.team_rank(), idx) = j; + s_jtype(team.team_rank(), idx) = jtype; + s_r(team.team_rank(), idx) = sqrt(rsq); + m_fill++; + } + } + }, + atomi_nbrs_curbatch); - const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; - if (rsq > cutsq) - valid = false; + // write non-zero entries of H to global memory + Kokkos::parallel_scan( + Kokkos::ThreadVectorRange(team, vector_length), + [&](const int &idx, int &m_fill, bool final) { + int j = s_jlist(team.team_rank(), idx); + if (final) { + if (j != -1) { + const int jtype = s_jtype(team.team_rank(), idx); + const F_FLOAT r = s_r(team.team_rank(), idx); + const F_FLOAT shldij = d_shield(itype, jtype); - if(valid){ - s_jlist(team.team_rank(), idx) = j; - s_jtype(team.team_rank(), idx) = jtype; - s_r(team.team_rank(), idx) = sqrt(rsq); - m_fill++; - } - } - }, atomi_nbrs_curbatch); + d_jlist[atomi_nbr_writeIdx + m_fill] = j; + d_val[atomi_nbr_writeIdx + m_fill] = + calculate_H_k(r, shldij); + } + } - // write non-zero entries of H to global memory - Kokkos::parallel_scan( Kokkos::ThreadVectorRange(team, vector_length), [&](const int &idx, int &m_fill, bool final) { - int j = s_jlist(team.team_rank(), idx); - if(final){ - if(j != -1){ - const int jtype = s_jtype(team.team_rank(), idx); - const F_FLOAT r = s_r(team.team_rank(), idx); - const F_FLOAT shldij = d_shield(itype,jtype); - - d_jlist[atomi_nbr_writeIdx + m_fill] = j; - d_val[atomi_nbr_writeIdx + m_fill] = calculate_H_k(r, shldij); - } - } - - if(j !=-1){ - m_fill++; - } - }); - atomi_nbrs_inH += atomi_nbrs_curbatch; - } - - Kokkos::single (Kokkos::PerThread (team), [&] () { - d_numnbrs[i] = atomi_nbrs_inH; - }); - } - } - }); + if (j != -1) { + m_fill++; + } + }); + atomi_nbrs_inH += atomi_nbrs_curbatch; + } + Kokkos::single(Kokkos::PerThread(team), + [&]() { d_numnbrs[i] = atomi_nbrs_inH; }); + } + } + }); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.h b/src/KOKKOS/fix_qeq_reax_kokkos.h index 10b007bb4f..d17ed350a9 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.h +++ b/src/KOKKOS/fix_qeq_reax_kokkos.h @@ -259,32 +259,41 @@ struct FixQEqReaxKokkosMatVecFunctor { }; template -struct FixQEqReaxKokkosComputeHFunctor { - int atoms_per_team, vector_length; - typedef Kokkos::ScratchMemorySpace scratch_space; - FixQEqReaxKokkos c; +struct FixQEqReaxKokkosComputeHFunctor { + int atoms_per_team, vector_length; + typedef Kokkos::ScratchMemorySpace scratch_space; + FixQEqReaxKokkos c; - FixQEqReaxKokkosComputeHFunctor(FixQEqReaxKokkos* c_ptr, - int _atoms_per_team, - int _vector_length): - c(*c_ptr), atoms_per_team(_atoms_per_team), vector_length(_vector_length) { - c.cleanup_copy(); - }; + FixQEqReaxKokkosComputeHFunctor(FixQEqReaxKokkos *c_ptr, + int _atoms_per_team, int _vector_length) + : c(*c_ptr), atoms_per_team(_atoms_per_team), + vector_length(_vector_length) { + c.cleanup_copy(); + }; - KOKKOS_INLINE_FUNCTION - void operator()(const typename Kokkos::TeamPolicy ::member_type &team) const { - c.template compute_h_team (team, atoms_per_team, vector_length); - } + KOKKOS_INLINE_FUNCTION + void operator()( + const typename Kokkos::TeamPolicy::member_type &team) const { + c.template compute_h_team(team, atoms_per_team, vector_length); + } - size_t team_shmem_size( int team_size ) const { - size_t shmem_size = Kokkos::View::shmem_size(atoms_per_team) + // s_ilist - Kokkos::View::shmem_size(atoms_per_team) + // s_numnbrs - Kokkos::View::shmem_size(atoms_per_team) + // s_firstnbr - Kokkos::View::shmem_size(atoms_per_team, vector_length) + //s_jtype - Kokkos::View::shmem_size(atoms_per_team, vector_length) + //s_j - Kokkos::View::shmem_size(atoms_per_team, vector_length) ; //s_r - return shmem_size; - } + size_t team_shmem_size(int team_size) const { + size_t shmem_size = + Kokkos::View::shmem_size( + atoms_per_team) + // s_ilist + Kokkos::View::shmem_size( + atoms_per_team) + // s_numnbrs + Kokkos::View::shmem_size( + atoms_per_team) + // s_firstnbr + Kokkos::View:: + shmem_size(atoms_per_team, vector_length) + // s_jtype + Kokkos::View:: + shmem_size(atoms_per_team, vector_length) + // s_j + Kokkos::View::shmem_size(atoms_per_team, + vector_length); // s_r + return shmem_size; + } }; template