forked from lijiext/lammps
Code reformat
This commit is contained in:
parent
708052dc81
commit
f7026491f1
|
@ -231,24 +231,22 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag)
|
|||
|
||||
int vector_length = 32;
|
||||
int atoms_per_team = 4;
|
||||
int num_teams = inum/atoms_per_team + (inum%atoms_per_team?1:0);
|
||||
int num_teams = inum / atoms_per_team + (inum % atoms_per_team ? 1 : 0);
|
||||
|
||||
Kokkos::TeamPolicy <DeviceType> policy(num_teams, atoms_per_team, vector_length);
|
||||
if (neighflag == FULL){
|
||||
FixQEqReaxKokkosComputeHFunctor<DeviceType, FULL> computeH_functor(this,
|
||||
atoms_per_team,
|
||||
Kokkos::TeamPolicy<DeviceType> policy(num_teams, atoms_per_team,
|
||||
vector_length);
|
||||
Kokkos::parallel_for( policy, computeH_functor );
|
||||
}else if (neighflag == HALF){
|
||||
FixQEqReaxKokkosComputeHFunctor<DeviceType, HALF> computeH_functor(this,
|
||||
atoms_per_team,
|
||||
vector_length);
|
||||
Kokkos::parallel_for( policy, computeH_functor );
|
||||
}else {
|
||||
FixQEqReaxKokkosComputeHFunctor<DeviceType, HALFTHREAD> computeH_functor(this,
|
||||
atoms_per_team,
|
||||
vector_length);
|
||||
Kokkos::parallel_for( policy, computeH_functor );
|
||||
if (neighflag == FULL) {
|
||||
FixQEqReaxKokkosComputeHFunctor<DeviceType, FULL> computeH_functor(
|
||||
this, atoms_per_team, vector_length);
|
||||
Kokkos::parallel_for(policy, computeH_functor);
|
||||
} else if (neighflag == HALF) {
|
||||
FixQEqReaxKokkosComputeHFunctor<DeviceType, HALF> computeH_functor(
|
||||
this, atoms_per_team, vector_length);
|
||||
Kokkos::parallel_for(policy, computeH_functor);
|
||||
} else {
|
||||
FixQEqReaxKokkosComputeHFunctor<DeviceType, HALFTHREAD> computeH_functor(
|
||||
this, atoms_per_team, vector_length);
|
||||
Kokkos::parallel_for(policy, computeH_functor);
|
||||
}
|
||||
|
||||
// init_matvec
|
||||
|
@ -403,115 +401,139 @@ void FixQEqReaxKokkos<DeviceType>::zero_item(int ii) const
|
|||
// d_numnbrs - d_numnbrs[i] contains the # of non-zero entries in the i-th row of H (which also represents the # of neighbor atoms with electrostatic interaction coefficients with atom-i)
|
||||
// d_firstnbr - d_firstnbr[i] contains the beginning index from where the H matrix entries corresponding to row-i are stored in d_val
|
||||
// d_jlist - contains the column index corresponding to each entry in d_val
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG>
|
||||
void
|
||||
FixQEqReaxKokkos<DeviceType>::compute_h_team(const typename Kokkos::TeamPolicy <DeviceType> ::member_type &team,
|
||||
int atoms_per_team,
|
||||
int vector_length) const{
|
||||
|
||||
template <class DeviceType>
|
||||
template <int NEIGHFLAG>
|
||||
void FixQEqReaxKokkos<DeviceType>::compute_h_team(
|
||||
const typename Kokkos::TeamPolicy<DeviceType>::member_type &team,
|
||||
int atoms_per_team, int vector_length) const {
|
||||
|
||||
// scratch space setup
|
||||
Kokkos::View< int*, Kokkos::ScratchMemorySpace<DeviceType>, Kokkos::MemoryTraits<Kokkos::Unmanaged> > s_ilist(team.team_shmem(), atoms_per_team);
|
||||
Kokkos::View< int*, Kokkos::ScratchMemorySpace<DeviceType>, Kokkos::MemoryTraits<Kokkos::Unmanaged> > s_numnbrs(team.team_shmem(), atoms_per_team);
|
||||
Kokkos::View< int*, Kokkos::ScratchMemorySpace<DeviceType>, Kokkos::MemoryTraits<Kokkos::Unmanaged> > s_firstnbr(team.team_shmem(), atoms_per_team);
|
||||
Kokkos::View<int *, Kokkos::ScratchMemorySpace<DeviceType>,
|
||||
Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||
s_ilist(team.team_shmem(), atoms_per_team);
|
||||
Kokkos::View<int *, Kokkos::ScratchMemorySpace<DeviceType>,
|
||||
Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||
s_numnbrs(team.team_shmem(), atoms_per_team);
|
||||
Kokkos::View<int *, Kokkos::ScratchMemorySpace<DeviceType>,
|
||||
Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||
s_firstnbr(team.team_shmem(), atoms_per_team);
|
||||
|
||||
Kokkos::View< int**, Kokkos::ScratchMemorySpace<DeviceType>, Kokkos::MemoryTraits<Kokkos::Unmanaged> > s_jtype(team.team_shmem(), atoms_per_team, vector_length);
|
||||
Kokkos::View< int**, Kokkos::ScratchMemorySpace<DeviceType>, Kokkos::MemoryTraits<Kokkos::Unmanaged> > s_jlist(team.team_shmem(), atoms_per_team, vector_length);
|
||||
Kokkos::View< F_FLOAT**, Kokkos::ScratchMemorySpace<DeviceType>, Kokkos::MemoryTraits<Kokkos::Unmanaged> > s_r(team.team_shmem(), atoms_per_team, vector_length);
|
||||
Kokkos::View<int **, Kokkos::ScratchMemorySpace<DeviceType>,
|
||||
Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||
s_jtype(team.team_shmem(), atoms_per_team, vector_length);
|
||||
Kokkos::View<int **, Kokkos::ScratchMemorySpace<DeviceType>,
|
||||
Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||
s_jlist(team.team_shmem(), atoms_per_team, vector_length);
|
||||
Kokkos::View<F_FLOAT **, Kokkos::ScratchMemorySpace<DeviceType>,
|
||||
Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||
s_r(team.team_shmem(), atoms_per_team, vector_length);
|
||||
|
||||
// team of threads work on atoms with index in [firstatom, lastatom)
|
||||
int firstatom = team.league_rank() * atoms_per_team;
|
||||
int lastatom = ( firstatom + atoms_per_team < inum ) ? ( firstatom + atoms_per_team ) : inum;
|
||||
int lastatom =
|
||||
(firstatom + atoms_per_team < inum) ? (firstatom + atoms_per_team) : inum;
|
||||
|
||||
// kokkos-thread-0 is used to load info from global memory into scratch space
|
||||
if(team.team_rank() == 0){
|
||||
if (team.team_rank() == 0) {
|
||||
|
||||
// copy atom indices from d_ilist[firstatom:lastatom] to scratch space s_ilist[0:atoms_per_team]
|
||||
// copy # of neighbor atoms for all the atoms with indices in d_ilist[firstatom:lastatom] from d_numneigh to scratch space s_numnbrs[0:atoms_per_team]
|
||||
// calculate total number of neighbor atoms for all atoms assigned to the current team of threads (Note - Total # of neighbor atoms here provides the
|
||||
// upper bound space requirement to store the H matrix values corresponding to the atoms with indices in d_ilist[firstatom:lastatom])
|
||||
|
||||
Kokkos::parallel_scan( Kokkos::ThreadVectorRange(team, atoms_per_team), [&](const int &idx, int &totalnbrs, bool final) {
|
||||
Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team, atoms_per_team),
|
||||
[&](const int &idx, int &totalnbrs, bool final) {
|
||||
int ii = firstatom + idx;
|
||||
|
||||
if(ii < inum){
|
||||
if (ii < inum) {
|
||||
const int i = d_ilist[ii];
|
||||
int jnum = d_numneigh[i];
|
||||
|
||||
if(final){
|
||||
if (final) {
|
||||
s_ilist[idx] = i;
|
||||
s_numnbrs[idx] = jnum;
|
||||
s_firstnbr[idx] = totalnbrs;
|
||||
}
|
||||
totalnbrs += jnum;
|
||||
}else{
|
||||
} else {
|
||||
s_numnbrs[idx] = 0;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
// barrier ensures that the data moved to scratch space is visible to all the threads of the corresponding team
|
||||
// barrier ensures that the data moved to scratch space is visible to all the
|
||||
// threads of the corresponding team
|
||||
team.team_barrier();
|
||||
|
||||
// calculate the global memory offset from where the H matrix values to be calculated by the current team will be stored in d_val
|
||||
// calculate the global memory offset from where the H matrix values to be
|
||||
// calculated by the current team will be stored in d_val
|
||||
int team_firstnbr_idx = 0;
|
||||
Kokkos::single (Kokkos::PerTeam (team), [=] (int &val) {
|
||||
int totalnbrs = s_firstnbr[lastatom - firstatom - 1] + s_numnbrs[lastatom - firstatom - 1];
|
||||
Kokkos::single(Kokkos::PerTeam(team),
|
||||
[=](int &val) {
|
||||
int totalnbrs = s_firstnbr[lastatom - firstatom - 1] +
|
||||
s_numnbrs[lastatom - firstatom - 1];
|
||||
val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs);
|
||||
}, team_firstnbr_idx);
|
||||
},
|
||||
team_firstnbr_idx);
|
||||
|
||||
|
||||
// map the H matrix computation of each atom to kokkos-thread (one atom per kokkos-thread)
|
||||
// neighbor computation for each atom is assigned to vector lanes of the corresponding thread
|
||||
Kokkos::parallel_for( Kokkos::TeamThreadRange(team, atoms_per_team), [&] (const int &idx) {
|
||||
// map the H matrix computation of each atom to kokkos-thread (one atom per
|
||||
// kokkos-thread); neighbor computation for each atom is assigned to vector
|
||||
// lanes of the corresponding thread
|
||||
Kokkos::parallel_for(
|
||||
Kokkos::TeamThreadRange(team, atoms_per_team), [&](const int &idx) {
|
||||
int ii = firstatom + idx;
|
||||
|
||||
if(ii < inum){
|
||||
if (ii < inum) {
|
||||
const int i = s_ilist[idx];
|
||||
|
||||
if (mask[i] & groupbit) {
|
||||
const X_FLOAT xtmp = x(i,0);
|
||||
const X_FLOAT ytmp = x(i,1);
|
||||
const X_FLOAT ztmp = x(i,2);
|
||||
const X_FLOAT xtmp = x(i, 0);
|
||||
const X_FLOAT ytmp = x(i, 1);
|
||||
const X_FLOAT ztmp = x(i, 2);
|
||||
const int itype = type(i);
|
||||
const tagint itag = tag(i);
|
||||
const int jnum = s_numnbrs[idx];
|
||||
|
||||
// calculate the write-offset for atom-i's first neighbor
|
||||
int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx];
|
||||
Kokkos::single (Kokkos::PerThread (team), [&] () {
|
||||
d_firstnbr[i] = atomi_firstnbr_idx;
|
||||
});
|
||||
Kokkos::single(Kokkos::PerThread(team),
|
||||
[&]() { d_firstnbr[i] = atomi_firstnbr_idx; });
|
||||
|
||||
|
||||
// current # of neighbor atoms with non-zero electrostatic interaction coefficients with atom-i
|
||||
// which represents the # of non-zero elements in row-i of H matrix
|
||||
// current # of neighbor atoms with non-zero electrostatic
|
||||
// interaction coefficients with atom-i which represents the # of
|
||||
// non-zero elements in row-i of H matrix
|
||||
int atomi_nbrs_inH = 0;
|
||||
|
||||
// calculate H matrix values corresponding to atom-i where neighbors are processed in batches and the batch size is vector_length
|
||||
for(int jj_start = 0; jj_start < jnum; jj_start += vector_length){
|
||||
// calculate H matrix values corresponding to atom-i where neighbors
|
||||
// are processed in batches and the batch size is vector_length
|
||||
for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) {
|
||||
|
||||
int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH;
|
||||
|
||||
// count the # of neighbor atoms with non-zero electrostatic interaction coefficients with atom-i in the current batch
|
||||
// count the # of neighbor atoms with non-zero electrostatic
|
||||
// interaction coefficients with atom-i in the current batch
|
||||
int atomi_nbrs_curbatch = 0;
|
||||
|
||||
// compute rsq, jtype, j and store in scratch space which is reused later
|
||||
Kokkos::parallel_reduce( Kokkos::ThreadVectorRange(team, vector_length), [&](const int &idx, int &m_fill) {
|
||||
// compute rsq, jtype, j and store in scratch space which is
|
||||
// reused later
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::ThreadVectorRange(team, vector_length),
|
||||
[&](const int &idx, int &m_fill) {
|
||||
const int jj = jj_start + idx;
|
||||
|
||||
// initialize: -1 represents no interaction with atom-j where j = d_neighbors(i,jj)
|
||||
// initialize: -1 represents no interaction with atom-j
|
||||
// where j = d_neighbors(i,jj)
|
||||
s_jlist(team.team_rank(), idx) = -1;
|
||||
|
||||
if(jj < jnum){
|
||||
int j = d_neighbors(i,jj);
|
||||
if (jj < jnum) {
|
||||
int j = d_neighbors(i, jj);
|
||||
j &= NEIGHMASK;
|
||||
const int jtype = type(j);
|
||||
|
||||
const X_FLOAT delx = x(j,0) - xtmp;
|
||||
const X_FLOAT dely = x(j,1) - ytmp;
|
||||
const X_FLOAT delz = x(j,2) - ztmp;
|
||||
const X_FLOAT delx = x(j, 0) - xtmp;
|
||||
const X_FLOAT dely = x(j, 1) - ytmp;
|
||||
const X_FLOAT delz = x(j, 2) - ztmp;
|
||||
|
||||
// valid nbr interaction
|
||||
bool valid = true;
|
||||
|
@ -520,63 +542,67 @@ FixQEqReaxKokkos<DeviceType>::compute_h_team(const typename Kokkos::TeamPolicy <
|
|||
const tagint jtag = tag(j);
|
||||
if (j >= nlocal) {
|
||||
if (itag > jtag) {
|
||||
if ((itag+jtag) % 2 == 0)
|
||||
if ((itag + jtag) % 2 == 0)
|
||||
valid = false;
|
||||
} else if (itag < jtag) {
|
||||
if ((itag+jtag) % 2 == 1)
|
||||
if ((itag + jtag) % 2 == 1)
|
||||
valid = false;
|
||||
} else {
|
||||
if (x(j,2) < ztmp)
|
||||
if (x(j, 2) < ztmp)
|
||||
valid = false;
|
||||
if (x(j,2) == ztmp && x(j,1) < ytmp)
|
||||
if (x(j, 2) == ztmp && x(j, 1) < ytmp)
|
||||
valid = false;
|
||||
if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp)
|
||||
if (x(j, 2) == ztmp && x(j, 1) == ytmp &&
|
||||
x(j, 0) < xtmp)
|
||||
valid = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
const F_FLOAT rsq =
|
||||
delx * delx + dely * dely + delz * delz;
|
||||
if (rsq > cutsq)
|
||||
valid = false;
|
||||
|
||||
if(valid){
|
||||
if (valid) {
|
||||
s_jlist(team.team_rank(), idx) = j;
|
||||
s_jtype(team.team_rank(), idx) = jtype;
|
||||
s_r(team.team_rank(), idx) = sqrt(rsq);
|
||||
m_fill++;
|
||||
}
|
||||
}
|
||||
}, atomi_nbrs_curbatch);
|
||||
},
|
||||
atomi_nbrs_curbatch);
|
||||
|
||||
// write non-zero entries of H to global memory
|
||||
Kokkos::parallel_scan( Kokkos::ThreadVectorRange(team, vector_length), [&](const int &idx, int &m_fill, bool final) {
|
||||
Kokkos::parallel_scan(
|
||||
Kokkos::ThreadVectorRange(team, vector_length),
|
||||
[&](const int &idx, int &m_fill, bool final) {
|
||||
int j = s_jlist(team.team_rank(), idx);
|
||||
if(final){
|
||||
if(j != -1){
|
||||
if (final) {
|
||||
if (j != -1) {
|
||||
const int jtype = s_jtype(team.team_rank(), idx);
|
||||
const F_FLOAT r = s_r(team.team_rank(), idx);
|
||||
const F_FLOAT shldij = d_shield(itype,jtype);
|
||||
const F_FLOAT shldij = d_shield(itype, jtype);
|
||||
|
||||
d_jlist[atomi_nbr_writeIdx + m_fill] = j;
|
||||
d_val[atomi_nbr_writeIdx + m_fill] = calculate_H_k(r, shldij);
|
||||
d_val[atomi_nbr_writeIdx + m_fill] =
|
||||
calculate_H_k(r, shldij);
|
||||
}
|
||||
}
|
||||
|
||||
if(j !=-1){
|
||||
if (j != -1) {
|
||||
m_fill++;
|
||||
}
|
||||
});
|
||||
atomi_nbrs_inH += atomi_nbrs_curbatch;
|
||||
}
|
||||
|
||||
Kokkos::single (Kokkos::PerThread (team), [&] () {
|
||||
d_numnbrs[i] = atomi_nbrs_inH;
|
||||
});
|
||||
Kokkos::single(Kokkos::PerThread(team),
|
||||
[&]() { d_numnbrs[i] = atomi_nbrs_inH; });
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
|
|
@ -264,25 +264,34 @@ struct FixQEqReaxKokkosComputeHFunctor {
|
|||
typedef Kokkos::ScratchMemorySpace<DeviceType> scratch_space;
|
||||
FixQEqReaxKokkos<DeviceType> c;
|
||||
|
||||
FixQEqReaxKokkosComputeHFunctor(FixQEqReaxKokkos<DeviceType>* c_ptr,
|
||||
int _atoms_per_team,
|
||||
int _vector_length):
|
||||
c(*c_ptr), atoms_per_team(_atoms_per_team), vector_length(_vector_length) {
|
||||
FixQEqReaxKokkosComputeHFunctor(FixQEqReaxKokkos<DeviceType> *c_ptr,
|
||||
int _atoms_per_team, int _vector_length)
|
||||
: c(*c_ptr), atoms_per_team(_atoms_per_team),
|
||||
vector_length(_vector_length) {
|
||||
c.cleanup_copy();
|
||||
};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const typename Kokkos::TeamPolicy <DeviceType> ::member_type &team) const {
|
||||
c.template compute_h_team<NEIGHFLAG> (team, atoms_per_team, vector_length);
|
||||
void operator()(
|
||||
const typename Kokkos::TeamPolicy<DeviceType>::member_type &team) const {
|
||||
c.template compute_h_team<NEIGHFLAG>(team, atoms_per_team, vector_length);
|
||||
}
|
||||
|
||||
size_t team_shmem_size( int team_size ) const {
|
||||
size_t shmem_size = Kokkos::View<int*, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(atoms_per_team) + // s_ilist
|
||||
Kokkos::View<int*, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(atoms_per_team) + // s_numnbrs
|
||||
Kokkos::View<int*, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(atoms_per_team) + // s_firstnbr
|
||||
Kokkos::View<int**, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(atoms_per_team, vector_length) + //s_jtype
|
||||
Kokkos::View<int**, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(atoms_per_team, vector_length) + //s_j
|
||||
Kokkos::View<F_FLOAT**, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(atoms_per_team, vector_length) ; //s_r
|
||||
size_t team_shmem_size(int team_size) const {
|
||||
size_t shmem_size =
|
||||
Kokkos::View<int *, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(
|
||||
atoms_per_team) + // s_ilist
|
||||
Kokkos::View<int *, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(
|
||||
atoms_per_team) + // s_numnbrs
|
||||
Kokkos::View<int *, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(
|
||||
atoms_per_team) + // s_firstnbr
|
||||
Kokkos::View<int **, scratch_space, Kokkos::MemoryUnmanaged>::
|
||||
shmem_size(atoms_per_team, vector_length) + // s_jtype
|
||||
Kokkos::View<int **, scratch_space, Kokkos::MemoryUnmanaged>::
|
||||
shmem_size(atoms_per_team, vector_length) + // s_j
|
||||
Kokkos::View<F_FLOAT **, scratch_space,
|
||||
Kokkos::MemoryUnmanaged>::shmem_size(atoms_per_team,
|
||||
vector_length); // s_r
|
||||
return shmem_size;
|
||||
}
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue