mirror of https://github.com/lammps/lammps.git
Merge pull request #2373 from stanmoore1/kk_wkar
Add workaround for performance regression in Kokkos Package
This commit is contained in:
commit
94ad6821f7
|
@ -42,6 +42,10 @@ enum{FULL=1u,HALFTHREAD=2u,HALF=4u};
|
|||
#define MAX_TYPES_STACKPARAMS 12
|
||||
#define NeighClusterSize 8
|
||||
|
||||
// Shorthand alias: lets call sites write Kokkos::NoInit("label") in place of
// Kokkos::ViewAllocateWithoutInitializing("label") when constructing views.
namespace Kokkos {
|
||||
using NoInit = ViewAllocateWithoutInitializing;
|
||||
}
|
||||
|
||||
struct lmp_float3 {
|
||||
float x,y,z;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
|
|
@ -40,10 +40,8 @@ void NeighListKokkos<DeviceType>::grow(int nmax)
|
|||
|
||||
k_ilist = DAT::tdual_int_1d("neighlist:ilist",maxatoms);
|
||||
d_ilist = k_ilist.view<DeviceType>();
|
||||
k_numneigh = DAT::tdual_int_1d("neighlist:numneigh",maxatoms);
|
||||
d_numneigh = k_numneigh.view<DeviceType>();
|
||||
k_neighbors = DAT::tdual_neighbors_2d("neighlist:neighbors",maxatoms,maxneighs);
|
||||
d_neighbors = k_neighbors.view<DeviceType>();
|
||||
d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("neighlist:numneigh",maxatoms);
|
||||
d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d(Kokkos::NoInit("neighlist:neighbors"),maxatoms,maxneighs);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
|
|
@ -68,11 +68,9 @@ public:
|
|||
int maxneighs;
|
||||
|
||||
void grow(int nmax);
|
||||
DAT::tdual_neighbors_2d k_neighbors;
|
||||
typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors;
|
||||
DAT::tdual_int_1d k_ilist; // local indices of I atoms
|
||||
typename ArrayTypes<DeviceType>::t_int_1d d_ilist;
|
||||
DAT::tdual_int_1d k_numneigh; // # of J neighs for each I
|
||||
typename ArrayTypes<DeviceType>::t_int_1d d_numneigh;
|
||||
|
||||
NeighListKokkos(class LAMMPS *lmp);
|
||||
|
|
|
@ -73,16 +73,14 @@ void NPairCopyKokkos<DeviceType>::copy_to_cpu(NeighList *list)
|
|||
NeighListKokkos<DeviceType>* listcopy_kk = (NeighListKokkos<DeviceType>*) listcopy;
|
||||
|
||||
listcopy_kk->k_ilist.template sync<LMPHostType>();
|
||||
listcopy_kk->k_numneigh.template sync<LMPHostType>();
|
||||
listcopy_kk->k_neighbors.template sync<LMPHostType>();
|
||||
|
||||
int inum = listcopy->inum;
|
||||
int gnum = listcopy->gnum;
|
||||
int inum_all = inum;
|
||||
if (list->ghost) inum_all += gnum;
|
||||
auto h_ilist = listcopy_kk->k_ilist.h_view;
|
||||
auto h_numneigh = listcopy_kk->k_numneigh.h_view;
|
||||
auto h_neighbors = listcopy_kk->k_neighbors.h_view;
|
||||
auto h_numneigh = Kokkos::create_mirror_view_and_copy(LMPHostType(),listcopy_kk->d_numneigh);
|
||||
auto h_neighbors = Kokkos::create_mirror_view_and_copy(LMPHostType(),listcopy_kk->d_neighbors);
|
||||
|
||||
list->inum = inum;
|
||||
list->gnum = gnum;
|
||||
|
|
|
@ -73,8 +73,6 @@ void NPairHalffullKokkos<DeviceType,NEWTON>::build(NeighList *list)
|
|||
list->gnum = k_list_full->gnum;
|
||||
|
||||
k_list->k_ilist.template modify<DeviceType>();
|
||||
k_list->k_numneigh.template modify<DeviceType>();
|
||||
k_list->k_neighbors.template modify<DeviceType>();
|
||||
}
|
||||
|
||||
template<class DeviceType, int NEWTON>
|
||||
|
|
|
@ -305,8 +305,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
|
|||
|
||||
if(data.h_resize()) {
|
||||
list->maxneighs = data.h_new_maxneighs() * 1.2;
|
||||
list->k_neighbors = DAT::tdual_neighbors_2d("neighbors", list->d_neighbors.extent(0), list->maxneighs);
|
||||
list->d_neighbors = list->k_neighbors.template view<DeviceType>();
|
||||
list->d_neighbors = typename AT::t_neighbors_2d(Kokkos::NoInit("neighbors"), list->d_neighbors.extent(0), list->maxneighs);
|
||||
data.neigh_list.d_neighbors = list->d_neighbors;
|
||||
data.neigh_list.maxneighs = list->maxneighs;
|
||||
}
|
||||
|
@ -321,8 +320,6 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
|
|||
}
|
||||
|
||||
list->k_ilist.template modify<DeviceType>();
|
||||
list->k_numneigh.template modify<DeviceType>();
|
||||
list->k_neighbors.template modify<DeviceType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
|
|
@ -519,8 +519,6 @@ fprintf(stdout, "Fina%03d %6d inum %6d gnum, total used %6d, allocated %6d\n"
|
|||
#endif
|
||||
|
||||
list->k_ilist.template modify<DeviceType>();
|
||||
list->k_numneigh.template modify<DeviceType>();
|
||||
list->k_neighbors.template modify<DeviceType>();
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -265,47 +265,47 @@ void SNAKokkos<DeviceType>::grow_rij(int newnatom, int newnmax)
|
|||
natom = newnatom;
|
||||
nmax = newnmax;
|
||||
|
||||
inside = t_sna_2i(Kokkos::ViewAllocateWithoutInitializing("sna:inside"),natom,nmax);
|
||||
element = t_sna_2i(Kokkos::ViewAllocateWithoutInitializing("sna:rcutij"),natom,nmax);
|
||||
dedr = t_sna_3d(Kokkos::ViewAllocateWithoutInitializing("sna:dedr"),natom,nmax,3);
|
||||
inside = t_sna_2i(Kokkos::NoInit("sna:inside"),natom,nmax);
|
||||
element = t_sna_2i(Kokkos::NoInit("sna:rcutij"),natom,nmax);
|
||||
dedr = t_sna_3d(Kokkos::NoInit("sna:dedr"),natom,nmax,3);
|
||||
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
if (!host_flag) {
|
||||
|
||||
cayleyklein = t_sna_2ckp(Kokkos::ViewAllocateWithoutInitializing("sna:cayleyklein"), natom, nmax);
|
||||
ulisttot = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot"),1,1,1); // dummy allocation
|
||||
ulisttot_full = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot"),1,1,1);
|
||||
ulisttot_re = t_sna_3d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot_re"),idxu_half_max,nelements,natom);
|
||||
ulisttot_im = t_sna_3d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot_im"),idxu_half_max,nelements,natom);
|
||||
ulisttot_pack = t_sna_4c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot_pack"),32,idxu_max,nelements,(natom+32-1)/32);
|
||||
ulist = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulist"),1,1,1);
|
||||
zlist = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:zlist"),1,1,1);
|
||||
zlist_pack = t_sna_4c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:zlist_pack"),32,idxz_max,ndoubles,(natom+32-1)/32);
|
||||
blist = t_sna_3d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:blist"),idxb_max,ntriples,natom);
|
||||
blist_pack = t_sna_4d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:blist_pack"),32,idxb_max,ntriples,(natom+32-1)/32);
|
||||
ylist = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ylist"),idxu_half_max,nelements,natom);
|
||||
ylist_pack_re = t_sna_4d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ylist_pack_re"),32,idxu_half_max,nelements,(natom+32-1)/32);
|
||||
ylist_pack_im = t_sna_4d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ylist_pack_im"),32,idxu_half_max,nelements,(natom+32-1)/32);
|
||||
dulist = t_sna_4c3_ll(Kokkos::ViewAllocateWithoutInitializing("sna:dulist"),1,1,1);
|
||||
cayleyklein = t_sna_2ckp(Kokkos::NoInit("sna:cayleyklein"), natom, nmax);
|
||||
ulisttot = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot"),1,1,1); // dummy allocation
|
||||
ulisttot_full = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot"),1,1,1);
|
||||
ulisttot_re = t_sna_3d_ll(Kokkos::NoInit("sna:ulisttot_re"),idxu_half_max,nelements,natom);
|
||||
ulisttot_im = t_sna_3d_ll(Kokkos::NoInit("sna:ulisttot_im"),idxu_half_max,nelements,natom);
|
||||
ulisttot_pack = t_sna_4c_ll(Kokkos::NoInit("sna:ulisttot_pack"),32,idxu_max,nelements,(natom+32-1)/32);
|
||||
ulist = t_sna_3c_ll(Kokkos::NoInit("sna:ulist"),1,1,1);
|
||||
zlist = t_sna_3c_ll(Kokkos::NoInit("sna:zlist"),1,1,1);
|
||||
zlist_pack = t_sna_4c_ll(Kokkos::NoInit("sna:zlist_pack"),32,idxz_max,ndoubles,(natom+32-1)/32);
|
||||
blist = t_sna_3d_ll(Kokkos::NoInit("sna:blist"),idxb_max,ntriples,natom);
|
||||
blist_pack = t_sna_4d_ll(Kokkos::NoInit("sna:blist_pack"),32,idxb_max,ntriples,(natom+32-1)/32);
|
||||
ylist = t_sna_3c_ll(Kokkos::NoInit("sna:ylist"),idxu_half_max,nelements,natom);
|
||||
ylist_pack_re = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_re"),32,idxu_half_max,nelements,(natom+32-1)/32);
|
||||
ylist_pack_im = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_im"),32,idxu_half_max,nelements,(natom+32-1)/32);
|
||||
dulist = t_sna_4c3_ll(Kokkos::NoInit("sna:dulist"),1,1,1);
|
||||
} else {
|
||||
#endif
|
||||
rij = t_sna_3d(Kokkos::ViewAllocateWithoutInitializing("sna:rij"),natom,nmax,3);
|
||||
wj = t_sna_2d(Kokkos::ViewAllocateWithoutInitializing("sna:wj"),natom,nmax);
|
||||
rcutij = t_sna_2d(Kokkos::ViewAllocateWithoutInitializing("sna:rcutij"),natom,nmax);
|
||||
ulisttot = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot"),idxu_half_max,nelements,natom);
|
||||
ulisttot_full = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot_full"),idxu_max,nelements,natom);
|
||||
ulisttot_re = t_sna_3d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot_re"),1,1,1);
|
||||
ulisttot_im = t_sna_3d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot_im"),1,1,1);
|
||||
ulisttot_pack = t_sna_4c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot_pack"),1,1,1,1);
|
||||
ulist = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulist"),idxu_cache_max,natom,nmax);
|
||||
zlist = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:zlist"),idxz_max,ndoubles,natom);
|
||||
zlist_pack = t_sna_4c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:zlist_pack"),1,1,1,1);
|
||||
blist = t_sna_3d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:blist"),idxb_max,ntriples,natom);
|
||||
blist_pack = t_sna_4d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:blist_pack"),1,1,1,1);
|
||||
ylist = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ylist"),idxu_half_max,nelements,natom);
|
||||
ylist_pack_re = t_sna_4d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ylist_pack_re"),1,1,1,1);
|
||||
ylist_pack_im = t_sna_4d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ylist_pack_im"),1,1,1,1);
|
||||
dulist = t_sna_4c3_ll(Kokkos::ViewAllocateWithoutInitializing("sna:dulist"),idxu_cache_max,natom,nmax);
|
||||
rij = t_sna_3d(Kokkos::NoInit("sna:rij"),natom,nmax,3);
|
||||
wj = t_sna_2d(Kokkos::NoInit("sna:wj"),natom,nmax);
|
||||
rcutij = t_sna_2d(Kokkos::NoInit("sna:rcutij"),natom,nmax);
|
||||
ulisttot = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot"),idxu_half_max,nelements,natom);
|
||||
ulisttot_full = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot_full"),idxu_max,nelements,natom);
|
||||
ulisttot_re = t_sna_3d_ll(Kokkos::NoInit("sna:ulisttot_re"),1,1,1);
|
||||
ulisttot_im = t_sna_3d_ll(Kokkos::NoInit("sna:ulisttot_im"),1,1,1);
|
||||
ulisttot_pack = t_sna_4c_ll(Kokkos::NoInit("sna:ulisttot_pack"),1,1,1,1);
|
||||
ulist = t_sna_3c_ll(Kokkos::NoInit("sna:ulist"),idxu_cache_max,natom,nmax);
|
||||
zlist = t_sna_3c_ll(Kokkos::NoInit("sna:zlist"),idxz_max,ndoubles,natom);
|
||||
zlist_pack = t_sna_4c_ll(Kokkos::NoInit("sna:zlist_pack"),1,1,1,1);
|
||||
blist = t_sna_3d_ll(Kokkos::NoInit("sna:blist"),idxb_max,ntriples,natom);
|
||||
blist_pack = t_sna_4d_ll(Kokkos::NoInit("sna:blist_pack"),1,1,1,1);
|
||||
ylist = t_sna_3c_ll(Kokkos::NoInit("sna:ylist"),idxu_half_max,nelements,natom);
|
||||
ylist_pack_re = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_re"),1,1,1,1);
|
||||
ylist_pack_im = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_im"),1,1,1,1);
|
||||
dulist = t_sna_4c3_ll(Kokkos::NoInit("sna:dulist"),idxu_cache_max,natom,nmax);
|
||||
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue