Merge pull request #2373 from stanmoore1/kk_wkar

Add workaround for performance regression in Kokkos Package
This commit is contained in:
Axel Kohlmeyer 2020-09-17 20:24:39 -04:00 committed by GitHub
commit 94ad6821f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 44 additions and 53 deletions

View File

@ -42,6 +42,10 @@ enum{FULL=1u,HALFTHREAD=2u,HALF=4u};
#define MAX_TYPES_STACKPARAMS 12
#define NeighClusterSize 8
namespace Kokkos {
  // Short alias for ViewAllocateWithoutInitializing, so that view constructors
  // can be written as Kokkos::NoInit("label") instead of spelling out the full name.
  using NoInit = ViewAllocateWithoutInitializing;
}
struct lmp_float3 {
float x,y,z;
KOKKOS_INLINE_FUNCTION

View File

@ -40,10 +40,8 @@ void NeighListKokkos<DeviceType>::grow(int nmax)
k_ilist = DAT::tdual_int_1d("neighlist:ilist",maxatoms);
d_ilist = k_ilist.view<DeviceType>();
k_numneigh = DAT::tdual_int_1d("neighlist:numneigh",maxatoms);
d_numneigh = k_numneigh.view<DeviceType>();
k_neighbors = DAT::tdual_neighbors_2d("neighlist:neighbors",maxatoms,maxneighs);
d_neighbors = k_neighbors.view<DeviceType>();
d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("neighlist:numneigh",maxatoms);
d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d(Kokkos::NoInit("neighlist:neighbors"),maxatoms,maxneighs);
}
/* ---------------------------------------------------------------------- */

View File

@ -68,11 +68,9 @@ public:
int maxneighs;
void grow(int nmax);
DAT::tdual_neighbors_2d k_neighbors;
typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors;
DAT::tdual_int_1d k_ilist; // local indices of I atoms
typename ArrayTypes<DeviceType>::t_int_1d d_ilist;
DAT::tdual_int_1d k_numneigh; // # of J neighs for each I
typename ArrayTypes<DeviceType>::t_int_1d d_numneigh;
NeighListKokkos(class LAMMPS *lmp);

View File

@ -73,16 +73,14 @@ void NPairCopyKokkos<DeviceType>::copy_to_cpu(NeighList *list)
NeighListKokkos<DeviceType>* listcopy_kk = (NeighListKokkos<DeviceType>*) listcopy;
listcopy_kk->k_ilist.template sync<LMPHostType>();
listcopy_kk->k_numneigh.template sync<LMPHostType>();
listcopy_kk->k_neighbors.template sync<LMPHostType>();
int inum = listcopy->inum;
int gnum = listcopy->gnum;
int inum_all = inum;
if (list->ghost) inum_all += gnum;
auto h_ilist = listcopy_kk->k_ilist.h_view;
auto h_numneigh = listcopy_kk->k_numneigh.h_view;
auto h_neighbors = listcopy_kk->k_neighbors.h_view;
auto h_numneigh = Kokkos::create_mirror_view_and_copy(LMPHostType(),listcopy_kk->d_numneigh);
auto h_neighbors = Kokkos::create_mirror_view_and_copy(LMPHostType(),listcopy_kk->d_neighbors);
list->inum = inum;
list->gnum = gnum;

View File

@ -73,8 +73,6 @@ void NPairHalffullKokkos<DeviceType,NEWTON>::build(NeighList *list)
list->gnum = k_list_full->gnum;
k_list->k_ilist.template modify<DeviceType>();
k_list->k_numneigh.template modify<DeviceType>();
k_list->k_neighbors.template modify<DeviceType>();
}
template<class DeviceType, int NEWTON>

View File

@ -305,8 +305,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
if(data.h_resize()) {
list->maxneighs = data.h_new_maxneighs() * 1.2;
list->k_neighbors = DAT::tdual_neighbors_2d("neighbors", list->d_neighbors.extent(0), list->maxneighs);
list->d_neighbors = list->k_neighbors.template view<DeviceType>();
list->d_neighbors = typename AT::t_neighbors_2d(Kokkos::NoInit("neighbors"), list->d_neighbors.extent(0), list->maxneighs);
data.neigh_list.d_neighbors = list->d_neighbors;
data.neigh_list.maxneighs = list->maxneighs;
}
@ -321,8 +320,6 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
}
list->k_ilist.template modify<DeviceType>();
list->k_numneigh.template modify<DeviceType>();
list->k_neighbors.template modify<DeviceType>();
}
/* ---------------------------------------------------------------------- */

View File

@ -519,8 +519,6 @@ fprintf(stdout, "Fina%03d %6d inum %6d gnum, total used %6d, allocated %6d\n"
#endif
list->k_ilist.template modify<DeviceType>();
list->k_numneigh.template modify<DeviceType>();
list->k_neighbors.template modify<DeviceType>();
}

View File

@ -265,47 +265,47 @@ void SNAKokkos<DeviceType>::grow_rij(int newnatom, int newnmax)
natom = newnatom;
nmax = newnmax;
inside = t_sna_2i(Kokkos::ViewAllocateWithoutInitializing("sna:inside"),natom,nmax);
element = t_sna_2i(Kokkos::ViewAllocateWithoutInitializing("sna:rcutij"),natom,nmax);
dedr = t_sna_3d(Kokkos::ViewAllocateWithoutInitializing("sna:dedr"),natom,nmax,3);
inside = t_sna_2i(Kokkos::NoInit("sna:inside"),natom,nmax);
element = t_sna_2i(Kokkos::NoInit("sna:rcutij"),natom,nmax);
dedr = t_sna_3d(Kokkos::NoInit("sna:dedr"),natom,nmax,3);
#ifdef LMP_KOKKOS_GPU
if (!host_flag) {
cayleyklein = t_sna_2ckp(Kokkos::ViewAllocateWithoutInitializing("sna:cayleyklein"), natom, nmax);
ulisttot = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot"),1,1,1); // dummy allocation
ulisttot_full = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot"),1,1,1);
ulisttot_re = t_sna_3d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot_re"),idxu_half_max,nelements,natom);
ulisttot_im = t_sna_3d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot_im"),idxu_half_max,nelements,natom);
ulisttot_pack = t_sna_4c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot_pack"),32,idxu_max,nelements,(natom+32-1)/32);
ulist = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulist"),1,1,1);
zlist = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:zlist"),1,1,1);
zlist_pack = t_sna_4c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:zlist_pack"),32,idxz_max,ndoubles,(natom+32-1)/32);
blist = t_sna_3d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:blist"),idxb_max,ntriples,natom);
blist_pack = t_sna_4d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:blist_pack"),32,idxb_max,ntriples,(natom+32-1)/32);
ylist = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ylist"),idxu_half_max,nelements,natom);
ylist_pack_re = t_sna_4d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ylist_pack_re"),32,idxu_half_max,nelements,(natom+32-1)/32);
ylist_pack_im = t_sna_4d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ylist_pack_im"),32,idxu_half_max,nelements,(natom+32-1)/32);
dulist = t_sna_4c3_ll(Kokkos::ViewAllocateWithoutInitializing("sna:dulist"),1,1,1);
cayleyklein = t_sna_2ckp(Kokkos::NoInit("sna:cayleyklein"), natom, nmax);
ulisttot = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot"),1,1,1); // dummy allocation
ulisttot_full = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot"),1,1,1);
ulisttot_re = t_sna_3d_ll(Kokkos::NoInit("sna:ulisttot_re"),idxu_half_max,nelements,natom);
ulisttot_im = t_sna_3d_ll(Kokkos::NoInit("sna:ulisttot_im"),idxu_half_max,nelements,natom);
ulisttot_pack = t_sna_4c_ll(Kokkos::NoInit("sna:ulisttot_pack"),32,idxu_max,nelements,(natom+32-1)/32);
ulist = t_sna_3c_ll(Kokkos::NoInit("sna:ulist"),1,1,1);
zlist = t_sna_3c_ll(Kokkos::NoInit("sna:zlist"),1,1,1);
zlist_pack = t_sna_4c_ll(Kokkos::NoInit("sna:zlist_pack"),32,idxz_max,ndoubles,(natom+32-1)/32);
blist = t_sna_3d_ll(Kokkos::NoInit("sna:blist"),idxb_max,ntriples,natom);
blist_pack = t_sna_4d_ll(Kokkos::NoInit("sna:blist_pack"),32,idxb_max,ntriples,(natom+32-1)/32);
ylist = t_sna_3c_ll(Kokkos::NoInit("sna:ylist"),idxu_half_max,nelements,natom);
ylist_pack_re = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_re"),32,idxu_half_max,nelements,(natom+32-1)/32);
ylist_pack_im = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_im"),32,idxu_half_max,nelements,(natom+32-1)/32);
dulist = t_sna_4c3_ll(Kokkos::NoInit("sna:dulist"),1,1,1);
} else {
#endif
rij = t_sna_3d(Kokkos::ViewAllocateWithoutInitializing("sna:rij"),natom,nmax,3);
wj = t_sna_2d(Kokkos::ViewAllocateWithoutInitializing("sna:wj"),natom,nmax);
rcutij = t_sna_2d(Kokkos::ViewAllocateWithoutInitializing("sna:rcutij"),natom,nmax);
ulisttot = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot"),idxu_half_max,nelements,natom);
ulisttot_full = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot_full"),idxu_max,nelements,natom);
ulisttot_re = t_sna_3d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot_re"),1,1,1);
ulisttot_im = t_sna_3d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot_im"),1,1,1);
ulisttot_pack = t_sna_4c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulisttot_pack"),1,1,1,1);
ulist = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ulist"),idxu_cache_max,natom,nmax);
zlist = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:zlist"),idxz_max,ndoubles,natom);
zlist_pack = t_sna_4c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:zlist_pack"),1,1,1,1);
blist = t_sna_3d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:blist"),idxb_max,ntriples,natom);
blist_pack = t_sna_4d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:blist_pack"),1,1,1,1);
ylist = t_sna_3c_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ylist"),idxu_half_max,nelements,natom);
ylist_pack_re = t_sna_4d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ylist_pack_re"),1,1,1,1);
ylist_pack_im = t_sna_4d_ll(Kokkos::ViewAllocateWithoutInitializing("sna:ylist_pack_im"),1,1,1,1);
dulist = t_sna_4c3_ll(Kokkos::ViewAllocateWithoutInitializing("sna:dulist"),idxu_cache_max,natom,nmax);
rij = t_sna_3d(Kokkos::NoInit("sna:rij"),natom,nmax,3);
wj = t_sna_2d(Kokkos::NoInit("sna:wj"),natom,nmax);
rcutij = t_sna_2d(Kokkos::NoInit("sna:rcutij"),natom,nmax);
ulisttot = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot"),idxu_half_max,nelements,natom);
ulisttot_full = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot_full"),idxu_max,nelements,natom);
ulisttot_re = t_sna_3d_ll(Kokkos::NoInit("sna:ulisttot_re"),1,1,1);
ulisttot_im = t_sna_3d_ll(Kokkos::NoInit("sna:ulisttot_im"),1,1,1);
ulisttot_pack = t_sna_4c_ll(Kokkos::NoInit("sna:ulisttot_pack"),1,1,1,1);
ulist = t_sna_3c_ll(Kokkos::NoInit("sna:ulist"),idxu_cache_max,natom,nmax);
zlist = t_sna_3c_ll(Kokkos::NoInit("sna:zlist"),idxz_max,ndoubles,natom);
zlist_pack = t_sna_4c_ll(Kokkos::NoInit("sna:zlist_pack"),1,1,1,1);
blist = t_sna_3d_ll(Kokkos::NoInit("sna:blist"),idxb_max,ntriples,natom);
blist_pack = t_sna_4d_ll(Kokkos::NoInit("sna:blist_pack"),1,1,1,1);
ylist = t_sna_3c_ll(Kokkos::NoInit("sna:ylist"),idxu_half_max,nelements,natom);
ylist_pack_re = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_re"),1,1,1,1);
ylist_pack_im = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_im"),1,1,1,1);
dulist = t_sna_4c3_ll(Kokkos::NoInit("sna:dulist"),idxu_cache_max,natom,nmax);
#ifdef LMP_KOKKOS_GPU
}