From a86572f4fcb47b817b173cd2e4d076b2af2aa897 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 29 Sep 2017 16:20:19 -0600 Subject: [PATCH] Reduce memory churn in Kokkos package --- src/KOKKOS/comm_kokkos.cpp | 54 ++++++++++++++++++------------------- src/KOKKOS/comm_kokkos.h | 1 + src/KOKKOS/npair_kokkos.cpp | 6 +++-- 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index f5ed0f525f..ba44ea813f 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -46,7 +46,8 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp) if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]); memory->sfree(sendlist); sendlist = NULL; - k_sendlist = ArrayTypes::tdual_int_2d(); + k_sendlist = DAT::tdual_int_2d(); + k_total_send = DAT::tdual_int_scalar("comm::k_total_send"); // error check for disallow of OpenMP threads? @@ -57,12 +58,12 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp) memory->destroy(buf_recv); buf_recv = NULL; - k_exchange_sendlist = ArrayTypes:: + k_exchange_sendlist = DAT:: tdual_int_1d("comm:k_exchange_sendlist",100); - k_exchange_copylist = ArrayTypes:: + k_exchange_copylist = DAT:: tdual_int_1d("comm:k_exchange_copylist",100); - k_count = ArrayTypes::tdual_int_1d("comm:k_count",1); - k_sendflag = ArrayTypes::tdual_int_1d("comm:k_sendflag",100); + k_count = DAT::tdual_int_1d("comm:k_count",1); + k_sendflag = DAT::tdual_int_1d("comm:k_sendflag",100); memory->destroy(maxsendlist); maxsendlist = NULL; @@ -659,11 +660,11 @@ struct BuildBorderListFunctor { int iswap,maxsendlist; int nfirst,nlast,dim; typename AT::t_int_2d sendlist; - typename AT::t_int_1d nsend; + typename AT::t_int_scalar nsend; BuildBorderListFunctor(typename AT::tdual_x_array _x, typename AT::tdual_int_2d _sendlist, - typename AT::tdual_int_1d _nsend,int _nfirst, + typename AT::tdual_int_scalar _nsend,int _nfirst, int _nlast, int _dim, X_FLOAT _lo, X_FLOAT _hi, int _iswap, int _maxsendlist): @@ -684,7 +685,7 @@ struct BuildBorderListFunctor { for (int i=teamstart + dev.team_rank(); i= lo && x(i,dim) <= hi) mysend++; } - const int my_store_pos = dev.team_scan(mysend,&nsend(0)); + const int my_store_pos = dev.team_scan(mysend,&nsend()); if (my_store_pos+mysend < maxsendlist) { mysend = my_store_pos; @@ -763,37 +764,34 @@ void CommKokkos::borders_device() { if (sendflag) { if (!bordergroup || ineed >= 2) { if (style == SINGLE) { - typename ArrayTypes::tdual_int_1d total_send("TS",1); - total_send.h_view(0) = 0; - if(exec_space == Device) { - total_send.template modify(); - total_send.template sync(); - } + k_total_send.h_view() = 0; + k_total_send.template modify(); + k_total_send.template sync(); BuildBorderListFunctor f(atomKK->k_x,k_sendlist, - total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); + k_total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); Kokkos::TeamPolicy config((nlast-nfirst+127)/128,128); Kokkos::parallel_for(config,f); - total_send.template modify(); - total_send.template sync(); + k_total_send.template modify(); + k_total_send.template sync(); - if(total_send.h_view(0) >= maxsendlist[iswap]) { - grow_list(iswap,total_send.h_view(0)); + if(k_total_send.h_view() >= maxsendlist[iswap]) { + grow_list(iswap,k_total_send.h_view()); k_sendlist.modify(); - total_send.h_view(0) = 0; + k_total_send.h_view() = 0; if(exec_space == Device) { - total_send.template modify(); - total_send.template sync(); + k_total_send.template modify(); + k_total_send.template sync(); } BuildBorderListFunctor f(atomKK->k_x,k_sendlist, - total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); + k_total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]); Kokkos::TeamPolicy config((nlast-nfirst+127)/128,128); Kokkos::parallel_for(config,f); - total_send.template modify(); - total_send.template sync(); + k_total_send.template modify(); + k_total_send.template sync(); } - nsend = total_send.h_view(0); + nsend = k_total_send.h_view(); } else { error->all(FLERR,"Required border comm not yet " "implemented with Kokkos"); @@ -961,7 +959,7 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space) buf_send = k_buf_send.view().ptr_on_device(); } else { - k_buf_send = ArrayTypes:: + k_buf_send = DAT:: tdual_xfloat_2d("comm:k_buf_send",maxsend_border,atom->avec->size_border); buf_send = k_buf_send.view().ptr_on_device(); } @@ -975,7 +973,7 @@ void CommKokkos::grow_recv_kokkos(int n, ExecutionSpace space) { maxrecv = static_cast (BUFFACTOR * n); int maxrecv_border = (maxrecv+BUFEXTRA+5)/atom->avec->size_border + 2; - k_buf_recv = ArrayTypes:: + k_buf_recv = DAT:: tdual_xfloat_2d("comm:k_buf_recv",maxrecv_border,atom->avec->size_border); buf_recv = k_buf_recv.view().ptr_on_device(); } diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h index a8ae973124..4065efd000 100644 --- a/src/KOKKOS/comm_kokkos.h +++ b/src/KOKKOS/comm_kokkos.h @@ -53,6 +53,7 @@ class CommKokkos : public CommBrick { protected: DAT::tdual_int_2d k_sendlist; + DAT::tdual_int_scalar k_total_send; DAT::tdual_xfloat_2d k_buf_send,k_buf_recv; DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag; DAT::tdual_int_1d k_count; diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index b568bd5c93..727a81c87f 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -88,13 +88,15 @@ void NPairKokkos::copy_stencil_info() int maxstencil = ns->get_maxstencil(); - k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil); + if (maxstencil > k_stencil.dimension_0()) + k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil); for (int k = 0; k < maxstencil; k++) k_stencil.h_view(k) = ns->stencil[k]; k_stencil.modify(); k_stencil.sync(); if (GHOST) { - k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil); + if (maxstencil > k_stencilxyz.dimension_0()) + k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil); for (int k = 0; k < maxstencil; k++) { k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0]; k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1];