enable MSM to work with the new GridComm class

This commit is contained in:
Steve Plimpton 2020-08-12 17:41:31 -06:00
parent e00544c8bf
commit 3a1b88c57f
6 changed files with 368 additions and 304 deletions

View File

@ -17,6 +17,7 @@
#include "kspace.h"
#include "irregular.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
@ -24,12 +25,17 @@ enum{REGULAR,TILED};
#define SWAPDELTA 8
// NOTE: gridcomm needs to be world for TILED, will it work with MSM?
// NOTE: Tiled implementation here only works for RCB, not general tiled
/* ----------------------------------------------------------------------
NOTES
tiled implementation currently only works for RCB, not general tiled
if o lo indices for ghost cells are < 0 or o hi indices are >= N,
then the grid is treated as periodic in that dimension,
and communication is done across the periodic boundaries
------------------------------------------------------------------------- */
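To make the periodic treatment concrete, here is a minimal illustration (not part of this commit) of how an out-of-range ghost index maps back onto an owned cell; the helper name wrap_index is hypothetical.

// hypothetical helper, for illustration only:
// wrap a ghost grid index into the owned range 0..n-1
// when the grid is periodic in that dimension
static int wrap_index(int i, int n)
{
  return ((i % n) + n) % n;   // e.g. wrap_index(-1,8) = 7, wrap_index(8,8) = 0
}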
/* ----------------------------------------------------------------------
gcomm = MPI communicator that shares this grid
does not have to be world, see MSM
constructor called by all classes except MSM
gcomm = world communicator
gn xyz = size of global grid
i xyz lohi = portion of global grid this proc owns, 0 <= index < N
o xyz lohi = owned grid portion + ghost grid cells needed in all directions
@ -44,130 +50,79 @@ GridComm::GridComm(LAMMPS *lmp, MPI_Comm gcomm,
int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi)
: Pointers(lmp)
{
gridcomm = gcomm;
MPI_Comm_rank(gridcomm,&me);
MPI_Comm_size(gridcomm,&nprocs);
nx = gnx;
ny = gny;
nz = gnz;
inxlo = ixlo;
inxhi = ixhi;
inylo = iylo;
inyhi = iyhi;
inzlo = izlo;
inzhi = izhi;
outxlo = oxlo;
outxhi = oxhi;
outylo = oylo;
outyhi = oyhi;
outzlo = ozlo;
outzhi = ozhi;
// layout == REGULAR or TILED
// for REGULAR, proc xyz lohi = my 6 neighbor procs
layout = REGULAR;
if (comm->layout == Comm::LAYOUT_TILED) layout = TILED;
outxlo_max = oxlo;
outxhi_max = oxhi;
outylo_max = oylo;
outyhi_max = oyhi;
outzlo_max = ozlo;
outzhi_max = ozhi;
else layout = REGULAR;
if (layout == REGULAR) {
int (*procneigh)[2] = comm->procneigh;
procxlo = procneigh[0][0];
procxhi = procneigh[0][1];
procylo = procneigh[1][0];
procyhi = procneigh[1][1];
proczlo = procneigh[2][0];
proczhi = procneigh[2][1];
initialize(gcomm,gnx,gny,gnz,
ixlo,ixhi,iylo,iyhi,izlo,izhi,
oxlo,oxhi,oylo,oyhi,ozlo,ozhi,
oxlo,oxhi,oylo,oyhi,ozlo,ozhi,
procneigh[0][0],procneigh[0][1],
procneigh[1][0],procneigh[1][1],
procneigh[2][0],procneigh[2][1]);
} else {
initialize(gcomm,gnx,gny,gnz,
ixlo,ixhi,iylo,iyhi,izlo,izhi,
oxlo,oxhi,oylo,oyhi,ozlo,ozhi,
oxlo,oxhi,oylo,oyhi,ozlo,ozhi,
0,0,0,0,0,0);
}
nswap = maxswap = 0;
swap = NULL;
nsend = nrecv = ncopy = 0;
send = NULL;
recv = NULL;
copy = NULL;
requests = NULL;
}
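For orientation, a hedged sketch of how a world-communicator caller might drive this constructor together with setup() and forward_comm_kspace() after this commit; the grid-extent names (nx_p, nxlo_in, nxlo_out, ...) and the FORWARD_RHO value are placeholders borrowed from typical KSpace usage, not taken from this file.

// sketch only: typical call pattern for the world-communicator constructor
GridComm *gc = new GridComm(lmp,world,nx_p,ny_p,nz_p,
                            nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                            nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);

int nbuf1,nbuf2;
gc->setup(nbuf1,nbuf2);                    // buffer sizes in grid points

double *buf1,*buf2;
memory->create(buf1,nbuf1,"kspace:buf1");  // 1 double per grid point in this example
memory->create(buf2,nbuf2,"kspace:buf2");

// fill ghost cells of a grid quantity owned by this KSpace instance
gc->forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
                        buf1,buf2,MPI_DOUBLE);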
/* ----------------------------------------------------------------------
same as first constructor except o xyz lohi max are added arguments
this is for the case when the caller stores the grid in a larger array than o xyz lohi
only affects indices() method which generates indices into the caller's array
constructor called by MSM
gcomm = world communicator or sub-communicator for a hierarchical grid
flag = 1 if e xyz lohi values = larger grid stored by caller in gcomm = world
flag = 2 if e xyz lohi values = 6 neighbor procs in gcomm
gn xyz = size of global grid
i xyz lohi = portion of global grid this proc owns, 0 <= index < N
o xyz lohi = owned grid portion + ghost grid cells needed in all directions
e xyz lohi for flag = 1: extent of larger grid stored by caller
e xyz lohi for flag = 2: 6 neighbor procs
------------------------------------------------------------------------- */
GridComm::GridComm(LAMMPS *lmp, MPI_Comm gcomm,
GridComm::GridComm(LAMMPS *lmp, MPI_Comm gcomm, int flag,
int gnx, int gny, int gnz,
int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi,
int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi,
int oxlo_max, int oxhi_max, int oylo_max, int oyhi_max,
int ozlo_max, int ozhi_max)
int exlo, int exhi, int eylo, int eyhi, int ezlo, int ezhi)
: Pointers(lmp)
{
gridcomm = gcomm;
MPI_Comm_rank(gridcomm,&me);
MPI_Comm_size(gridcomm,&nprocs);
nx = gnx;
ny = gny;
nz = gnz;
inxlo = ixlo;
inxhi = ixhi;
inylo = iylo;
inyhi = iyhi;
inzlo = izlo;
inzhi = izhi;
outxlo = oxlo;
outxhi = oxhi;
outylo = oylo;
outyhi = oyhi;
outzlo = ozlo;
outzhi = ozhi;
outxlo_max = oxlo_max;
outxhi_max = oxhi_max;
outylo_max = oylo_max;
outyhi_max = oyhi_max;
outzlo_max = ozlo_max;
outzhi_max = ozhi_max;
// layout == REGULAR or TILED
// for REGULAR, proc xyz lohi = my 6 neighbor procs
layout = REGULAR;
if (comm->layout == Comm::LAYOUT_TILED) layout = TILED;
else layout = REGULAR;
if (layout == REGULAR) {
int (*procneigh)[2] = comm->procneigh;
procxlo = procneigh[0][0];
procxhi = procneigh[0][1];
procylo = procneigh[1][0];
procyhi = procneigh[1][1];
proczlo = procneigh[2][0];
proczhi = procneigh[2][1];
if (flag == 1) {
if (layout == REGULAR) {
// this assumes gcomm = world
int (*procneigh)[2] = comm->procneigh;
initialize(gcomm,gnx,gny,gnz,
ixlo,ixhi,iylo,iyhi,izlo,izhi,
oxlo,oxhi,oylo,oyhi,ozlo,ozhi,
exlo,exhi,eylo,eyhi,ezlo,ezhi,
procneigh[0][0],procneigh[0][1],
procneigh[1][0],procneigh[1][1],
procneigh[2][0],procneigh[2][1]);
} else {
initialize(gcomm,gnx,gny,gnz,
ixlo,ixhi,iylo,iyhi,izlo,izhi,
oxlo,oxhi,oylo,oyhi,ozlo,ozhi,
exlo,exhi,eylo,eyhi,ezlo,ezhi,
0,0,0,0,0,0);
}
} else if (flag == 2) {
if (layout == REGULAR) {
initialize(gcomm,gnx,gny,gnz,
ixlo,ixhi,iylo,iyhi,izlo,izhi,
oxlo,oxhi,oylo,oyhi,ozlo,ozhi,
oxlo,oxhi,oylo,oyhi,ozlo,ozhi,
exlo,exhi,eylo,eyhi,ezlo,ezhi);
} else {
error->all(FLERR,"GridComm does not support tiled layout with neighbor procs");
}
}
nswap = maxswap = 0;
swap = NULL;
nsend = nrecv = ncopy = 0;
send = NULL;
recv = NULL;
copy = NULL;
requests = NULL;
}
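A brief hedged sketch of the two call patterns this constructor supports, condensing the usage that appears later in msm.cpp; all variable names here are placeholders.

// flag = 1: e xyz lohi give the extent of the larger array the caller stores
//           (gcomm must be world so comm->procneigh is valid)
gcall = new GridComm(lmp,world,1,nx,ny,nz,
                     ixlo,ixhi,iylo,iyhi,izlo,izhi,                          // owned cells
                     oxlo_all,oxhi_all,oylo_all,oyhi_all,ozlo_all,ozhi_all,  // owned + ghost
                     oxlo,oxhi,oylo,oyhi,ozlo,ozhi);                         // full stored extent

// flag = 2: e xyz lohi are instead the 6 neighbor proc ranks
//           within the (possibly smaller) level communicator
gc[n] = new GridComm(lmp,world_levels[n],2,nx,ny,nz,
                     ixlo,ixhi,iylo,iyhi,izlo,izhi,
                     oxlo,oxhi,oylo,oyhi,ozlo,ozhi,
                     pxlo,pxhi,pylo,pyhi,pzlo,pzhi);                         // neighbor ranks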
/* ---------------------------------------------------------------------- */
@ -201,6 +156,69 @@ GridComm::~GridComm()
delete [] requests;
}
/* ----------------------------------------------------------------------
store constructor args in local variables
------------------------------------------------------------------------- */
void GridComm::initialize(MPI_Comm gcomm,
int gnx, int gny, int gnz,
int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi,
int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi,
int fxlo, int fxhi, int fylo, int fyhi, int fzlo, int fzhi,
int pxlo, int pxhi, int pylo, int pyhi, int pzlo, int pzhi)
{
gridcomm = gcomm;
MPI_Comm_rank(gridcomm,&me);
MPI_Comm_size(gridcomm,&nprocs);
nx = gnx;
ny = gny;
nz = gnz;
inxlo = ixlo;
inxhi = ixhi;
inylo = iylo;
inyhi = iyhi;
inzlo = izlo;
inzhi = izhi;
outxlo = oxlo;
outxhi = oxhi;
outylo = oylo;
outyhi = oyhi;
outzlo = ozlo;
outzhi = ozhi;
fullxlo = fxlo;
fullxhi = fxhi;
fullylo = fylo;
fullyhi = fyhi;
fullzlo = fzlo;
fullzhi = fzhi;
// for REGULAR layout, proc xyz lohi = my 6 neighbor procs in this MPI_Comm
if (layout == REGULAR) {
procxlo = pxlo;
procxhi = pxhi;
procylo = pylo;
procyhi = pyhi;
proczlo = pzlo;
proczhi = pzhi;
}
// internal data initializations
nswap = maxswap = 0;
swap = NULL;
nsend = nrecv = ncopy = 0;
send = NULL;
recv = NULL;
copy = NULL;
requests = NULL;
}
/* ---------------------------------------------------------------------- */
void GridComm::setup(int &nbuf1, int &nbuf2)
@ -504,6 +522,7 @@ void GridComm::setup_regular(int &nbuf1, int &nbuf2)
}
/* ----------------------------------------------------------------------
NOTE: need to doc this header
------------------------------------------------------------------------- */
void GridComm::setup_tiled(int &nbuf1, int &nbuf2)
@ -725,6 +744,8 @@ void GridComm::setup_tiled(int &nbuf1, int &nbuf2)
}
/* ----------------------------------------------------------------------
NOTE: need to doc this header
recursive ...
------------------------------------------------------------------------- */
void GridComm::ghost_box_drop(int *box, int *pbc)
@ -803,10 +824,12 @@ void GridComm::ghost_box_drop(int *box, int *pbc)
}
/* ----------------------------------------------------------------------
NOTE: need to doc this header
recursive ...
------------------------------------------------------------------------- */
void GridComm::box_drop_grid(int *box, int proclower, int procupper,
int &np, int *plist)
int &np, int *plist)
{
// end recursion when partition is a single proc
// add proclower to plist
@ -880,7 +903,7 @@ int GridComm::ghost_adjacent_tiled()
------------------------------------------------------------------------- */
void GridComm::forward_comm_kspace(KSpace *kspace, int nper, int nbyte, int which,
void *buf1, void *buf2, MPI_Datatype datatype)
void *buf1, void *buf2, MPI_Datatype datatype)
{
if (layout == REGULAR)
forward_comm_kspace_regular(kspace,nper,nbyte,which,buf1,buf2,datatype);
@ -1083,15 +1106,15 @@ int GridComm::indices(int *&list,
memory->create(list,nmax,"CommGrid:indices");
if (nmax == 0) return 0;
int nx = (outxhi_max-outxlo_max+1);
int ny = (outyhi_max-outylo_max+1);
int nx = (fullxhi-fullxlo+1);
int ny = (fullyhi-fullylo+1);
int n = 0;
int ix,iy,iz;
for (iz = zlo; iz <= zhi; iz++)
for (iy = ylo; iy <= yhi; iy++)
for (ix = xlo; ix <= xhi; ix++)
list[n++] = (iz-outzlo_max)*ny*nx + (iy-outylo_max)*nx + (ix-outxlo_max);
list[n++] = (iz-fullzlo)*ny*nx + (iy-fullylo)*nx + (ix-fullxlo);
return nmax;
}
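As a concrete check of the offsets this loop generates (numbers invented for illustration): if the caller's full array spans -2..9 in each dimension, then nx = ny = 12, and the grid point (ix,iy,iz) = (0,0,0) is assigned offset (0-(-2))*12*12 + (0-(-2))*12 + (0-(-2)) = 288 + 24 + 2 = 314 into the caller's flattened 3d array.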

View File

@ -23,7 +23,7 @@ class GridComm : protected Pointers {
GridComm(class LAMMPS *, MPI_Comm, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
GridComm(class LAMMPS *, MPI_Comm, int, int, int,
GridComm(class LAMMPS *, MPI_Comm, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
@ -38,7 +38,8 @@ class GridComm : protected Pointers {
private:
int me,nprocs;
int layout; // REGULAR or TILED
MPI_Comm gridcomm;
MPI_Comm gridcomm; // communicator for this class
// usually world, but MSM calls with subset
// inputs from caller via constructor
@ -48,21 +49,21 @@ class GridComm : protected Pointers {
int inzlo,inzhi;
int outxlo,outxhi; // inclusive extent of my grid chunk plus
int outylo,outyhi; // ghost cells in all 6 directions
int outzlo,outzhi; // lo indices can be < 0, hi indices can be >= N
int outxlo_max,outxhi_max; // ??
int outylo_max,outyhi_max;
int outzlo_max,outzhi_max;
int outzlo,outzhi; // lo indices can be < 0, hi indices can be >= N
int fullxlo,fullxhi; // extent of grid chunk that caller stores
int fullylo,fullyhi; // can be same as out indices or larger
int fullzlo,fullzhi;
// -------------------------------------------
// internal variables for REGULAR layout
// -------------------------------------------
int procxlo,procxhi; // 6 neighbor procs that adjoin me
int procylo,procyhi; // not used for comm_style = tiled
int procylo,procyhi; // not used for comm_style = tiled
int proczlo,proczhi;
int ghostxlo,ghostxhi; // # of my owned grid planes needed
int ghostylo,ghostyhi; // by neighbor procs in each dir as their ghost planes
int ghostylo,ghostyhi; // by neighbor procs in each dir as their ghost planes
int ghostzlo,ghostzhi;
// swap = exchange of owned and ghost grid cells between 2 procs, including self
@ -83,8 +84,8 @@ class GridComm : protected Pointers {
// internal variables for TILED layout
// -------------------------------------------
int *overlap_procs;
MPI_Request *requests;
int *overlap_procs; // length = Nprocs in communicator
MPI_Request *requests; // length = max # of messages this proc receives
// RCB tree of cut info
// each proc contributes one value, except proc 0
@ -174,7 +175,12 @@ class GridComm : protected Pointers {
// -------------------------------------------
// internal methods
// -------------------------------------------
void initialize(MPI_Comm, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
void setup_regular(int &, int &);
void setup_tiled(int &, int &);
void ghost_box_drop(int *, int *);

View File

@ -42,6 +42,7 @@ using namespace MathConst;
enum{REVERSE_RHO,REVERSE_AD,REVERSE_AD_PERATOM};
enum{FORWARD_RHO,FORWARD_AD,FORWARD_AD_PERATOM};
/* ---------------------------------------------------------------------- */
MSM::MSM(LAMMPS *lmp) : KSpace(lmp),
@ -49,14 +50,15 @@ MSM::MSM(LAMMPS *lmp) : KSpace(lmp),
ny_msm(NULL), nz_msm(NULL), nxlo_in(NULL), nylo_in(NULL), nzlo_in(NULL),
nxhi_in(NULL), nyhi_in(NULL), nzhi_in(NULL), nxlo_out(NULL), nylo_out(NULL),
nzlo_out(NULL), nxhi_out(NULL), nyhi_out(NULL), nzhi_out(NULL), ngrid(NULL),
active_flag(NULL), alpha(NULL), betax(NULL), betay(NULL), betaz(NULL), peratom_allocate_flag(0),
active_flag(NULL), alpha(NULL), betax(NULL), betay(NULL), betaz(NULL),
peratom_allocate_flag(0),
levels(0), world_levels(NULL), qgrid(NULL), egrid(NULL), v0grid(NULL), v1grid(NULL),
v2grid(NULL), v3grid(NULL), v4grid(NULL), v5grid(NULL), g_direct(NULL),
v0_direct(NULL), v1_direct(NULL), v2_direct(NULL), v3_direct(NULL), v4_direct(NULL),
v5_direct(NULL), g_direct_top(NULL), v0_direct_top(NULL), v1_direct_top(NULL),
v2_direct_top(NULL), v3_direct_top(NULL), v4_direct_top(NULL), v5_direct_top(NULL),
phi1d(NULL), dphi1d(NULL), procneigh_levels(NULL), cg(NULL), cg_peratom(NULL),
cg_all(NULL), cg_peratom_all(NULL), part2grid(NULL), boxlo(NULL)
phi1d(NULL), dphi1d(NULL), procneigh_levels(NULL), gc(NULL),
gcall(NULL), part2grid(NULL), boxlo(NULL)
{
msmflag = 1;
@ -117,6 +119,7 @@ MSM::~MSM()
delete [] factors;
deallocate();
if (peratom_allocate_flag) deallocate_peratom();
deallocate_levels();
memory->destroy(part2grid);
memory->destroy(g_direct);
memory->destroy(g_direct_top);
@ -132,7 +135,6 @@ MSM::~MSM()
memory->destroy(v3_direct_top);
memory->destroy(v4_direct_top);
memory->destroy(v5_direct_top);
deallocate_levels();
}
/* ----------------------------------------------------------------------
@ -397,17 +399,6 @@ void MSM::setup()
// don't invoke allocate_peratom(), compute() will allocate when needed
allocate();
// setup commgrid
cg_all->ghost_notify();
cg_all->setup();
for (int n=0; n<levels; n++) {
if (!active_flag[n]) continue;
cg[n]->ghost_notify();
cg[n]->setup();
}
}
/* ----------------------------------------------------------------------
@ -448,16 +439,7 @@ void MSM::compute(int eflag, int vflag)
// invoke allocate_peratom() if needed for first time
if (vflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom_all->ghost_notify();
cg_peratom_all->setup();
for (int n=0; n<levels; n++) {
if (!active_flag[n]) continue;
cg_peratom[n]->ghost_notify();
cg_peratom[n]->setup();
}
}
if (vflag_atom && !peratom_allocate_flag) allocate_peratom();
// convert atoms from box to lamda coords
@ -483,7 +465,8 @@ void MSM::compute(int eflag, int vflag)
// to fully sum contribution in their 3d grid
current_level = 0;
cg_all->reverse_comm(this,REVERSE_RHO);
gcall->reverse_comm_kspace(this,1,sizeof(double),REVERSE_RHO,
gcall_buf1,gcall_buf2,MPI_DOUBLE);
// forward communicate charge density values to fill ghost grid points
// compute direct sum interaction and then restrict to coarser grid
@ -491,8 +474,8 @@ void MSM::compute(int eflag, int vflag)
for (int n=0; n<=levels-2; n++) {
if (!active_flag[n]) continue;
current_level = n;
cg[n]->forward_comm(this,FORWARD_RHO);
gc[n]->forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
direct(n);
restriction(n);
}
@ -503,11 +486,18 @@ void MSM::compute(int eflag, int vflag)
if (active_flag[levels-1]) {
if (domain->nonperiodic) {
current_level = levels-1;
cg[levels-1]->forward_comm(this,FORWARD_RHO);
gc[levels-1]->
forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
direct_top(levels-1);
cg[levels-1]->reverse_comm(this,REVERSE_AD);
gc[levels-1]->
reverse_comm_kspace(this,1,sizeof(double),REVERSE_AD,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[levels-1]->
reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
} else {
// Here using MPI_Allreduce is cheaper than using commgrid
grid_swap_forward(levels-1,qgrid[levels-1]);
@ -515,7 +505,9 @@ void MSM::compute(int eflag, int vflag)
grid_swap_reverse(levels-1,egrid[levels-1]);
current_level = levels-1;
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[levels-1]->
reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
}
}
@ -527,24 +519,28 @@ void MSM::compute(int eflag, int vflag)
prolongation(n);
current_level = n;
cg[n]->reverse_comm(this,REVERSE_AD);
gc[n]->reverse_comm_kspace(this,1,sizeof(double),REVERSE_AD,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
// extra per-atom virial communication
if (vflag_atom)
cg_peratom[n]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[n]->reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
}
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
current_level = 0;
cg_all->forward_comm(this,FORWARD_AD);
gcall->forward_comm_kspace(this,1,sizeof(double),FORWARD_AD,
gcall_buf1,gcall_buf2,MPI_DOUBLE);
// extra per-atom energy/virial communication
if (vflag_atom)
cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);
gcall->forward_comm_kspace(this,6,sizeof(double),FORWARD_AD_PERATOM,
gcall_buf1,gcall_buf2,MPI_DOUBLE);
// calculate the force on my particles (interpolation)
@ -603,8 +599,7 @@ void MSM::compute(int eflag, int vflag)
// convert atoms back from lamda to box coords
if (triclinic)
domain->lamda2x(atom->nlocal);
if (triclinic) domain->lamda2x(atom->nlocal);
}
/* ----------------------------------------------------------------------
@ -621,15 +616,18 @@ void MSM::allocate()
// commgrid using all processors for finest grid level
int (*procneigh_all)[2] = comm->procneigh;
gcall = new GridComm(lmp,world,1,nx_msm[0],ny_msm[0],nz_msm[0],
nxlo_in[0],nxhi_in[0],nylo_in[0],
nyhi_in[0],nzlo_in[0],nzhi_in[0],
nxlo_out_all,nxhi_out_all,nylo_out_all,
nyhi_out_all,nzlo_out_all,nzhi_out_all,
nxlo_out[0],nxhi_out[0],nylo_out[0],
nyhi_out[0],nzlo_out[0],nzhi_out[0]);
cg_all = new GridComm(lmp,world,1,1,
nxlo_in[0],nxhi_in[0],nylo_in[0],nyhi_in[0],nzlo_in[0],nzhi_in[0],
nxlo_out_all,nxhi_out_all,nylo_out_all,nyhi_out_all,nzlo_out_all,nzhi_out_all,
nxlo_out[0],nxhi_out[0],nylo_out[0],nyhi_out[0],nzlo_out[0],nzhi_out[0],
procneigh_all[0][0],procneigh_all[0][1],procneigh_all[1][0],
procneigh_all[1][1],procneigh_all[2][0],procneigh_all[2][1]);
gcall->setup(ngcall_buf1,ngcall_buf2);
npergrid = 1;
memory->create(gcall_buf1,npergrid*ngcall_buf1,"msm:gcall_buf1");
memory->create(gcall_buf2,npergrid*ngcall_buf2,"msm:gcall_buf2");
// allocate memory for each grid level
@ -644,12 +642,23 @@ void MSM::allocate()
if (active_flag[n]) {
int **procneigh = procneigh_levels[n];
cg[n] = new GridComm(lmp,world_levels[n],1,1,
nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],nzlo_in[n],nzhi_in[n],
nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],nzlo_out[n],nzhi_out[n],
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
} else cg[n] = nullptr;
gc[n] = new GridComm(lmp,world_levels[n],2,nx_msm[n],ny_msm[n],nz_msm[n],
nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],
nzlo_in[n],nzhi_in[n],
nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],
nzlo_out[n],nzhi_out[n],
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
gc[n]->setup(ngc_buf1[n],ngc_buf2[n]);
npergrid = 1;
memory->create(gc_buf1[n],npergrid*ngc_buf1[n],"msm:gc_buf1");
memory->create(gc_buf2[n],npergrid*ngc_buf2[n],"msm:gc_buf2");
} else {
gc[n] = nullptr;
gc_buf1[n] = gc_buf2[n] = nullptr;
}
}
}
@ -662,9 +671,12 @@ void MSM::deallocate()
memory->destroy2d_offset(phi1d,-order_allocated);
memory->destroy2d_offset(dphi1d,-order_allocated);
if (cg_all) delete cg_all;
cg_all = nullptr;
if (gcall) delete gcall;
memory->destroy(gcall_buf1);
memory->destroy(gcall_buf2);
gcall = nullptr;
gcall_buf1 = gcall_buf2 = nullptr;
for (int n=0; n<levels; n++) {
if (qgrid[n])
memory->destroy3d_offset(qgrid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
@ -676,10 +688,13 @@ void MSM::deallocate()
if (world_levels[n] != MPI_COMM_NULL)
MPI_Comm_free(&world_levels[n]);
if (cg) {
if (cg[n]) {
delete cg[n];
cg[n] = nullptr;
if (gc) {
if (gc[n]) {
delete gc[n];
memory->destroy(gc_buf1[n]);
memory->destroy(gc_buf2[n]);
gc[n] = nullptr;
gc_buf1[n] = gc_buf2[n] = nullptr;
}
}
}
@ -695,15 +710,11 @@ void MSM::allocate_peratom()
// create commgrid object for per-atom virial using all processors
int (*procneigh_all)[2] = comm->procneigh;
cg_peratom_all =
new GridComm(lmp,world,6,6,
nxlo_in[0],nxhi_in[0],nylo_in[0],nyhi_in[0],nzlo_in[0],nzhi_in[0],
nxlo_out_all,nxhi_out_all,nylo_out_all,nyhi_out_all,nzlo_out_all,nzhi_out_all,
nxlo_out[0],nxhi_out[0],nylo_out[0],nyhi_out[0],nzlo_out[0],nzhi_out[0],
procneigh_all[0][0],procneigh_all[0][1],procneigh_all[1][0],
procneigh_all[1][1],procneigh_all[2][0],procneigh_all[2][1]);
npergrid = 6;
memory->destroy(gcall_buf1);
memory->destroy(gcall_buf2);
memory->create(gcall_buf1,npergrid*ngcall_buf1,"pppm:gcall_buf1");
memory->create(gcall_buf2,npergrid*ngcall_buf2,"pppm:gcall_buf2");
// allocate memory for each grid level
@ -724,13 +735,11 @@ void MSM::allocate_peratom()
// create commgrid object for per-atom virial
if (active_flag[n]) {
int **procneigh = procneigh_levels[n];
cg_peratom[n] =
new GridComm(lmp,world_levels[n],6,6,
nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],nzlo_in[n],nzhi_in[n],
nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],nzlo_out[n],nzhi_out[n],
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
npergrid = 6;
memory->destroy(gc_buf1[n]);
memory->destroy(gc_buf2[n]);
memory->create(gc_buf1[n],npergrid*ngc_buf1[n],"pppm:gc_buf1");
memory->create(gc_buf2[n],npergrid*ngc_buf2[n],"pppm:gc_buf2");
}
}
}
@ -743,8 +752,6 @@ void MSM::deallocate_peratom()
{
peratom_allocate_flag = 0;
if (cg_peratom_all) delete cg_peratom_all;
for (int n=0; n<levels; n++) {
if (v0grid[n])
memory->destroy3d_offset(v0grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
@ -758,9 +765,6 @@ void MSM::deallocate_peratom()
memory->destroy3d_offset(v4grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
if (v5grid[n])
memory->destroy3d_offset(v5grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
if (cg_peratom)
if (cg_peratom[n]) delete cg_peratom[n];
}
}
@ -771,10 +775,13 @@ void MSM::deallocate_peratom()
void MSM::allocate_levels()
{
ngrid = new int[levels];
cg = new GridComm*[levels];
cg_peratom = new GridComm*[levels];
gc = new GridComm*[levels];
gc_buf1 = new double*[levels];
gc_buf2 = new double*[levels];
ngc_buf1 = new int[levels];
ngc_buf2 = new int[levels];
memory->create(procneigh_levels,levels,3,2,"msm:procneigh_levels");
world_levels = new MPI_Comm[levels];
active_flag = new int[levels];
@ -819,9 +826,8 @@ void MSM::allocate_levels()
v5grid = new double***[levels];
for (int n=0; n<levels; n++) {
cg[n] = NULL;
gc[n] = NULL;
world_levels[n] = MPI_COMM_NULL;
cg_peratom[n] = NULL;
qgrid[n] = NULL;
egrid[n] = NULL;
@ -833,7 +839,6 @@ void MSM::allocate_levels()
v4grid[n] = NULL;
v5grid[n] = NULL;
}
}
/* ----------------------------------------------------------------------
@ -842,15 +847,18 @@ void MSM::allocate_levels()
void MSM::deallocate_levels()
{
if (cg) deallocate();
delete [] ngrid;
ngrid = nullptr;
memory->destroy(procneigh_levels);
delete [] world_levels;
delete [] active_flag;
delete [] cg;
delete [] cg_peratom;
delete [] gc;
delete [] gc_buf1;
delete [] gc_buf2;
delete [] ngc_buf1;
delete [] ngc_buf2;
delete [] alpha;
delete [] betax;
@ -893,8 +901,8 @@ void MSM::deallocate_levels()
world_levels = nullptr;
active_flag = nullptr;
cg = nullptr;
cg_peratom = nullptr;
gc = nullptr;
gc_buf1 = gc_buf2 = nullptr;
alpha = nullptr;
betax = nullptr;
@ -1377,7 +1385,7 @@ void MSM::set_proc_grid(int n)
// define a new MPI communicator for this grid level that only includes active procs
if(world_levels[n] != MPI_COMM_NULL) MPI_Comm_free(&world_levels[n]);
if (world_levels[n] != MPI_COMM_NULL) MPI_Comm_free(&world_levels[n]);
MPI_Comm_split(world,color,me,&world_levels[n]);
if (!active_flag[n]) return;
@ -2434,6 +2442,7 @@ void MSM::prolongation(int n)
be cheaper than using nearest-neighbor communication (commgrid), right
now only works for periodic boundary conditions
------------------------------------------------------------------------- */
void MSM::grid_swap_forward(int n, double*** &gridn)
{
double ***gridn_tmp;
@ -2533,32 +2542,31 @@ void MSM::grid_swap_reverse(int n, double*** &gridn)
pack own values to buf to send to another proc (used by commgrid)
------------------------------------------------------------------------- */
void MSM::pack_forward(int flag, double *buf, int nlist, int *list)
void MSM::pack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
double *buf = (double *) vbuf;
int n = current_level;
double ***qgridn = qgrid[n];
double ***egridn = egrid[n];
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
int k = 0;
if (flag == FORWARD_RHO) {
double ***qgridn = qgrid[n];
double *qsrc = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
buf[k++] = qsrc[list[i]];
}
} else if (flag == FORWARD_AD) {
double ***egridn = egrid[n];
double *src = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
buf[i] = src[list[i]];
} else if (flag == FORWARD_AD_PERATOM) {
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
@ -2580,32 +2588,31 @@ void MSM::pack_forward(int flag, double *buf, int nlist, int *list)
unpack another proc's own values from buf and set own ghost values
------------------------------------------------------------------------- */
void MSM::unpack_forward(int flag, double *buf, int nlist, int *list)
void MSM::unpack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
double *buf = (double *) vbuf;
int n = current_level;
double ***qgridn = qgrid[n];
double ***egridn = egrid[n];
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
int k = 0;
if (flag == FORWARD_RHO) {
double ***qgridn = qgrid[n];
double *dest = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
dest[list[i]] = buf[k++];
}
} else if (flag == FORWARD_AD) {
double ***egridn = egrid[n];
double *dest = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
dest[list[i]] = buf[k++];
} else if (flag == FORWARD_AD_PERATOM) {
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
@ -2627,32 +2634,31 @@ void MSM::unpack_forward(int flag, double *buf, int nlist, int *list)
pack ghost values into buf to send to another proc
------------------------------------------------------------------------- */
void MSM::pack_reverse(int flag, double *buf, int nlist, int *list)
void MSM::pack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
double *buf = (double *) vbuf;
int n = current_level;
double ***qgridn = qgrid[n];
double ***egridn = egrid[n];
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
int k = 0;
if (flag == REVERSE_RHO) {
double ***qgridn = qgrid[n];
double *qsrc = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
buf[k++] = qsrc[list[i]];
}
} else if (flag == REVERSE_AD) {
double ***egridn = egrid[n];
double *src = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
buf[i] = src[list[i]];
} else if (flag == REVERSE_AD_PERATOM) {
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
@ -2674,32 +2680,31 @@ void MSM::pack_reverse(int flag, double *buf, int nlist, int *list)
unpack another proc's ghost values from buf and add to own values
------------------------------------------------------------------------- */
void MSM::unpack_reverse(int flag, double *buf, int nlist, int *list)
void MSM::unpack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
double *buf = (double *) vbuf;
int n = current_level;
double ***qgridn = qgrid[n];
double ***egridn = egrid[n];
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
int k = 0;
if (flag == REVERSE_RHO) {
double ***qgridn = qgrid[n];
double *dest = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
dest[list[i]] += buf[k++];
}
} else if (flag == REVERSE_AD) {
double ***egridn = egrid[n];
double *dest = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
dest[list[i]] += buf[k++];
} else if (flag == REVERSE_AD_PERATOM) {
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
@ -3416,3 +3421,24 @@ void MSM::get_virial_direct_top(int n)
}
}
}
/* ----------------------------------------------------------------------
memory usage of local arrays
------------------------------------------------------------------------- */
double MSM::memory_usage()
{
double bytes = 0;
// NOTE: Stan, fill in other memory allocations here
// all GridComm bufs
bytes += (ngcall_buf1 + ngcall_buf2) * npergrid * sizeof(double);
for (int n=0; n<levels; n++)
if (active_flag[n])
bytes += (ngc_buf1[n] + ngc_buf2[n]) * npergrid * sizeof(double);
return bytes;
}
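As a rough worked example of what this accounting captures (all numbers invented): if setup() reported ngcall_buf1 = 1000 and ngcall_buf2 = 1200 grid points, and per-atom arrays have been allocated so npergrid = 6, the finest-level buffers alone contribute (1000 + 1200) * 6 * 8 bytes = 105,600 bytes, roughly 0.1 MB, with each active coarser level adding its own ngc_buf1[n] + ngc_buf2[n] term.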

View File

@ -32,6 +32,7 @@ class MSM : public KSpace {
void setup();
virtual void settings(int, char **);
virtual void compute(int, int);
virtual double memory_usage();
protected:
int me,nprocs;
@ -79,16 +80,21 @@ class MSM : public KSpace {
int procgrid[3]; // procs assigned in each dim of 3d grid
int myloc[3]; // which proc I am in each dim
int ***procneigh_levels; // my 6 neighboring procs, 0/1 = left/right
class GridComm **cg;
class GridComm **cg_peratom;
class GridComm *cg_all;
class GridComm *cg_peratom_all;
class GridComm *gcall; // GridComm class for finest level grid
class GridComm **gc; // GridComm classes for each hierarchical level
double *gcall_buf1,*gcall_buf2;
double **gc_buf1,**gc_buf2;
int ngcall_buf1,ngcall_buf2,npergrid;
int *ngc_buf1,*ngc_buf2;
int current_level;
int **part2grid; // storage for particle -> grid mapping
int nmax;
int triclinic;
double *boxlo;
void set_grid_global();
@ -126,15 +132,12 @@ class MSM : public KSpace {
void get_g_direct_top(int);
void get_virial_direct_top(int);
// triclinic
int triclinic;
// grid communication
void pack_forward(int, double *, int, int *);
void unpack_forward(int, double *, int, int *);
void pack_reverse(int, double *, int, int *);
void unpack_reverse(int, double *, int, int *);
void pack_forward_grid(int, void *, int, int *);
void unpack_forward_grid(int, void *, int, int *);
void pack_reverse_grid(int, void *, int, int *);
void unpack_reverse_grid(int, void *, int, int *);
};
}

View File

@ -91,17 +91,7 @@ void MSMCG::compute(int eflag, int vflag)
// invoke allocate_peratom() if needed for first time
if (vflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom_all->ghost_notify();
cg_peratom_all->setup();
for (int n=0; n<levels; n++) {
if (!active_flag[n]) continue;
cg_peratom[n]->ghost_notify();
cg_peratom[n]->setup();
}
peratom_allocate_flag = 1;
}
if (vflag_atom && !peratom_allocate_flag) allocate_peratom();
// extend size of per-atom arrays if necessary
@ -171,7 +161,8 @@ void MSMCG::compute(int eflag, int vflag)
// to fully sum contribution in their 3d grid
current_level = 0;
cg_all->reverse_comm(this,REVERSE_RHO);
gcall->reverse_comm_kspace(this,1,sizeof(double),REVERSE_RHO,
gcall_buf1,gcall_buf2,MPI_DOUBLE);
// forward communicate charge density values to fill ghost grid points
// compute direct sum interaction and then restrict to coarser grid
@ -179,24 +170,30 @@ void MSMCG::compute(int eflag, int vflag)
for (int n=0; n<=levels-2; n++) {
if (!active_flag[n]) continue;
current_level = n;
cg[n]->forward_comm(this,FORWARD_RHO);
gc[n]->forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
direct(n);
restriction(n);
}
// compute direct interaction for top grid level for non-periodic
// and for second from top grid level for periodic
if (active_flag[levels-1]) {
if (domain->nonperiodic) {
current_level = levels-1;
cg[levels-1]->forward_comm(this,FORWARD_RHO);
gc[levels-1]->
forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
direct_top(levels-1);
cg[levels-1]->reverse_comm(this,REVERSE_AD);
gc[levels-1]->
reverse_comm_kspace(this,1,sizeof(double),REVERSE_AD,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[levels-1]->
reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
} else {
// Here using MPI_Allreduce is cheaper than using commgrid
grid_swap_forward(levels-1,qgrid[levels-1]);
@ -204,7 +201,9 @@ void MSMCG::compute(int eflag, int vflag)
grid_swap_reverse(levels-1,egrid[levels-1]);
current_level = levels-1;
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[levels-1]->
reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
}
}
@ -216,24 +215,28 @@ void MSMCG::compute(int eflag, int vflag)
prolongation(n);
current_level = n;
cg[n]->reverse_comm(this,REVERSE_AD);
gc[n]->reverse_comm_kspace(this,1,sizeof(double),REVERSE_AD,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
// extra per-atom virial communication
if (vflag_atom)
cg_peratom[n]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[n]->reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
}
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
current_level = 0;
cg_all->forward_comm(this,FORWARD_AD);
gcall->forward_comm_kspace(this,1,sizeof(double),FORWARD_AD,
gcall_buf1,gcall_buf2,MPI_DOUBLE);
// extra per-atom energy/virial communication
if (vflag_atom)
cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);
gcall->forward_comm_kspace(this,6,sizeof(double),FORWARD_AD_PERATOM,
gcall_buf1,gcall_buf2,MPI_DOUBLE);
// calculate the force on my particles (interpolation)
@ -536,6 +539,9 @@ void MSMCG::fieldforce_peratom()
}
}
/* ----------------------------------------------------------------------
memory usage of local arrays
------------------------------------------------------------------------- */
double MSMCG::memory_usage()
{

View File

@ -3444,7 +3444,7 @@ void PPPM::poisson_groups_triclinic()
}
/* ----------------------------------------------------------------------
Slab-geometry correction term to dampen inter-slab interactions between
slab-geometry correction term to dampen inter-slab interactions between
periodically repeating slabs. Yields good approximation to 2D Ewald if
adequate empty space is left between repeating slabs (J. Chem. Phys.
111, 3155). Slabs defined here to be parallel to the xy plane. Also