Revert "Rendezvous"

2019-03-25 21:30:48 -04:00 · 2019-03-25 21:30:48 -04:00 · d7a2949d1a
parent cb92bccdff
commit d7a2949d1a
16 changed files with 1207 additions and 2707 deletions
--- a/src/RIGID/fix_rigid_small.cpp
+++ b/src/RIGID/fix_rigid_small.cpp
@ -28,14 +28,12 @@
 #include "modify.h"
 #include "group.h"
 #include "comm.h"
-#include "neighbor.h"
 #include "force.h"
 #include "input.h"
 #include "output.h"
 #include "variable.h"
 #include "random_mars.h"
 #include "math_const.h"
-#include "hashlittle.h"
 #include "memory.h"
 #include "error.h"

@ -45,8 +43,6 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;

-#define RVOUS 1   // 0 for irregular, 1 for all2all
-
 #define MAXLINE 1024
 #define CHUNK 1024
 #define ATTRIBUTE_PERBODY 20
@ -74,7 +70,8 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
  xcmimage(NULL), displace(NULL), eflags(NULL), orient(NULL), dorient(NULL),
  avec_ellipsoid(NULL), avec_line(NULL), avec_tri(NULL), counts(NULL),
  itensor(NULL), mass_body(NULL), langextra(NULL), random(NULL),
-  id_dilate(NULL), onemols(NULL)
+  id_dilate(NULL), onemols(NULL), hash(NULL), bbox(NULL), ctr(NULL),
+  idclose(NULL), rsqclose(NULL)
 {
  int i;

@ -110,18 +107,18 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
  // parse args for rigid body specification

  int *mask = atom->mask;
-  tagint *bodyID = NULL;
+  tagint *bodyid = NULL;
  int nlocal = atom->nlocal;

  if (narg < 4) error->all(FLERR,"Illegal fix rigid/small command");
  if (strcmp(arg[3],"molecule") == 0) {
    if (atom->molecule_flag == 0)
      error->all(FLERR,"Fix rigid/small requires atom attribute molecule");
-    bodyID = atom->molecule;
+    bodyid = atom->molecule;

  } else if (strcmp(arg[3],"custom") == 0) {
    if (narg < 5) error->all(FLERR,"Illegal fix rigid/small command");
-      bodyID = new tagint[nlocal];
+      bodyid = new tagint[nlocal];
      customflag = 1;

      // determine whether atom-style variable or atom property is used.
@ -129,11 +126,9 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
        int is_double=0;
        int custom_index = atom->find_custom(arg[4]+2,is_double);
        if (custom_index == -1)
-          error->all(FLERR,"Fix rigid/small custom requires "
-                     "previously defined property/atom");
+          error->all(FLERR,"Fix rigid/small custom requires previously defined property/atom");
        else if (is_double)
-          error->all(FLERR,"Fix rigid/small custom requires "
-                     "integer-valued property/atom");
+          error->all(FLERR,"Fix rigid/small custom requires integer-valued property/atom");

        int minval = INT_MAX;
        int *value = atom->ivector[custom_index];
@ -144,17 +139,15 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :

        for (i = 0; i < nlocal; i++)
          if (mask[i] & groupbit)
-            bodyID[i] = (tagint)(value[i] - minval + 1);
-          else bodyID[i] = 0;
+            bodyid[i] = (tagint)(value[i] - minval + 1);
+          else bodyid[i] = 0;

      } else if (strstr(arg[4],"v_") == arg[4]) {
        int ivariable = input->variable->find(arg[4]+2);
        if (ivariable < 0)
-          error->all(FLERR,"Variable name for fix rigid/small custom "
-                     "does not exist");
+          error->all(FLERR,"Variable name for fix rigid/small custom does not exist");
        if (input->variable->atomstyle(ivariable) == 0)
-          error->all(FLERR,"Fix rigid/small custom variable is not "
-                     "atom-style variable");
+          error->all(FLERR,"Fix rigid/small custom variable is no atom-style variable");
        double *value = new double[nlocal];
        input->variable->compute_atom(ivariable,0,value,1,0);
        int minval = INT_MAX;
@ -165,8 +158,8 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :

        for (i = 0; i < nlocal; i++)
          if (mask[i] & groupbit)
-            bodyID[i] = (tagint)((tagint)value[i] - minval + 1);
-          else bodyID[0] = 0;
+            bodyid[i] = (tagint)((tagint)value[i] - minval + 1);
+          else bodyid[0] = 0;
        delete[] value;
      } else error->all(FLERR,"Unsupported fix rigid custom property");
  } else error->all(FLERR,"Illegal fix rigid/small command");
@ -174,11 +167,10 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
  if (atom->map_style == 0)
    error->all(FLERR,"Fix rigid/small requires an atom map, see atom_modify");

-  // maxmol = largest bodyID #
-
+  // maxmol = largest bodyid #
  maxmol = -1;
  for (i = 0; i < nlocal; i++)
-    if (mask[i] & groupbit) maxmol = MAX(maxmol,bodyID[i]);
+    if (mask[i] & groupbit) maxmol = MAX(maxmol,bodyid[i]);

  tagint itmp;
  MPI_Allreduce(&maxmol,&itmp,1,MPI_LMP_TAGINT,MPI_MAX,world);
@ -408,19 +400,8 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
  // sets bodytag for owned atoms
  // body attributes are computed later by setup_bodies()

-  double time1 = MPI_Wtime();
-
-  create_bodies(bodyID);
-  if (customflag) delete [] bodyID;
-
-  double time2 = MPI_Wtime();
-
-  if (comm->me == 0) {
-    if (screen)
-      fprintf(screen,"  create bodies CPU = %g secs\n",time2-time1);
-    if (logfile)
-      fprintf(logfile,"  create bodies CPU = %g secs\n",time2-time1);
-  }
+  create_bodies(bodyid);
+  if (customflag) delete [] bodyid;

  // set nlocal_body and allocate bodies I own

@ -588,22 +569,12 @@ void FixRigidSmall::init()
      if (rflag && (modify->fmask[i] & POST_FORCE) &&
          !modify->fix[i]->rigid_flag) {
        char str[128];
-        snprintf(str,128,"Fix %s alters forces after fix rigid",
-                 modify->fix[i]->id);
+        snprintf(str,128,"Fix %s alters forces after fix rigid",modify->fix[i]->id);
        error->warning(FLERR,str);
      }
    }
  }

-  // error if maxextent > comm->cutghost
-  // NOTE: could just warn if an override flag set
-  // NOTE: this could fail for comm multi mode if user sets a wrong cutoff
-  //       for atom types in rigid bodies - need a more careful test
-
-  double cutghost = MAX(neighbor->cutneighmax,comm->cutghostuser);
-  if (maxextent > cutghost) 
-    error->all(FLERR,"Rigid body extent > ghost cutoff - use comm_modify cutoff");
-
  // error if npt,nph fix comes before rigid fix

  for (i = 0; i < modify->nfix; i++) {
@ -1543,72 +1514,175 @@ void FixRigidSmall::set_v()
   set bodytag for all owned atoms
 ------------------------------------------------------------------------- */

-void FixRigidSmall::create_bodies(tagint *bodyID)
+void FixRigidSmall::create_bodies(tagint *bodyid)
 {
-  int i,m;
+  int i,m,n;
+  double unwrap[3];

-  // allocate buffer for input to rendezvous comm
-  // ncount = # of my atoms in bodies
-  
+  // error check on image flags of atoms in rigid bodies
+
+  imageint *image = atom->image;
  int *mask = atom->mask;
  int nlocal = atom->nlocal;

+  int *periodicity = domain->periodicity;
+  int xbox,ybox,zbox;
+
+  int flag = 0;
+  for (i = 0; i < nlocal; i++) {
+    if (!(mask[i] & groupbit)) continue;
+    xbox = (image[i] & IMGMASK) - IMGMAX;
+    ybox = (image[i] >> IMGBITS & IMGMASK) - IMGMAX;
+    zbox = (image[i] >> IMG2BITS) - IMGMAX;
+    if ((xbox && !periodicity[0]) || (ybox && !periodicity[1]) ||
+        (zbox && !periodicity[2])) flag = 1;
+  }
+
+  int flagall;
+  MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
+  if (flagall) error->all(FLERR,"Fix rigid/small atom has non-zero image flag "
+                          "in a non-periodic dimension");
+
+  // allocate buffer for passing messages around ring of procs
+  // percount = max number of values to put in buffer for each of ncount
+
  int ncount = 0;
  for (i = 0; i < nlocal; i++)
    if (mask[i] & groupbit) ncount++;

-  int *proclist;
-  memory->create(proclist,ncount,"rigid/small:proclist");
-  InRvous *inbuf = (InRvous *) 
-    memory->smalloc(ncount*sizeof(InRvous),"rigid/small:inbuf");
+  int percount = 5;
+  double *buf;
+  memory->create(buf,ncount*percount,"rigid/small:buf");

-  // setup buf to pass to rendezvous comm
-  // one BodyMsg datum for each constituent atom
-  // datum = me, local index of atom, atomID, bodyID, unwrapped coords
-  // owning proc for each datum = random hash of bodyID
+  // create map hash for storing unique body IDs of my atoms
+  // key = body ID
+  // value = index into per-body data structure
+  // n = # of entries in hash
+
+  hash = new std::map<tagint,int>();
+  hash->clear();
+
+  // setup hash
+  // key = body ID
+  // value = index into N-length data structure
+  // n = count of unique bodies my atoms are part of
+
+  n = 0;
+  for (i = 0; i < nlocal; i++) {
+    if (!(mask[i] & groupbit)) continue;
+    if (hash->find(bodyid[i]) == hash->end()) (*hash)[bodyid[i]] = n++;
+  }
+
+  // bbox = bounding box of each rigid body my atoms are part of
+
+  memory->create(bbox,n,6,"rigid/small:bbox");
+
+  for (i = 0; i < n; i++) {
+    bbox[i][0] = bbox[i][2] = bbox[i][4] = BIG;
+    bbox[i][1] = bbox[i][3] = bbox[i][5] = -BIG;
+  }
+
+  // pack my atoms into buffer as body ID, unwrapped coords

  double **x = atom->x;
-  tagint *tag = atom->tag;
-  imageint *image = atom->image;

  m = 0;
  for (i = 0; i < nlocal; i++) {
    if (!(mask[i] & groupbit)) continue;
-    proclist[m] = hashlittle(&bodyID[i],sizeof(tagint),0) % nprocs;
-    inbuf[m].me = me;
-    inbuf[m].ilocal = i;
-    inbuf[m].atomID = tag[i];
-    inbuf[m].bodyID = bodyID[i];
-    domain->unmap(x[i],image[i],inbuf[m].x);
-    m++;
+    domain->unmap(x[i],image[i],unwrap);
+    buf[m++] = bodyid[i];
+    buf[m++] = unwrap[0];
+    buf[m++] = unwrap[1];
+    buf[m++] = unwrap[2];
  }

-  // perform rendezvous operation
-  // each proc owns random subset of bodies
-  // receives all atoms in those bodies
-  // func = compute bbox of each body, find atom closest to geometric center
+  // pass buffer around ring of procs
+  // func = update bbox with atom coords from every proc
+  // when done, have full bbox for every rigid body my atoms are part of

-  char *buf;
-  int nreturn = comm->rendezvous(RVOUS,ncount,(char *) inbuf,sizeof(InRvous),
-                                 0,proclist,
-                                 rendezvous_body,0,buf,sizeof(OutRvous),
-                                 (void *) this);
-  OutRvous *outbuf = (OutRvous *) buf;
-  
-  memory->destroy(proclist);
-  memory->sfree(inbuf);
+  comm->ring(m,sizeof(double),buf,1,ring_bbox,NULL,(void *)this);

-  // set bodytag of all owned atoms based on outbuf info for constituent atoms
+  // check if any bbox is size 0.0, meaning rigid body is a single particle

-  for (i = 0; i < nlocal; i++) 
-    if (!(mask[i] & groupbit)) bodytag[i] = 0;
+  flag = 0;
+  for (i = 0; i < n; i++)
+    if (bbox[i][0] == bbox[i][1] && bbox[i][2] == bbox[i][3] &&
+        bbox[i][4] == bbox[i][5]) flag = 1;
+  MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
+  if (flagall)
+    error->all(FLERR,"One or more rigid bodies are a single particle");

-  for (m = 0; m < nreturn; m++)
-    bodytag[outbuf[m].ilocal] = outbuf[m].atomID;
+  // ctr = center pt of each rigid body my atoms are part of

-  memory->sfree(outbuf);
+  memory->create(ctr,n,6,"rigid/small:bbox");

-  // maxextent = max of rsqfar across all procs
+  for (i = 0; i < n; i++) {
+    ctr[i][0] = 0.5 * (bbox[i][0] + bbox[i][1]);
+    ctr[i][1] = 0.5 * (bbox[i][2] + bbox[i][3]);
+    ctr[i][2] = 0.5 * (bbox[i][4] + bbox[i][5]);
+  }
+
+  // idclose = ID of atom in body closest to center pt (smaller ID if tied)
+  // rsqclose = distance squared from idclose to center pt
+
+  memory->create(idclose,n,"rigid/small:idclose");
+  memory->create(rsqclose,n,"rigid/small:rsqclose");
+
+  for (i = 0; i < n; i++) rsqclose[i] = BIG;
+
+  // pack my atoms into buffer as body ID, atom ID, unwrapped coords
+
+  tagint *tag = atom->tag;
+
+  m = 0;
+  for (i = 0; i < nlocal; i++) {
+    if (!(mask[i] & groupbit)) continue;
+    domain->unmap(x[i],image[i],unwrap);
+    buf[m++] = bodyid[i];
+    buf[m++] = ubuf(tag[i]).d;
+    buf[m++] = unwrap[0];
+    buf[m++] = unwrap[1];
+    buf[m++] = unwrap[2];
+  }
+
+  // pass buffer around ring of procs
+  // func = update idclose,rsqclose with atom IDs from every proc
+  // when done, have idclose for every rigid body my atoms are part of
+
+  comm->ring(m,sizeof(double),buf,2,ring_nearest,NULL,(void *)this);
+
+  // set bodytag of all owned atoms, based on idclose
+  // find max value of rsqclose across all procs
+
+  double rsqmax = 0.0;
+  for (i = 0; i < nlocal; i++) {
+    bodytag[i] = 0;
+    if (!(mask[i] & groupbit)) continue;
+    m = hash->find(bodyid[i])->second;
+    bodytag[i] = idclose[m];
+    rsqmax = MAX(rsqmax,rsqclose[m]);
+  }
+
+  // pack my atoms into buffer as bodytag of owning atom, unwrapped coords
+
+  m = 0;
+  for (i = 0; i < nlocal; i++) {
+    if (!(mask[i] & groupbit)) continue;
+    domain->unmap(x[i],image[i],unwrap);
+    buf[m++] = ubuf(bodytag[i]).d;
+    buf[m++] = unwrap[0];
+    buf[m++] = unwrap[1];
+    buf[m++] = unwrap[2];
+  }
+
+  // pass buffer around ring of procs
+  // func = update rsqfar for atoms belonging to bodies I own
+  // when done, have rsqfar for all atoms in bodies I own
+
+  rsqfar = 0.0;
+  comm->ring(m,sizeof(double),buf,3,ring_farthest,NULL,(void *)this);
+
+  // find maxextent of rsqfar across all procs
  // if defined, include molecule->maxextent

  MPI_Allreduce(&rsqfar,&maxextent,1,MPI_DOUBLE,MPI_MAX,world);
@ -1617,156 +1691,125 @@ void FixRigidSmall::create_bodies(tagint *bodyID)
    for (int i = 0; i < nmol; i++)
      maxextent = MAX(maxextent,onemols[i]->maxextent);
  }
-}
-
-/* ----------------------------------------------------------------------
-   process rigid bodies assigned to me
-   buf = list of N BodyMsg datums
------------------------------------------------------------------------- */
-
-int FixRigidSmall::rendezvous_body(int n, char *inbuf,
-                                   int &rflag, int *&proclist, char *&outbuf,
-                                   void *ptr)
-{
-  int i,j,m;
-  double delx,dely,delz,rsq;
-  int *iclose;
-  tagint *idclose;
-  double *x,*xown,*rsqclose;
-  double **bbox,**ctr;
-
-  FixRigidSmall *frsptr = (FixRigidSmall *) ptr;
-  Memory *memory = frsptr->memory;
-  Error *error = frsptr->error;
-  MPI_Comm world = frsptr->world;
-
-  // setup hash
-  // use STL map instead of atom->map
-  //   b/c know nothing about body ID values specified by user
-  // ncount = number of bodies assigned to me
-  // key = body ID
-  // value = index into Ncount-length data structure
-
-  InRvous *in = (InRvous *) inbuf;
-  std::map<tagint,int> hash;
-  tagint id;
-  
-  int ncount = 0;
-  for (i = 0; i < n; i++) {
-    id = in[i].bodyID;
-    if (hash.find(id) == hash.end()) hash[id] = ncount++;
-  }
-
-  // bbox = bounding box of each rigid body
-
-  memory->create(bbox,ncount,6,"rigid/small:bbox");
-
-  for (m = 0; m < ncount; m++) {
-    bbox[m][0] = bbox[m][2] = bbox[m][4] = BIG;
-    bbox[m][1] = bbox[m][3] = bbox[m][5] = -BIG;
-  }
-
-  for (i = 0; i < n; i++) {
-    m = hash.find(in[i].bodyID)->second;
-    x = in[i].x;
-    bbox[m][0] = MIN(bbox[m][0],x[0]);
-    bbox[m][1] = MAX(bbox[m][1],x[0]);
-    bbox[m][2] = MIN(bbox[m][2],x[1]);
-    bbox[m][3] = MAX(bbox[m][3],x[1]);
-    bbox[m][4] = MIN(bbox[m][4],x[2]);
-    bbox[m][5] = MAX(bbox[m][5],x[2]);
-  }
-
-  // check if any bbox is size 0.0, meaning rigid body is a single particle
-
-  int flag = 0;
-  for (m = 0; m < ncount; m++)
-    if (bbox[m][0] == bbox[m][1] && bbox[m][2] == bbox[m][3] &&
-        bbox[m][4] == bbox[m][5]) flag = 1;
-  int flagall;
-  MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);    // sync here?
-  if (flagall)
-    error->all(FLERR,"One or more rigid bodies are a single particle");
-
-  // ctr = geometric center pt of each rigid body
-
-  memory->create(ctr,ncount,3,"rigid/small:bbox");
-
-  for (m = 0; m < ncount; m++) {
-    ctr[m][0] = 0.5 * (bbox[m][0] + bbox[m][1]);
-    ctr[m][1] = 0.5 * (bbox[m][2] + bbox[m][3]);
-    ctr[m][2] = 0.5 * (bbox[m][4] + bbox[m][5]);
-  }
-
-  // idclose = atomID closest to center point of each body
-
-  memory->create(idclose,ncount,"rigid/small:idclose");
-  memory->create(iclose,ncount,"rigid/small:iclose");
-  memory->create(rsqclose,ncount,"rigid/small:rsqclose");
-  for (m = 0; m < ncount; m++) rsqclose[m] = BIG;
-
-  for (i = 0; i < n; i++) {
-    m = hash.find(in[i].bodyID)->second;
-    x = in[i].x;
-    delx = x[0] - ctr[m][0];
-    dely = x[1] - ctr[m][1];
-    delz = x[2] - ctr[m][2];
-    rsq = delx*delx + dely*dely + delz*delz;
-    if (rsq <= rsqclose[m]) {
-      if (rsq == rsqclose[m] && in[i].atomID > idclose[m]) continue;
-      iclose[m] = i;
-      idclose[m] = in[i].atomID;
-      rsqclose[m] = rsq;
-    }
-  }
-
-  // compute rsqfar for all bodies I own
-  // set rsqfar back in caller
-  
-  double rsqfar = 0.0;
-
-  for (int i = 0; i < n; i++) {
-    m = hash.find(in[i].bodyID)->second;
-    xown = in[iclose[m]].x;
-    x = in[i].x;
-    delx = x[0] - xown[0];
-    dely = x[1] - xown[1];
-    delz = x[2] - xown[2];
-    rsq = delx*delx + dely*dely + delz*delz;
-    rsqfar = MAX(rsqfar,rsq);
-  }
-
-  frsptr->rsqfar = rsqfar;
-
-  // pass list of OutRvous datums back to comm->rendezvous
-
-  int nout = n;
-  memory->create(proclist,nout,"rigid/small:proclist");
-  OutRvous *out = (OutRvous *) 
-    memory->smalloc(nout*sizeof(OutRvous),"rigid/small:out");
-
-  for (int i = 0; i < nout; i++) {
-    proclist[i] = in[i].me;
-    out[i].ilocal = in[i].ilocal;
-    m = hash.find(in[i].bodyID)->second;
-    out[i].atomID = idclose[m];
-  }
-
-  outbuf = (char *) out;

  // clean up
-  // Comm::rendezvous will delete proclist and out (outbuf)

+  delete hash;
+  memory->destroy(buf);
  memory->destroy(bbox);
  memory->destroy(ctr);
  memory->destroy(idclose);
-  memory->destroy(iclose);
  memory->destroy(rsqclose);
+}

-  // flag = 2: new outbuf
+/* ----------------------------------------------------------------------
+   process rigid body atoms from another proc
+   update bounding box for rigid bodies my atoms are part of
+------------------------------------------------------------------------- */

-  rflag = 2;
-  return nout;
+void FixRigidSmall::ring_bbox(int n, char *cbuf, void *ptr)
+{
+  FixRigidSmall *frsptr = (FixRigidSmall *) ptr;
+  std::map<tagint,int> *hash = frsptr->hash;
+  double **bbox = frsptr->bbox;
+
+  double *buf = (double *) cbuf;
+  int ndatums = n/4;
+
+  int j,imol;
+  double *x;
+
+  int m = 0;
+  for (int i = 0; i < ndatums; i++, m += 4) {
+    imol = static_cast<int> (buf[m]);
+    if (hash->find(imol) != hash->end()) {
+      j = hash->find(imol)->second;
+      x = &buf[m+1];
+      bbox[j][0] = MIN(bbox[j][0],x[0]);
+      bbox[j][1] = MAX(bbox[j][1],x[0]);
+      bbox[j][2] = MIN(bbox[j][2],x[1]);
+      bbox[j][3] = MAX(bbox[j][3],x[1]);
+      bbox[j][4] = MIN(bbox[j][4],x[2]);
+      bbox[j][5] = MAX(bbox[j][5],x[2]);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   process rigid body atoms from another proc
+   update nearest atom to body center for rigid bodies my atoms are part of
+------------------------------------------------------------------------- */
+
+void FixRigidSmall::ring_nearest(int n, char *cbuf, void *ptr)
+{
+  FixRigidSmall *frsptr = (FixRigidSmall *) ptr;
+  std::map<tagint,int> *hash = frsptr->hash;
+  double **ctr = frsptr->ctr;
+  tagint *idclose = frsptr->idclose;
+  double *rsqclose = frsptr->rsqclose;
+
+  double *buf = (double *) cbuf;
+  int ndatums = n/5;
+
+  int j,imol;
+  tagint tag;
+  double delx,dely,delz,rsq;
+  double *x;
+
+  int m = 0;
+  for (int i = 0; i < ndatums; i++, m += 5) {
+    imol = static_cast<int> (buf[m]);
+    if (hash->find(imol) != hash->end()) {
+      j = hash->find(imol)->second;
+      tag = (tagint) ubuf(buf[m+1]).i;
+      x = &buf[m+2];
+      delx = x[0] - ctr[j][0];
+      dely = x[1] - ctr[j][1];
+      delz = x[2] - ctr[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      if (rsq <= rsqclose[j]) {
+        if (rsq == rsqclose[j] && tag > idclose[j]) continue;
+        idclose[j] = tag;
+        rsqclose[j] = rsq;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   process rigid body atoms from another proc
+   update rsqfar = distance from owning atom to other atom
+------------------------------------------------------------------------- */
+
+void FixRigidSmall::ring_farthest(int n, char *cbuf, void *ptr)
+{
+  FixRigidSmall *frsptr = (FixRigidSmall *) ptr;
+  double **x = frsptr->atom->x;
+  imageint *image = frsptr->atom->image;
+  int nlocal = frsptr->atom->nlocal;
+
+  double *buf = (double *) cbuf;
+  int ndatums = n/4;
+
+  int iowner;
+  tagint tag;
+  double delx,dely,delz,rsq;
+  double *xx;
+  double unwrap[3];
+
+  int m = 0;
+  for (int i = 0; i < ndatums; i++, m += 4) {
+    tag = (tagint) ubuf(buf[m]).i;
+    iowner = frsptr->atom->map(tag);
+    if (iowner < 0 || iowner >= nlocal) continue;
+    frsptr->domain->unmap(x[iowner],image[iowner],unwrap);
+    xx = &buf[m+1];
+    delx = xx[0] - unwrap[0];
+    dely = xx[1] - unwrap[1];
+    delz = xx[2] - unwrap[2];
+    rsq = delx*delx + dely*dely + delz*delz;
+    frsptr->rsqfar = MAX(frsptr->rsqfar,rsq);
+  }
 }

 /* ----------------------------------------------------------------------
@ -2429,9 +2472,9 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody)

  int nlocal = atom->nlocal;

-  std::map<tagint,int> hash;
+  hash = new std::map<tagint,int>();
  for (i = 0; i < nlocal; i++)
-    if (bodyown[i] >= 0) hash[atom->molecule[i]] = bodyown[i];
+    if (bodyown[i] >= 0) (*hash)[atom->molecule[i]] = bodyown[i];

  // open file and read header

@ -2490,11 +2533,11 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody)
      id = ATOTAGINT(values[0]);
      if (id <= 0 || id > maxmol)
        error->all(FLERR,"Invalid rigid body ID in fix rigid/small file");
-      if (hash.find(id) == hash.end()) {
+      if (hash->find(id) == hash->end()) {
        buf = next + 1;
        continue;
      }
-      m = hash[id];
+      m = (*hash)[id];
      inbody[m] = 1;

      if (which == 0) {
@ -2533,6 +2576,7 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody)

  delete [] buffer;
  delete [] values;
+  delete hash;
 }

 /* ----------------------------------------------------------------------
--- a/src/RIGID/fix_rigid_small.h
+++ b/src/RIGID/fix_rigid_small.h
@ -23,7 +23,7 @@ FixStyle(rigid/small,FixRigidSmall)
 #include "fix.h"

 // replace this later
-//#include <map>
+#include <map>

 namespace LAMMPS_NS {

@ -180,21 +180,13 @@ class FixRigidSmall : public Fix {

  // class data used by ring communication callbacks

+  std::map<tagint,int> *hash;
+  double **bbox;
+  double **ctr;
+  tagint *idclose;
+  double *rsqclose;
  double rsqfar;

-  struct InRvous {
-    int me,ilocal;
-    tagint atomID,bodyID;
-    double x[3];
-  };
-
-  struct OutRvous {
-    int ilocal;
-    tagint atomID;
-  };
-
-  // local methods
-
  void image_shift();
  void set_xv();
  void set_v();
@ -207,9 +199,11 @@ class FixRigidSmall : public Fix {
  void grow_body();
  void reset_atom2body();

-  // callback function for rendezvous communication
+  // callback functions for ring communication

-  static int rendezvous_body(int, char *, int &, int *&, char *&, void *);
+  static void ring_bbox(int, char *, void *);
+  static void ring_nearest(int, char *, void *);
+  static void ring_farthest(int, char *, void *);

  // debug

--- a/src/RIGID/fix_shake.cpp
+++ b/src/RIGID/fix_shake.cpp
@ -39,8 +39,6 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;

-#define RVOUS 1   // 0 for irregular, 1 for all2all
-
 #define BIG 1.0e20
 #define MASSDELTA 0.1

@ -221,19 +219,8 @@ FixShake::FixShake(LAMMPS *lmp, int narg, char **arg) :

  // identify all SHAKE clusters

-  double time1 = MPI_Wtime();
-
  find_clusters();

-  double time2 = MPI_Wtime();
-
-  if (comm->me == 0) {
-    if (screen)
-      fprintf(screen,"  find clusters CPU = %g secs\n",time2-time1);
-    if (logfile)
-      fprintf(logfile,"  find clusters CPU = %g secs\n",time2-time1);
-  }
-
  // initialize list of SHAKE clusters to constrain

  maxlist = 0;
@ -720,6 +707,13 @@ void FixShake::find_clusters()
  int nlocal = atom->nlocal;
  int angles_allow = atom->avec->angles_allow;

+  // setup ring of procs
+
+  int next = me + 1;
+  int prev = me -1;
+  if (next == nprocs) next = 0;
+  if (prev < 0) prev = nprocs - 1;
+
  // -----------------------------------------------------
  // allocate arrays for self (1d) and bond partners (2d)
  // max = max # of bond partners for owned atoms = 2nd dim of partner arrays
@ -761,10 +755,6 @@ void FixShake::find_clusters()
  memory->create(partner_shake,nlocal,max,"shake:partner_shake");
  memory->create(partner_nshake,nlocal,max,"shake:partner_nshake");

-  // setup atomIDs and procowner vectors in rendezvous decomposition
-
-  atom_owners();
-
  // -----------------------------------------------------
  // set npartner and partner_tag from special arrays
  // -----------------------------------------------------
@ -788,13 +778,86 @@ void FixShake::find_clusters()
  }

  // -----------------------------------------------------
-  // set partner_mask, partner_type, partner_massflag,
-  //   partner_bondtype for all my bonded partners
-  // requires rendezvous communication for off-proc partners
+  // set partner_mask, partner_type, partner_massflag, partner_bondtype
+  //   for bonded partners
+  // requires communication for off-proc partners
  // -----------------------------------------------------

-  partner_info(npartner,partner_tag,partner_mask,partner_type,
-	       partner_massflag,partner_bondtype);
+  // fill in mask, type, massflag, bondtype if own bond partner
+  // info to store in buf for each off-proc bond = nper = 6
+  //   2 atoms IDs in bond, space for mask, type, massflag, bondtype
+  // nbufmax = largest buffer needed to hold info from any proc
+
+  int nper = 6;
+
+  nbuf = 0;
+  for (i = 0; i < nlocal; i++) {
+    for (j = 0; j < npartner[i]; j++) {
+      partner_mask[i][j] = 0;
+      partner_type[i][j] = 0;
+      partner_massflag[i][j] = 0;
+      partner_bondtype[i][j] = 0;
+
+      m = atom->map(partner_tag[i][j]);
+      if (m >= 0 && m < nlocal) {
+        partner_mask[i][j] = mask[m];
+        partner_type[i][j] = type[m];
+        if (nmass) {
+          if (rmass) massone = rmass[m];
+          else massone = mass[type[m]];
+          partner_massflag[i][j] = masscheck(massone);
+        }
+        n = bondtype_findset(i,tag[i],partner_tag[i][j],0);
+        if (n) partner_bondtype[i][j] = n;
+        else {
+          n = bondtype_findset(m,tag[i],partner_tag[i][j],0);
+          if (n) partner_bondtype[i][j] = n;
+        }
+      } else nbuf += nper;
+    }
+  }
+
+  memory->create(buf,nbuf,"shake:buf");
+
+  // fill buffer with info
+
+  size = 0;
+  for (i = 0; i < nlocal; i++) {
+    for (j = 0; j < npartner[i]; j++) {
+      m = atom->map(partner_tag[i][j]);
+      if (m < 0 || m >= nlocal) {
+        buf[size] = tag[i];
+        buf[size+1] = partner_tag[i][j];
+        buf[size+2] = 0;
+        buf[size+3] = 0;
+        buf[size+4] = 0;
+        n = bondtype_findset(i,tag[i],partner_tag[i][j],0);
+        if (n) buf[size+5] = n;
+        else buf[size+5] = 0;
+        size += nper;
+      }
+    }
+  }
+
+  // cycle buffer around ring of procs back to self
+
+  comm->ring(size,sizeof(tagint),buf,1,ring_bonds,buf,(void *)this);
+
+  // store partner info returned to me
+
+  m = 0;
+  while (m < size) {
+    i = atom->map(buf[m]);
+    for (j = 0; j < npartner[i]; j++)
+      if (buf[m+1] == partner_tag[i][j]) break;
+    partner_mask[i][j] = buf[m+2];
+    partner_type[i][j] = buf[m+3];
+    partner_massflag[i][j] = buf[m+4];
+    partner_bondtype[i][j] = buf[m+5];
+    m += nper;
+  }
+
+  memory->destroy(buf);

  // error check for unfilled partner info
  // if partner_type not set, is an error
@ -805,18 +868,17 @@ void FixShake::find_clusters()
  // else it's an error

  flag = 0;
-  int flag2 = 0;
  for (i = 0; i < nlocal; i++)
    for (j = 0; j < npartner[i]; j++) {
-      if (partner_type[i][j] == 0) flag++;
+      if (partner_type[i][j] == 0) flag = 1;
      if (!(mask[i] & groupbit)) continue;
      if (!(partner_mask[i][j] & groupbit)) continue;
-      if (partner_bondtype[i][j] == 0) flag2++;
+      if (partner_bondtype[i][j] == 0) flag = 1;
    }

  MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
  if (flag_all) error->all(FLERR,"Did not find fix shake partner info");
-  
+
  // -----------------------------------------------------
  // identify SHAKEable bonds
  // set nshake[i] = # of SHAKE bonds attached to atom i
@ -869,11 +931,56 @@ void FixShake::find_clusters()

  // -----------------------------------------------------
  // set partner_nshake for bonded partners
-  // requires rendezvous communication for off-proc partners
+  // requires communication for off-proc partners
  // -----------------------------------------------------

-  nshake_info(npartner,partner_tag,partner_nshake);
-  
+  // fill in partner_nshake if own bond partner
+  // info to store in buf for each off-proc bond =
+  //   2 atoms IDs in bond, space for nshake value
+  // nbufmax = largest buffer needed to hold info from any proc
+
+  nbuf = 0;
+  for (i = 0; i < nlocal; i++) {
+    for (j = 0; j < npartner[i]; j++) {
+      m = atom->map(partner_tag[i][j]);
+      if (m >= 0 && m < nlocal) partner_nshake[i][j] = nshake[m];
+      else nbuf += 3;
+    }
+  }
+
+  memory->create(buf,nbuf,"shake:buf");
+
+  // fill buffer with info
+
+  size = 0;
+  for (i = 0; i < nlocal; i++) {
+    for (j = 0; j < npartner[i]; j++) {
+      m = atom->map(partner_tag[i][j]);
+      if (m < 0 || m >= nlocal) {
+        buf[size] = tag[i];
+        buf[size+1] = partner_tag[i][j];
+        size += 3;
+      }
+    }
+  }
+
+  // cycle buffer around ring of procs back to self
+
+  comm->ring(size,sizeof(tagint),buf,2,ring_nshake,buf,(void *)this);
+
+  // store partner info returned to me
+
+  m = 0;
+  while (m < size) {
+    i = atom->map(buf[m]);
+    for (j = 0; j < npartner[i]; j++)
+      if (buf[m+1] == partner_tag[i][j]) break;
+    partner_nshake[i][j] = buf[m+2];
+    m += 3;
+  }
+
+  memory->destroy(buf);
+
  // -----------------------------------------------------
  // error checks
  // no atom with nshake > 3
@ -881,7 +988,7 @@ void FixShake::find_clusters()
  // -----------------------------------------------------

  flag = 0;
-  for (i = 0; i < nlocal; i++) if (nshake[i] > 3) flag++;
+  for (i = 0; i < nlocal; i++) if (nshake[i] > 3) flag = 1;
  MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
  if (flag_all) error->all(FLERR,"Shake cluster of more than 4 atoms");

@ -889,7 +996,7 @@ void FixShake::find_clusters()
  for (i = 0; i < nlocal; i++) {
    if (nshake[i] <= 1) continue;
    for (j = 0; j < npartner[i]; j++)
-      if (partner_shake[i][j] && partner_nshake[i][j] > 1) flag++;
+      if (partner_shake[i][j] && partner_nshake[i][j] > 1) flag = 1;
  }
  MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
  if (flag_all) error->all(FLERR,"Shake clusters are connected");
@ -957,18 +1064,68 @@ void FixShake::find_clusters()

  // -----------------------------------------------------
  // set shake_flag,shake_atom,shake_type for non-central atoms
-  // requires rendezvous communication for off-proc atoms
+  // requires communication for off-proc atoms
  // -----------------------------------------------------

-  shake_info(npartner,partner_tag,partner_shake);
+  // fill in shake arrays for each bond partner I own
+  // info to store in buf for each off-proc bond =
+  //   all values from shake_flag, shake_atom, shake_type
+  // nbufmax = largest buffer needed to hold info from any proc
+
+  nbuf = 0;
+  for (i = 0; i < nlocal; i++) {
+    if (shake_flag[i] == 0) continue;
+    for (j = 0; j < npartner[i]; j++) {
+      if (partner_shake[i][j] == 0) continue;
+      m = atom->map(partner_tag[i][j]);
+      if (m >= 0 && m < nlocal) {
+        shake_flag[m] = shake_flag[i];
+        shake_atom[m][0] = shake_atom[i][0];
+        shake_atom[m][1] = shake_atom[i][1];
+        shake_atom[m][2] = shake_atom[i][2];
+        shake_atom[m][3] = shake_atom[i][3];
+        shake_type[m][0] = shake_type[i][0];
+        shake_type[m][1] = shake_type[i][1];
+        shake_type[m][2] = shake_type[i][2];
+      } else nbuf += 9;
+    }
+  }
+
+  memory->create(buf,nbuf,"shake:buf");
+
+  // fill buffer with info
+
+  size = 0;
+  for (i = 0; i < nlocal; i++) {
+    if (shake_flag[i] == 0) continue;
+    for (j = 0; j < npartner[i]; j++) {
+      if (partner_shake[i][j] == 0) continue;
+      m = atom->map(partner_tag[i][j]);
+      if (m < 0 || m >= nlocal) {
+        buf[size] = partner_tag[i][j];
+        buf[size+1] = shake_flag[i];
+        buf[size+2] = shake_atom[i][0];
+        buf[size+3] = shake_atom[i][1];
+        buf[size+4] = shake_atom[i][2];
+        buf[size+5] = shake_atom[i][3];
+        buf[size+6] = shake_type[i][0];
+        buf[size+7] = shake_type[i][1];
+        buf[size+8] = shake_type[i][2];
+        size += 9;
+      }
+    }
+  }
+
+  // cycle buffer around ring of procs back to self
+
+  comm->ring(size,sizeof(tagint),buf,3,ring_shake,NULL,(void *)this);
+
+  memory->destroy(buf);

  // -----------------------------------------------------
  // free local memory
  // -----------------------------------------------------

-  memory->destroy(atomIDs);
-  memory->destroy(procowner);
-
  memory->destroy(npartner);
  memory->destroy(nshake);
  memory->destroy(partner_tag);
@ -1042,551 +1199,98 @@ void FixShake::find_clusters()
 }

 /* ----------------------------------------------------------------------
-   setup atomIDs and procowner
+   when receive buffer, scan bond partner IDs for atoms I own
+   if I own partner:
+     fill in mask and type and massflag
+     search for bond with 1st atom and fill in bondtype
 ------------------------------------------------------------------------- */

-void FixShake::atom_owners()
+void FixShake::ring_bonds(int ndatum, char *cbuf, void *ptr)
 {
-  tagint *tag = atom->tag;
-  int nlocal = atom->nlocal;
-
-  int *proclist;
-  memory->create(proclist,nlocal,"shake:proclist");
-  IDRvous *idbuf = (IDRvous *) 
-    memory->smalloc((bigint) nlocal*sizeof(IDRvous),"shake:idbuf");
-
-  // setup input buf to rendezvous comm
-  // input datums = pairs of bonded atoms
-  // owning proc for each datum = random hash of atomID
-  // one datum for each owned atom: datum = owning proc, atomID
-
-  for (int i = 0; i < nlocal; i++) {
-    proclist[i] = tag[i] % nprocs;
-    idbuf[i].me = me;
-    idbuf[i].atomID = tag[i];
-  }
-
-  // perform rendezvous operation
-  // each proc assigned every 1/Pth atom
-  
-  char *buf;
-  comm->rendezvous(RVOUS,nlocal,(char *) idbuf,sizeof(IDRvous),
-                   0,proclist,
-                   rendezvous_ids,0,buf,0,(void *) this);
-
-  memory->destroy(proclist);
-  memory->sfree(idbuf);
-}
-
-/* ----------------------------------------------------------------------
-   setup partner_mask, partner_type, partner_massflag, partner_bondtype
------------------------------------------------------------------------- */
-
-void FixShake::partner_info(int *npartner, tagint **partner_tag,
-			    int **partner_mask, int **partner_type,
-			    int **partner_massflag, int **partner_bondtype)
-{
-  int i,j,m,n;
-  int nlocal = atom->nlocal;
-
-  // nsend = # of my datums to send
-  // one datum for every off-processor partner
-  
-  int nsend = 0;
-  for (i = 0; i < nlocal; i++) {
-    for (j = 0; j < npartner[i]; j++) {
-      m = atom->map(partner_tag[i][j]);
-      if (m < 0 || m >= nlocal) nsend++;
-    }
-  }
-
-  int *proclist;
-  memory->create(proclist,nsend,"special:proclist");
-  PartnerInfo *inbuf = (PartnerInfo *) 
-    memory->smalloc((bigint) nsend*sizeof(PartnerInfo),"special:inbuf");
-
-  // set values in 4 partner arrays for all partner atoms I own
-  // also setup input buf to rendezvous comm
-  // input datums = pair of bonded atoms where I do not own partner
-  // owning proc for each datum = partner_tag % nprocs
-  // datum: atomID = partner_tag (off-proc), partnerID = tag (on-proc)
-  //        4 values for my owned atom
-
+  FixShake *fsptr = (FixShake *)ptr;
+  Atom *atom = fsptr->atom;
  double *rmass = atom->rmass;
  double *mass = atom->mass;
-  int *type = atom->type;
  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  
-  double massone;
-  
-  nsend = 0;
-  for (i = 0; i < nlocal; i++) {
-    for (j = 0; j < npartner[i]; j++) {
-      partner_mask[i][j] = 0;
-      partner_type[i][j] = 0;
-      partner_massflag[i][j] = 0;
-      partner_bondtype[i][j] = 0;
-
-      m = atom->map(partner_tag[i][j]);
-      
-      if (m >= 0 && m < nlocal) {
-        partner_mask[i][j] = mask[m];
-        partner_type[i][j] = type[m];
-        if (nmass) {
-          if (rmass) massone = rmass[m];
-          else massone = mass[type[m]];
-          partner_massflag[i][j] = masscheck(massone);
-        }
-        n = bondtype_findset(i,tag[i],partner_tag[i][j],0);
-        if (n) partner_bondtype[i][j] = n;
-        else {
-          n = bondtype_findset(m,tag[i],partner_tag[i][j],0);
-          if (n) partner_bondtype[i][j] = n;
-        }
-	
-      } else {
-	proclist[nsend] = partner_tag[i][j] % nprocs;
-	inbuf[nsend].atomID = partner_tag[i][j];
-	inbuf[nsend].partnerID = tag[i];
-	inbuf[nsend].mask = mask[i];
-	inbuf[nsend].type = type[i];
-        if (nmass) {
-          if (rmass) massone = rmass[i];
-          else massone = mass[type[i]];
-	  inbuf[nsend].massflag = masscheck(massone);
-        } else inbuf[nsend].massflag = 0;
-
-	// my atom may own bond, in which case set partner_bondtype
-	// else receiver of this datum will own the bond and return the value
-	
-        n = bondtype_findset(i,tag[i],partner_tag[i][j],0);
-        if (n) {
-	  partner_bondtype[i][j] = n;
-	  inbuf[nsend].bondtype = n;
-	} else inbuf[nsend].bondtype = 0;
-	
-	nsend++;
-      }
-    }
-  }
-
-  // perform rendezvous operation
-  // each proc owns random subset of atoms
-  // receives all data needed to populate un-owned partner 4 values
-
-  char *buf;
-  int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PartnerInfo),
-                                 0,proclist,
-                                 rendezvous_partners_info,
-                                 0,buf,sizeof(PartnerInfo),
-                                 (void *) this);
-  PartnerInfo *outbuf = (PartnerInfo *) buf;
-    
-  memory->destroy(proclist);
-  memory->sfree(inbuf);
-
-  // set partner 4 values for un-onwed partners based on output info
-  // outbuf.atomID = my owned atom, outbuf.partnerID = partner the info is for
-  
-  for (m = 0; m < nreturn; m++) {
-    i = atom->map(outbuf[m].atomID);
-    for (j = 0; j < npartner[i]; j++)
-      if (partner_tag[i][j] == outbuf[m].partnerID) break;
-    partner_mask[i][j] = outbuf[m].mask;
-    partner_type[i][j] = outbuf[m].type;
-    partner_massflag[i][j] = outbuf[m].massflag;
-
-    // only set partner_bondtype if my atom did not set it
-    //   when setting up rendezvous
-    // if this proc set it, then sender of this datum set outbuf.bondtype = 0
-    
-    if (partner_bondtype[i][j] == 0)
-      partner_bondtype[i][j] = outbuf[m].bondtype;
-  }
-
-  memory->sfree(outbuf);
-}
-
-/* ----------------------------------------------------------------------
-   setup partner_nshake
------------------------------------------------------------------------- */
-
-void FixShake::nshake_info(int *npartner, tagint **partner_tag,
-			   int **partner_nshake)
-{
-  int i,j,m,n;
+  int *type = atom->type;
  int nlocal = atom->nlocal;
-
-  // nsend = # of my datums to send
-  // one datum for every off-processor partner
-  
-  int nsend = 0;
-  for (i = 0; i < nlocal; i++) {
-    for (j = 0; j < npartner[i]; j++) {
-      m = atom->map(partner_tag[i][j]);
-      if (m < 0 || m >= nlocal) nsend++;
-    }
-  }
-
-  int *proclist;
-  memory->create(proclist,nsend,"special:proclist");
-  NShakeInfo *inbuf = (NShakeInfo *) 
-    memory->smalloc((bigint) nsend*sizeof(NShakeInfo),"special:inbuf");
-
-  // set partner_nshake for all partner atoms I own
-  // also setup input buf to rendezvous comm
-  // input datums = pair of bonded atoms where I do not own partner
-  // owning proc for each datum = partner_tag % nprocs
-  // datum: atomID = partner_tag (off-proc), partnerID = tag (on-proc)
-  //        nshake value for my owned atom
-
-  tagint *tag = atom->tag;
-
-  nsend = 0;
-  for (i = 0; i < nlocal; i++) {
-    for (j = 0; j < npartner[i]; j++) {
-      partner_nshake[i][j] = 0;
-      m = atom->map(partner_tag[i][j]);
-      if (m >= 0 && m < nlocal) {
-        partner_nshake[i][j] = nshake[m];
-      } else {
-	proclist[nsend] = partner_tag[i][j] % nprocs;
-	inbuf[nsend].atomID = partner_tag[i][j];
-	inbuf[nsend].partnerID = tag[i];
-	inbuf[nsend].nshake = nshake[i];
-	nsend++;
-      }
-    }
-  }
-
-  // perform rendezvous operation
-  // each proc owns random subset of atoms
-  // receives all data needed to populate un-owned partner nshake
-
-  char *buf;
-  int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(NShakeInfo),
-                                 0,proclist,
-                                 rendezvous_nshake,0,buf,sizeof(NShakeInfo),
-                                 (void *) this);
-  NShakeInfo *outbuf = (NShakeInfo *) buf;
-    
-  memory->destroy(proclist);
-  memory->sfree(inbuf);
-
-  // set partner nshake for un-onwed partners based on output info
-  // outbuf.atomID = my owned atom, outbuf.partnerID = partner the info is for
-
-  for (m = 0; m < nreturn; m++) {
-    i = atom->map(outbuf[m].atomID);
-    for (j = 0; j < npartner[i]; j++)
-      if (partner_tag[i][j] == outbuf[m].partnerID) break;
-    partner_nshake[i][j] = outbuf[m].nshake;
-  }
-
-  memory->sfree(outbuf);
-}
-
-/* ----------------------------------------------------------------------
-   setup shake_flag, shake_atom, shake_type
------------------------------------------------------------------------- */
-
-void FixShake::shake_info(int *npartner, tagint **partner_tag,
-			  int **partner_shake)
-{
-  int i,j,m,n;
-  int nlocal = atom->nlocal;
-
-  // nsend = # of my datums to send
-  // one datum for every off-processor partner
-  
-  int nsend = 0;
-  for (i = 0; i < nlocal; i++) {
-    for (j = 0; j < npartner[i]; j++) {
-      m = atom->map(partner_tag[i][j]);
-      if (m < 0 || m >= nlocal) nsend++;
-    }
-  }
-
-  int *proclist;
-  memory->create(proclist,nsend,"special:proclist");
-  ShakeInfo *inbuf = (ShakeInfo *) 
-    memory->smalloc((bigint) nsend*sizeof(ShakeInfo),"special:inbuf");
-
-  // set 3 shake arrays for all partner atoms I own
-  // also setup input buf to rendezvous comm
-  // input datums = partner atom where I do not own partner
-  // owning proc for each datum = partner_tag % nprocs
-  // datum: atomID = partner_tag (off-proc)
-  //        values in 3 shake arrays
-
-  nsend = 0;
-  for (i = 0; i < nlocal; i++) {
-    if (shake_flag[i] == 0) continue;
-    for (j = 0; j < npartner[i]; j++) {
-      if (partner_shake[i][j] == 0) continue;
-      m = atom->map(partner_tag[i][j]);
-      
-      if (m >= 0 && m < nlocal) {
-        shake_flag[m] = shake_flag[i];
-        shake_atom[m][0] = shake_atom[i][0];
-        shake_atom[m][1] = shake_atom[i][1];
-        shake_atom[m][2] = shake_atom[i][2];
-        shake_atom[m][3] = shake_atom[i][3];
-        shake_type[m][0] = shake_type[i][0];
-        shake_type[m][1] = shake_type[i][1];
-        shake_type[m][2] = shake_type[i][2];
-
-      } else {
-	proclist[nsend] = partner_tag[i][j] % nprocs;
-	inbuf[nsend].atomID = partner_tag[i][j];
-        inbuf[nsend].shake_flag = shake_flag[i];
-        inbuf[nsend].shake_atom[0] = shake_atom[i][0];
-        inbuf[nsend].shake_atom[1] = shake_atom[i][1];
-        inbuf[nsend].shake_atom[2] = shake_atom[i][2];
-        inbuf[nsend].shake_atom[3] = shake_atom[i][3];
-        inbuf[nsend].shake_type[0] = shake_type[i][0];
-        inbuf[nsend].shake_type[1] = shake_type[i][1];
-        inbuf[nsend].shake_type[2] = shake_type[i][2];
-	nsend++;
-      }
-    }
-  }
-
-  // perform rendezvous operation
-  // each proc owns random subset of atoms
-  // receives all data needed to populate un-owned shake info
-
-  char *buf;
-  int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(ShakeInfo),
-                                 0,proclist,
-                                 rendezvous_shake,0,buf,sizeof(ShakeInfo),
-                                 (void *) this);
-  ShakeInfo *outbuf = (ShakeInfo *) buf;
-    
-  memory->destroy(proclist);
-  memory->sfree(inbuf);
-
-  // set shake info for un-onwed partners based on output info
-
-  for (m = 0; m < nreturn; m++) {
-    i = atom->map(outbuf[m].atomID);
-    shake_flag[i] = outbuf[m].shake_flag;
-    shake_atom[i][0] = outbuf[m].shake_atom[0];
-    shake_atom[i][1] = outbuf[m].shake_atom[1];
-    shake_atom[i][2] = outbuf[m].shake_atom[2];
-    shake_atom[i][3] = outbuf[m].shake_atom[3];
-    shake_type[i][0] = outbuf[m].shake_type[0];
-    shake_type[i][1] = outbuf[m].shake_type[1];
-    shake_type[i][2] = outbuf[m].shake_type[2];
-  }
-
-  memory->sfree(outbuf);
-}
-
-/* ----------------------------------------------------------------------
-   process data for atoms assigned to me in rendezvous decomposition
-   inbuf = list of N IDRvous datums
-   no outbuf
------------------------------------------------------------------------- */
-
-int FixShake::rendezvous_ids(int n, char *inbuf,
-			     int &flag, int *&proclist, char *&outbuf,
-			     void *ptr)
-{
-  FixShake *fsptr = (FixShake *) ptr;
-  Memory *memory = fsptr->memory;
-
-  tagint *atomIDs;
-  int *procowner;
-  
-  memory->create(atomIDs,n,"special:atomIDs");
-  memory->create(procowner,n,"special:procowner");
-  
-  IDRvous *in = (IDRvous *) inbuf;
-
-  for (int i = 0; i < n; i++) {
-    atomIDs[i] = in[i].atomID;
-    procowner[i] = in[i].me;
-  }
-
-  // store rendezvous data in FixShake class
-  
-  fsptr->nrvous = n;
-  fsptr->atomIDs = atomIDs;
-  fsptr->procowner = procowner;
-
-  // flag = 0: no second comm needed in rendezvous
-  
-  flag = 0;
-  return 0;
-}
-
-/* ----------------------------------------------------------------------
-   process data for atoms assigned to me in rendezvous decomposition
-   inbuf = list of N PairRvous datums
-   outbuf = same list of N PairRvous datums, routed to different procs
------------------------------------------------------------------------- */
-
-int FixShake::rendezvous_partners_info(int n, char *inbuf,
-				       int &flag, int *&proclist, char *&outbuf,
-				       void *ptr)
-{
-  int i,m;
-  
-  FixShake *fsptr = (FixShake *) ptr;
-  Atom *atom = fsptr->atom;
-  Memory *memory = fsptr->memory;
-
-  // clear atom map so it can be here as a hash table
-  // faster than an STL map for large atom counts
-
-  atom->map_clear();
-
-  // hash atom IDs stored in rendezvous decomposition
-  
-  int nrvous = fsptr->nrvous;
-  tagint *atomIDs = fsptr->atomIDs;
-
-  for (i = 0; i < nrvous; i++)
-    atom->map_one(atomIDs[i],i);
-
-  // proclist = owner of atomID in caller decomposition
-  // outbuf = info about owned atomID = 4 values
-  
-  PartnerInfo *in = (PartnerInfo *) inbuf;
-  int *procowner = fsptr->procowner;
-  memory->create(proclist,n,"shake:proclist");
-
-  double massone;
  int nmass = fsptr->nmass;

-  for (i = 0; i < n; i++) {
-    m = atom->map(in[i].atomID);
-    proclist[i] = procowner[m];
+  tagint *buf = (tagint *) cbuf;
+  int m,n;
+  double massone;
+
+  for (int i = 0; i < ndatum; i += 6) {
+    m = atom->map(buf[i+1]);
+    if (m >= 0 && m < nlocal) {
+      buf[i+2] = mask[m];
+      buf[i+3] = type[m];
+      if (nmass) {
+        if (rmass) massone = rmass[m];
+        else massone = mass[type[m]];
+        buf[i+4] = fsptr->masscheck(massone);
+      }
+      if (buf[i+5] == 0) {
+        n = fsptr->bondtype_findset(m,buf[i],buf[i+1],0);
+        if (n) buf[i+5] = n;
+      }
+    }
  }
-
-  outbuf = inbuf;
-  
-  // re-create atom map
-
-  atom->map_init(0);
-  atom->nghost = 0;
-  atom->map_set();
-
-  // flag = 1: outbuf = inbuf
-  
-  flag = 1;
-  return n;
 }

 /* ----------------------------------------------------------------------
-   process data for atoms assigned to me in rendezvous decomposition
-   inbuf = list of N NShakeInfo datums
-   outbuf = same list of N NShakeInfo datums, routed to different procs
+   when receive buffer, scan bond partner IDs for atoms I own
+   if I own partner, fill in nshake value
 ------------------------------------------------------------------------- */

-int FixShake::rendezvous_nshake(int n, char *inbuf,
-				int &flag, int *&proclist, char *&outbuf,
-				void *ptr)
+void FixShake::ring_nshake(int ndatum, char *cbuf, void *ptr)
 {
-  int i,j,m;
-  
-  FixShake *fsptr = (FixShake *) ptr;
+  FixShake *fsptr = (FixShake *)ptr;
  Atom *atom = fsptr->atom;
-  Memory *memory = fsptr->memory;
+  int nlocal = atom->nlocal;

-  // clear atom map so it can be here as a hash table
-  // faster than an STL map for large atom counts
+  int *nshake = fsptr->nshake;

-  atom->map_clear();
+  tagint *buf = (tagint *) cbuf;
+  int m;

-  // hash atom IDs stored in rendezvous decomposition
-  
-  int nrvous = fsptr->nrvous;
-  tagint *atomIDs = fsptr->atomIDs;
-
-  for (i = 0; i < nrvous; i++)
-    atom->map_one(atomIDs[i],i);
-
-  // proclist = owner of atomID in caller decomposition
-  // outbuf = info about owned atomID
-  
-  NShakeInfo *in = (NShakeInfo *) inbuf;
-  int *procowner = fsptr->procowner;
-  memory->create(proclist,n,"shake:proclist");
-  
-  for (i = 0; i < n; i++) {
-    m = atom->map(in[i].atomID);
-    proclist[i] = procowner[m];
+  for (int i = 0; i < ndatum; i += 3) {
+    m = atom->map(buf[i+1]);
+    if (m >= 0 && m < nlocal) buf[i+2] = nshake[m];
  }
-
-  outbuf = inbuf;
-  
-  // re-create atom map
-
-  atom->map_init(0);
-  atom->nghost = 0;
-  atom->map_set();
-
-  // flag = 1: outbuf = inbuf
-  
-  flag = 1;
-  return n;
 }
+
 /* ----------------------------------------------------------------------
-   process data for atoms assigned to me in rendezvous decomposition
-   inbuf = list of N PairRvous datums
-   outbuf = same list of N PairRvous datums, routed to different procs
+   when receive buffer, scan bond partner IDs for atoms I own
+   if I own partner, fill in nshake value
 ------------------------------------------------------------------------- */

-int FixShake::rendezvous_shake(int n, char *inbuf,
-                               int &flag, int *&proclist, char *&outbuf,
-                               void *ptr)
+void FixShake::ring_shake(int ndatum, char *cbuf, void *ptr)
 {
-  int i,j,m;
-  
-  FixShake *fsptr = (FixShake *) ptr;
+  FixShake *fsptr = (FixShake *)ptr;
  Atom *atom = fsptr->atom;
-  Memory *memory = fsptr->memory;
+  int nlocal = atom->nlocal;

-  // clear atom map so it can be here as a hash table
-  // faster than an STL map for large atom counts
+  int *shake_flag = fsptr->shake_flag;
+  tagint **shake_atom = fsptr->shake_atom;
+  int **shake_type = fsptr->shake_type;

-  atom->map_clear();
+  tagint *buf = (tagint *) cbuf;
+  int m;

-  // hash atom IDs stored in rendezvous decomposition
-  
-  int nrvous = fsptr->nrvous;
-  tagint *atomIDs = fsptr->atomIDs;
-
-  for (i = 0; i < nrvous; i++)
-    atom->map_one(atomIDs[i],i);
-
-  // proclist = owner of atomID in caller decomposition
-  // outbuf = info about owned atomID
-  
-  ShakeInfo *in = (ShakeInfo *) inbuf;
-  int *procowner = fsptr->procowner;
-  memory->create(proclist,n,"shake:proclist");
-
-  for (i = 0; i < n; i++) {
-    m = atom->map(in[i].atomID);
-    proclist[i] = procowner[m];
+  for (int i = 0; i < ndatum; i += 9) {
+    m = atom->map(buf[i]);
+    if (m >= 0 && m < nlocal) {
+      shake_flag[m] = buf[i+1];
+      shake_atom[m][0] = buf[i+2];
+      shake_atom[m][1] = buf[i+3];
+      shake_atom[m][2] = buf[i+4];
+      shake_atom[m][3] = buf[i+5];
+      shake_type[m][0] = buf[i+6];
+      shake_type[m][1] = buf[i+7];
+      shake_type[m][2] = buf[i+8];
+    }
  }
-
-  outbuf = inbuf;
-  
-  // re-create atom map
-
-  atom->map_init(0);
-  atom->nghost = 0;
-  atom->map_set();
-
-  // flag = 1: outbuf = inbuf;
-  
-  flag = 1;
-  return n;
 }

 /* ----------------------------------------------------------------------
--- a/src/RIGID/fix_shake.h
+++ b/src/RIGID/fix_shake.h
@ -120,11 +120,6 @@ class FixShake : public Fix {
  int nmol;

  void find_clusters();
-  void atom_owners();
-  void partner_info(int *, tagint **, int **, int **, int **, int **);
-  void nshake_info(int *, tagint **, int **);
-  void shake_info(int *, tagint **, int **);
-  
  int masscheck(double);
  void unconstrained_update();
  void unconstrained_update_respa(int);
@ -136,40 +131,12 @@ class FixShake : public Fix {
  int bondtype_findset(int, tagint, tagint, int);
  int angletype_findset(int, tagint, tagint, int);

-  // data used by rendezvous callback methods
+  // static variable for ring communication callback to access class data
+  // callback functions for ring communication

-  int nrvous;
-  tagint *atomIDs;
-  int *procowner;
-
-  struct IDRvous {
-    int me;
-    tagint atomID;
-  };
-
-  struct PartnerInfo {
-    tagint atomID,partnerID;
-    int mask,type,massflag,bondtype;
-  };
-
-  struct NShakeInfo {
-    tagint atomID,partnerID;
-    int nshake;
-  };
-
-  struct ShakeInfo {
-    tagint atomID;
-    tagint shake_atom[4];
-    int shake_flag;
-    int shake_type[3];
-  };
-
-  // callback functions for rendezvous communication
-
-  static int rendezvous_ids(int, char *, int &, int *&, char *&, void *);
-  static int rendezvous_partners_info(int, char *, int &, int *&, char *&, void *);
-  static int rendezvous_nshake(int, char *, int &, int *&, char *&, void *);
-  static int rendezvous_shake(int, char *, int &, int *&, char *&, void *);
+  static void ring_bonds(int, char *, void *);
+  static void ring_nshake(int, char *, void *);
+  static void ring_shake(int, char *, void *);
 };

 }
--- a/src/comm.cpp
+++ b/src/comm.cpp
@ -28,7 +28,6 @@
 #include "dump.h"
 #include "group.h"
 #include "procmap.h"
-#include "irregular.h"
 #include "accelerator_kokkos.h"
 #include "memory.h"
 #include "error.h"
@ -726,433 +725,6 @@ void Comm::ring(int n, int nper, void *inbuf, int messtag,
  memory->destroy(bufcopy);
 }

-/* ----------------------------------------------------------------------
-   rendezvous communication operation
-   three stages:
-     first comm sends inbuf from caller decomp to rvous decomp
-     callback operates on data in rendevous decomp
-     second comm sends outbuf from rvous decomp back to caller decomp
-   inputs:
-     which = perform (0) irregular or (1) MPI_All2allv communication
-     n = # of datums in inbuf
-     inbuf = vector of input datums
-     insize = byte size of each input datum
-     inorder = 0 for inbuf in random proc order, 1 for datums ordered by proc
-     procs: inorder 0 = proc to send each datum to, 1 = # of datums/proc, 
-     callback = caller function to invoke in rendezvous decomposition
-                takes input datums, returns output datums
-     outorder = same as inorder, but for datums returned by callback()
-     ptr = pointer to caller class, passed to callback()
-   outputs:
-     nout = # of output datums (function return)
-     outbuf = vector of output datums
-     outsize = byte size of each output datum
-   callback inputs:
-     nrvous = # of rvous decomp datums in inbuf_rvous
-     inbuf_rvous = vector of rvous decomp input datums
-     ptr = pointer to caller class
-   callback outputs:
-     nrvous_out = # of rvous decomp output datums (function return)
-     flag = 0 for no second comm, 1 for outbuf_rvous = inbuf_rvous,
-            2 for second comm with new outbuf_rvous
-     procs_rvous = outorder 0 = proc to send each datum to, 1 = # of datums/proc
-                   allocated
-     outbuf_rvous = vector of rvous decomp output datums
-   NOTE: could use MPI_INT or MPI_DOUBLE insead of MPI_CHAR
-         to avoid checked-for overflow in MPI_Alltoallv?
------------------------------------------------------------------------- */
-
-int Comm::
-rendezvous(int which, int n, char *inbuf, int insize,
-           int inorder, int *procs,
-           int (*callback)(int, char *, int &, int *&, char *&, void *),
-           int outorder, char *&outbuf, int outsize, void *ptr, int statflag)
-{
-  if (which == 0)
-    return rendezvous_irregular(n,inbuf,insize,inorder,procs,callback,
-                                outorder,outbuf,outsize,ptr,statflag);
-  else
-    return rendezvous_all2all(n,inbuf,insize,inorder,procs,callback,
-                              outorder,outbuf,outsize,ptr,statflag);
-}
-
-/* ---------------------------------------------------------------------- */
-
-int Comm::
-rendezvous_irregular(int n, char *inbuf, int insize, int inorder, int *procs,
-                     int (*callback)(int, char *, int &, int *&, char *&, void *),
-                     int outorder, char *&outbuf, 
-                     int outsize, void *ptr, int statflag)
-{
-  // irregular comm of inbuf from caller decomp to rendezvous decomp
-  
-  Irregular *irregular = new Irregular(lmp);
-
-  int nrvous;
-  if (inorder) nrvous = irregular->create_data_grouped(n,procs);
-  else nrvous = irregular->create_data(n,procs);
-
-  char *inbuf_rvous = (char *) memory->smalloc((bigint) nrvous*insize,
-                                               "rendezvous:inbuf");
-  irregular->exchange_data(inbuf,insize,inbuf_rvous);
-
-  bigint irregular1_bytes = irregular->memory_usage();
-  irregular->destroy_data();
-  delete irregular;
-
-  // peform rendezvous computation via callback()
-  // callback() allocates/populates proclist_rvous and outbuf_rvous
-
-  int flag;
-  int *procs_rvous;
-  char *outbuf_rvous;
-  int nrvous_out = callback(nrvous,inbuf_rvous,flag,
-                            procs_rvous,outbuf_rvous,ptr);
-
-  if (flag != 1) memory->sfree(inbuf_rvous);  // outbuf_rvous = inbuf_vous
-  if (flag == 0) {
-    if (statflag) rendezvous_stats(n,0,nrvous,nrvous_out,insize,outsize,
-                                   (bigint) nrvous_out*sizeof(int) + 
-                                   irregular1_bytes);
-    return 0;    // all nout_rvous are 0, no 2nd comm stage
-  }
-
-  // irregular comm of outbuf from rendezvous decomp back to caller decomp
-  // caller will free outbuf
-
-  irregular = new Irregular(lmp);
-  
-  int nout;
-  if (outorder) 
-    nout = irregular->create_data_grouped(nrvous_out,procs_rvous);
-  else nout = irregular->create_data(nrvous_out,procs_rvous);
-
-  outbuf = (char *) memory->smalloc((bigint) nout*outsize,
-                                    "rendezvous:outbuf");
-  irregular->exchange_data(outbuf_rvous,outsize,outbuf);
-
-  bigint irregular2_bytes = irregular->memory_usage();
-  irregular->destroy_data();
-  delete irregular;
-
-  memory->destroy(procs_rvous);
-  memory->sfree(outbuf_rvous);
-
-  // return number of output datums
-  // last arg to stats() = memory for procs_rvous + irregular comm
-
-  if (statflag) rendezvous_stats(n,nout,nrvous,nrvous_out,insize,outsize,
-                                 (bigint) nrvous_out*sizeof(int) + 
-                                 MAX(irregular1_bytes,irregular2_bytes));
-  return nout;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int Comm::
-rendezvous_all2all(int n, char *inbuf, int insize, int inorder, int *procs,
-                   int (*callback)(int, char *, int &, int *&, char *&, void *),
-                   int outorder, char *&outbuf, int outsize, void *ptr,
-                   int statflag)
-{
-  int iproc;
-  bigint all2all1_bytes,all2all2_bytes;
-  int *sendcount,*sdispls,*recvcount,*rdispls;
-  int *procs_a2a;
-  bigint *offsets;
-  char *inbuf_a2a,*outbuf_a2a;
-
-  // create procs and inbuf for All2all if necesary
-  
-  if (!inorder) {
-    memory->create(procs_a2a,nprocs,"rendezvous:procs");
-    inbuf_a2a = (char *) memory->smalloc((bigint) n*insize,
-                                         "rendezvous:inbuf");
-    memory->create(offsets,nprocs,"rendezvous:offsets");
-
-    for (int i = 0; i < nprocs; i++) procs_a2a[i] = 0;
-    for (int i = 0; i < n; i++) procs_a2a[procs[i]]++;
-
-    offsets[0] = 0;
-    for (int i = 1; i < nprocs; i++)
-      offsets[i] = offsets[i-1] + insize*procs_a2a[i-1];
-
-    bigint offset = 0;
-    for (int i = 0; i < n; i++) {
-      iproc = procs[i];
-      memcpy(&inbuf_a2a[offsets[iproc]],&inbuf[offset],insize);
-      offsets[iproc] += insize;
-      offset += insize;
-    }
-
-    all2all1_bytes = nprocs*sizeof(int) + nprocs*sizeof(bigint) + n*insize;
-
-  } else {
-    procs_a2a = procs;
-    inbuf_a2a = inbuf;
-    all2all1_bytes = 0;
-  }
-
-  // create args for MPI_Alltoallv() on input data
-  
-  memory->create(sendcount,nprocs,"rendezvous:sendcount");
-  memcpy(sendcount,procs_a2a,nprocs*sizeof(int));
-  
-  memory->create(recvcount,nprocs,"rendezvous:recvcount");
-  MPI_Alltoall(sendcount,1,MPI_INT,recvcount,1,MPI_INT,world);
-
-  memory->create(sdispls,nprocs,"rendezvous:sdispls");
-  memory->create(rdispls,nprocs,"rendezvous:rdispls");
-  sdispls[0] = rdispls[0] = 0;
-  for (int i = 1; i < nprocs; i++) {
-    sdispls[i] = sdispls[i-1] + sendcount[i-1];
-    rdispls[i] = rdispls[i-1] + recvcount[i-1];
-  }
-  int nrvous = rdispls[nprocs-1] + recvcount[nprocs-1];
-
-  // test for overflow of input data due to imbalance or insize
-  // means that individual sdispls or rdispls values overflow
-  
-  int overflow = 0;
-  if ((bigint) n*insize > MAXSMALLINT) overflow = 1;
-  if ((bigint) nrvous*insize > MAXSMALLINT) overflow = 1;
-  int overflowall;
-  MPI_Allreduce(&overflow,&overflowall,1,MPI_INT,MPI_MAX,world);
-  if (overflowall) error->all(FLERR,"Overflow input size in rendezvous_a2a");
-  
-  for (int i = 0; i < nprocs; i++) {
-    sendcount[i] *= insize;
-    sdispls[i] *= insize;
-    recvcount[i] *= insize;
-    rdispls[i] *= insize;
-  }
-
-  // all2all comm of inbuf from caller decomp to rendezvous decomp
-
-  char *inbuf_rvous = (char *) memory->smalloc((bigint) nrvous*insize,
-                                               "rendezvous:inbuf");
-
-  MPI_Alltoallv(inbuf_a2a,sendcount,sdispls,MPI_CHAR,
-		inbuf_rvous,recvcount,rdispls,MPI_CHAR,world);
-
-  if (!inorder) {
-    memory->destroy(procs_a2a);
-    memory->sfree(inbuf_a2a);
-    memory->destroy(offsets);
-  }
-
-  // peform rendezvous computation via callback()
-  // callback() allocates/populates proclist_rvous and outbuf_rvous
-
-  int flag;
-  int *procs_rvous;
-  char *outbuf_rvous;
-
-  int nrvous_out = callback(nrvous,inbuf_rvous,flag,
-                            procs_rvous,outbuf_rvous,ptr);
-
-  if (flag != 1) memory->sfree(inbuf_rvous);  // outbuf_rvous = inbuf_vous
-  if (flag == 0) {
-    memory->destroy(sendcount);
-    memory->destroy(recvcount);
-    memory->destroy(sdispls);
-    memory->destroy(rdispls);
-    if (statflag) rendezvous_stats(n,0,nrvous,nrvous_out,insize,outsize,
-                                   (bigint) nrvous_out*sizeof(int) + 
-                                   4*nprocs*sizeof(int) + all2all1_bytes);
-    return 0;    // all nout_rvous are 0, no 2nd irregular
-  }
-
-
-
-
-
-
-  // create procs and outbuf for All2all if necesary
-  
-  if (!outorder) {
-    memory->create(procs_a2a,nprocs,"rendezvous_a2a:procs");
-
-    outbuf_a2a = (char *) memory->smalloc((bigint) nrvous_out*outsize,
-                                          "rendezvous:outbuf");
-    memory->create(offsets,nprocs,"rendezvous:offsets");
-
-    for (int i = 0; i < nprocs; i++) procs_a2a[i] = 0;
-    for (int i = 0; i < nrvous_out; i++) procs_a2a[procs_rvous[i]]++;
-
-    offsets[0] = 0;
-    for (int i = 1; i < nprocs; i++)
-      offsets[i] = offsets[i-1] + outsize*procs_a2a[i-1];
-
-    bigint offset = 0;
-    for (int i = 0; i < nrvous_out; i++) {
-      iproc = procs_rvous[i];
-      memcpy(&outbuf_a2a[offsets[iproc]],&outbuf_rvous[offset],outsize);
-      offsets[iproc] += outsize;
-      offset += outsize;
-    }
-    
-    all2all2_bytes = nprocs*sizeof(int) + nprocs*sizeof(bigint) + 
-      nrvous_out*outsize;
-
-  } else {
-    procs_a2a = procs_rvous;
-    outbuf_a2a = outbuf_rvous;
-    all2all2_bytes = 0;
-  }
-
-  // comm outbuf from rendezvous decomposition back to caller
-
-  memcpy(sendcount,procs_a2a,nprocs*sizeof(int));
-  
-  MPI_Alltoall(sendcount,1,MPI_INT,recvcount,1,MPI_INT,world);
-  
-  sdispls[0] = rdispls[0] = 0;
-  for (int i = 1; i < nprocs; i++) {
-    sdispls[i] = sdispls[i-1] + sendcount[i-1];
-    rdispls[i] = rdispls[i-1] + recvcount[i-1];
-  }
-  int nout = rdispls[nprocs-1] + recvcount[nprocs-1];
-
-  // test for overflow of outbuf due to imbalance or outsize
-  // means that individual sdispls or rdispls values overflow
-  
-  overflow = 0;
-  if ((bigint) nrvous*outsize > MAXSMALLINT) overflow = 1;
-  if ((bigint) nout*outsize > MAXSMALLINT) overflow = 1;
-  MPI_Allreduce(&overflow,&overflowall,1,MPI_INT,MPI_MAX,world);
-  if (overflowall) error->all(FLERR,"Overflow output in rendezvous_a2a");
-  
-  for (int i = 0; i < nprocs; i++) {
-    sendcount[i] *= outsize;
-    sdispls[i] *= outsize;
-    recvcount[i] *= outsize;
-    rdispls[i] *= outsize;
-  }
-
-  // all2all comm of outbuf from rendezvous decomp back to caller decomp
-  // caller will free outbuf
-
-  outbuf = (char *) memory->smalloc((bigint) nout*outsize,"rendezvous:outbuf");
-
-  MPI_Alltoallv(outbuf_a2a,sendcount,sdispls,MPI_CHAR,
-		outbuf,recvcount,rdispls,MPI_CHAR,world);
-
-  memory->destroy(procs_rvous);
-  memory->sfree(outbuf_rvous);
-
-  if (!outorder) {
-    memory->destroy(procs_a2a);
-    memory->sfree(outbuf_a2a);
-    memory->destroy(offsets);
-  }
-
-  // clean up
-
-  memory->destroy(sendcount);
-  memory->destroy(recvcount);
-  memory->destroy(sdispls);
-  memory->destroy(rdispls);
-
-  // return number of output datums
-  // last arg to stats() = mem for procs_rvous + per-proc vecs + reordering ops
-
-  if (statflag) rendezvous_stats(n,nout,nrvous,nrvous_out,insize,outsize,
-                                 (bigint) nrvous_out*sizeof(int) + 
-                                 4*nprocs*sizeof(int) + 
-                                 MAX(all2all1_bytes,all2all2_bytes));
-  return nout;
-}
-
-/* ----------------------------------------------------------------------
-   print balance and memory info for rendezvous operation
-   useful for debugging
------------------------------------------------------------------------- */
-
-void Comm::rendezvous_stats(int n, int nout, int nrvous, int nrvous_out,
-                            int insize, int outsize, bigint commsize)
-{
-  bigint size_in_all,size_in_max,size_in_min;
-  bigint size_out_all,size_out_max,size_out_min;
-  bigint size_inrvous_all,size_inrvous_max,size_inrvous_min;
-  bigint size_outrvous_all,size_outrvous_max,size_outrvous_min;
-  bigint size_comm_all,size_comm_max,size_comm_min;
-
-  bigint size = (bigint) n*insize;
-  MPI_Allreduce(&size,&size_in_all,1,MPI_LMP_BIGINT,MPI_SUM,world);
-  MPI_Allreduce(&size,&size_in_max,1,MPI_LMP_BIGINT,MPI_MAX,world);
-  MPI_Allreduce(&size,&size_in_min,1,MPI_LMP_BIGINT,MPI_MIN,world);
-
-  size = (bigint) nout*outsize;
-  MPI_Allreduce(&size,&size_out_all,1,MPI_LMP_BIGINT,MPI_SUM,world);
-  MPI_Allreduce(&size,&size_out_max,1,MPI_LMP_BIGINT,MPI_MAX,world);
-  MPI_Allreduce(&size,&size_out_min,1,MPI_LMP_BIGINT,MPI_MIN,world);
-
-  size = (bigint) nrvous*insize;
-  MPI_Allreduce(&size,&size_inrvous_all,1,MPI_LMP_BIGINT,MPI_SUM,world);
-  MPI_Allreduce(&size,&size_inrvous_max,1,MPI_LMP_BIGINT,MPI_MAX,world);
-  MPI_Allreduce(&size,&size_inrvous_min,1,MPI_LMP_BIGINT,MPI_MIN,world);
-
-  size = (bigint) nrvous_out*insize;
-  MPI_Allreduce(&size,&size_outrvous_all,1,MPI_LMP_BIGINT,MPI_SUM,world);
-  MPI_Allreduce(&size,&size_outrvous_max,1,MPI_LMP_BIGINT,MPI_MAX,world);
-  MPI_Allreduce(&size,&size_outrvous_min,1,MPI_LMP_BIGINT,MPI_MIN,world);
-
-  size = commsize;
-  MPI_Allreduce(&size,&size_comm_all,1,MPI_LMP_BIGINT,MPI_SUM,world);
-  MPI_Allreduce(&size,&size_comm_max,1,MPI_LMP_BIGINT,MPI_MAX,world);
-  MPI_Allreduce(&size,&size_comm_min,1,MPI_LMP_BIGINT,MPI_MIN,world);
-
-  int mbytes = 1024*1024;
-
-  if (me == 0) {
-    if (screen) {
-      fprintf(screen,"Rendezvous balance and memory info: (tot,ave,max,min) \n");
-      fprintf(screen,"  input datum count: "
-              BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n",
-              size_in_all/insize,1.0*size_in_all/nprocs/insize,
-              size_in_max/insize,size_in_min/insize);
-      fprintf(screen,"  input data (MB): %g %g %g %g\n",
-              1.0*size_in_all/mbytes,1.0*size_in_all/nprocs/mbytes,
-              1.0*size_in_max/mbytes,1.0*size_in_min/mbytes);
-      if (outsize)
-        fprintf(screen,"  output datum count: "
-                BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n",
-                size_out_all/outsize,1.0*size_out_all/nprocs/outsize,
-                size_out_max/outsize,size_out_min/outsize);
-      else 
-        fprintf(screen,"  output datum count: "
-                BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n",
-                0,0.0,0,0);
-      fprintf(screen,"  output data (MB): %g %g %g %g\n",
-              1.0*size_out_all/mbytes,1.0*size_out_all/nprocs/mbytes,
-              1.0*size_out_max/mbytes,1.0*size_out_min/mbytes);
-      fprintf(screen,"  input rvous datum count: "
-              BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n",
-              size_inrvous_all/insize,1.0*size_inrvous_all/nprocs/insize,
-              size_inrvous_max/insize,size_inrvous_min/insize);
-      fprintf(screen,"  input rvous data (MB): %g %g %g %g\n",
-              1.0*size_inrvous_all/mbytes,1.0*size_inrvous_all/nprocs/mbytes,
-              1.0*size_inrvous_max/mbytes,1.0*size_inrvous_min/mbytes);
-      if (outsize)
-        fprintf(screen,"  output rvous datum count: "
-                BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n",
-                size_outrvous_all/outsize,1.0*size_outrvous_all/nprocs/outsize,
-                size_outrvous_max/outsize,size_outrvous_min/outsize);
-      else
-        fprintf(screen,"  output rvous datum count: "
-                BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n",
-                0,0.0,0,0);
-      fprintf(screen,"  output rvous data (MB): %g %g %g %g\n",
-              1.0*size_outrvous_all/mbytes,1.0*size_outrvous_all/nprocs/mbytes,
-              1.0*size_outrvous_max/mbytes,1.0*size_outrvous_min/mbytes);
-      fprintf(screen,"  rvous comm (MB): %g %g %g %g\n",
-              1.0*size_comm_all/mbytes,1.0*size_comm_all/nprocs/mbytes,
-              1.0*size_comm_max/mbytes,1.0*size_comm_min/mbytes);
-    }
-  }
-}
-
 /* ----------------------------------------------------------------------
   proc 0 reads Nlines from file into buf and bcasts buf to all procs
   caller allocates buf to max size needed
--- a/src/comm.h
+++ b/src/comm.h
@ -109,10 +109,6 @@ class Comm : protected Pointers {

  void ring(int, int, void *, int, void (*)(int, char *, void *),
            void *, void *, int self = 1);
-  int rendezvous(int, int, char *, int, int, int *, 
-                 int (*)(int, char *, int &, int *&, char *&, void *), 
-                 int, char *&, int, void *, int statflag=0);
-
  int read_lines_from_file(FILE *, int, int, char *);
  int read_lines_from_file_universe(FILE *, int, int, char *);

@ -146,15 +142,6 @@ class Comm : protected Pointers {
  int ncores;                       // # of cores per node
  int coregrid[3];                  // 3d grid of cores within a node
  int user_coregrid[3];             // user request for cores in each dim
-
-  int rendezvous_irregular(int, char *, int, int, int *, 
-                           int (*)(int, char *, int &, int *&, char *&, void *), 
-                           int, char *&, int, void *, int);
-  int rendezvous_all2all(int, char *, int, int, int *, 
-                         int (*)(int, char *, int &, int *&, char *&, void *), 
-                         int, char *&, int, void *, int);
-  void rendezvous_stats(int, int, int, int, int, int, bigint);
-
 public:
  enum{MULTIPLE};
 };
--- a/src/create_atoms.cpp
+++ b/src/create_atoms.cpp
@ -514,6 +514,9 @@ void CreateAtoms::command(int narg, char **arg)
    if (domain->triclinic) domain->lamda2x(atom->nlocal);
  }

+  MPI_Barrier(world);
+  double time2 = MPI_Wtime();
+
  // clean up

  delete ranmol;
@ -523,6 +526,21 @@ void CreateAtoms::command(int narg, char **arg)
  delete [] ystr;
  delete [] zstr;

+  // print status
+
+  if (comm->me == 0) {
+    if (screen) {
+      fprintf(screen,"Created " BIGINT_FORMAT " atoms\n",
+              atom->natoms-natoms_previous);
+      fprintf(screen,"  Time spent = %g secs\n",time2-time1);
+    }
+    if (logfile) {
+      fprintf(logfile,"Created " BIGINT_FORMAT " atoms\n",
+              atom->natoms-natoms_previous);
+      fprintf(logfile,"  Time spent = %g secs\n",time2-time1);
+    }
+  }
+
  // for MOLECULE mode:
  // create special bond lists for molecular systems,
  //   but not for atom style template
@ -532,25 +550,6 @@ void CreateAtoms::command(int narg, char **arg)
    if (atom->molecular == 1 && onemol->bondflag && !onemol->specialflag) {
      Special special(lmp);
      special.build();
-
-    }
-  }
-
-  // print status
-
-  MPI_Barrier(world);
-  double time2 = MPI_Wtime();
-
-  if (comm->me == 0) {
-    if (screen) {
-      fprintf(screen,"Created " BIGINT_FORMAT " atoms\n",
-              atom->natoms-natoms_previous);
-      fprintf(screen,"  create_atoms CPU = %g secs\n",time2-time1);
-    }
-    if (logfile) {
-      fprintf(logfile,"Created " BIGINT_FORMAT " atoms\n",
-              atom->natoms-natoms_previous);
-      fprintf(logfile,"  create_atoms CPU = %g secs\n",time2-time1);
    }
  }
 }
--- a/src/hashlittle.cpp
+++ b/src/hashlittle.cpp
@ -1,349 +0,0 @@
-// Hash function hashlittle()
-// from lookup3.c, by Bob Jenkins, May 2006, Public Domain
-// bob_jenkins@burtleburtle.net
-
-#include <cmath>
-#include <stddef.h>
-#include <stdint.h>
-
-// if the system defines the __BYTE_ORDER__ define,
-// we use it instead of guessing the platform
-
-#if defined(__BYTE_ORDER__)
-# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#  define HASH_LITTLE_ENDIAN 1
-# else
-#  define HASH_LITTLE_ENDIAN 0
-# endif
-#else   // heuristic platform guess
-# if defined(__bg__)
-#  define HASH_LITTLE_ENDIAN 0       // IBM BlueGene is big endian
-# else
-#  define HASH_LITTLE_ENDIAN 1       // Intel and AMD x86 are little endian
-# endif
-#endif
-
-#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
-
-/*
-------------------------------------------------------------------------------
-mix -- mix 3 32-bit values reversibly.
-
-This is reversible, so any information in (a,b,c) before mix() is
-still in (a,b,c) after mix().
-
-If four pairs of (a,b,c) inputs are run through mix(), or through
-mix() in reverse, there are at least 32 bits of the output that
-are sometimes the same for one pair and different for another pair.
-This was tested for:
-* pairs that differed by one bit, by two bits, in any combination
-  of top bits of (a,b,c), or in any combination of bottom bits of
-  (a,b,c).
-* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
-  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
-  is commonly produced by subtraction) look like a single 1-bit
-  difference.
-* the base values were pseudorandom, all zero but one bit set, or 
-  all zero plus a counter that starts at zero.
-
-Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
-satisfy this are
-    4  6  8 16 19  4
-    9 15  3 18 27 15
-   14  9  3  7 17  3
-Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
-for "differ" defined as + with a one-bit base and a two-bit delta.  I
-used http://burtleburtle.net/bob/hash/avalanche.html to choose 
-the operations, constants, and arrangements of the variables.
-
-This does not achieve avalanche.  There are input bits of (a,b,c)
-that fail to affect some output bits of (a,b,c), especially of a.  The
-most thoroughly mixed value is c, but it doesn't really even achieve
-avalanche in c.
-
-This allows some parallelism.  Read-after-writes are good at doubling
-the number of bits affected, so the goal of mixing pulls in the opposite
-direction as the goal of parallelism.  I did what I could.  Rotates
-seem to cost as much as shifts on every machine I could lay my hands
-on, and rotates are much kinder to the top and bottom bits, so I used
-rotates.
-------------------------------------------------------------------------------
-*/
-#define mix(a,b,c) \
-{ \
-  a -= c;  a ^= rot(c, 4);  c += b; \
-  b -= a;  b ^= rot(a, 6);  a += c; \
-  c -= b;  c ^= rot(b, 8);  b += a; \
-  a -= c;  a ^= rot(c,16);  c += b; \
-  b -= a;  b ^= rot(a,19);  a += c; \
-  c -= b;  c ^= rot(b, 4);  b += a; \
-}
-
-/*
-------------------------------------------------------------------------------
-final -- final mixing of 3 32-bit values (a,b,c) into c
-
-Pairs of (a,b,c) values differing in only a few bits will usually
-produce values of c that look totally different.  This was tested for
-* pairs that differed by one bit, by two bits, in any combination
-  of top bits of (a,b,c), or in any combination of bottom bits of
-  (a,b,c).
-* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
-  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
-  is commonly produced by subtraction) look like a single 1-bit
-  difference.
-* the base values were pseudorandom, all zero but one bit set, or 
-  all zero plus a counter that starts at zero.
-
-These constants passed:
- 14 11 25 16 4 14 24
- 12 14 25 16 4 14 24
-and these came close:
-  4  8 15 26 3 22 24
- 10  8 15 26 3 22 24
- 11  8 15 26 3 22 24
-------------------------------------------------------------------------------
-*/
-#define final(a,b,c) \
-{ \
-  c ^= b; c -= rot(b,14); \
-  a ^= c; a -= rot(c,11); \
-  b ^= a; b -= rot(a,25); \
-  c ^= b; c -= rot(b,16); \
-  a ^= c; a -= rot(c,4);  \
-  b ^= a; b -= rot(a,14); \
-  c ^= b; c -= rot(b,24); \
-}
-
-/*
-------------------------------------------------------------------------------
-hashlittle() -- hash a variable-length key into a 32-bit value
-  k       : the key (the unaligned variable-length array of bytes)
-  length  : the length of the key, counting by bytes
-  initval : can be any 4-byte value
-Returns a 32-bit value.  Every bit of the key affects every bit of
-the return value.  Two keys differing by one or two bits will have
-totally different hash values.
-
-The best hash table sizes are powers of 2.  There is no need to do
-mod a prime (mod is sooo slow!).  If you need less than 32 bits,
-use a bitmask.  For example, if you need only 10 bits, do
-  h = (h & hashmask(10));
-In which case, the hash table should have hashsize(10) elements.
-
-If you are hashing n strings (uint8_t **)k, do it like this:
-  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
-
-By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
-code any way you wish, private, educational, or commercial.  It's free.
-
-Use for hash table lookup, or anything where one collision in 2^^32 is
-acceptable.  Do NOT use for cryptographic purposes.
-------------------------------------------------------------------------------
-*/
-
-uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
-{
-#ifndef PURIFY_HATES_HASHLITTLE
-
-  uint32_t a,b,c;                                          /* internal state */
-  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
-
-  /* Set up the internal state */
-  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
-
-  u.ptr = key;
-  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
-    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
-    const uint8_t  *k8;
-
-    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
-    while (length > 12)
-    {
-      a += k[0];
-      b += k[1];
-      c += k[2];
-      mix(a,b,c);
-      length -= 12;
-      k += 3;
-    }
-
-    /*----------------------------- handle the last (probably partial) block */
-    /* 
-     * "k[2]&0xffffff" actually reads beyond the end of the string, but
-     * then masks off the part it's not allowed to read.  Because the
-     * string is aligned, the masked-off tail is in the same word as the
-     * rest of the string.  Every machine with memory protection I've seen
-     * does it on word boundaries, so is OK with this.  But VALGRIND will
-     * still catch it and complain.  The masking trick does make the hash
-     * noticably faster for short strings (like English words).
-     */
-#ifndef VALGRIND
-
-    switch(length)
-    {
-    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
-    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
-    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
-    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
-    case 8 : b+=k[1]; a+=k[0]; break;
-    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
-    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
-    case 5 : b+=k[1]&0xff; a+=k[0]; break;
-    case 4 : a+=k[0]; break;
-    case 3 : a+=k[0]&0xffffff; break;
-    case 2 : a+=k[0]&0xffff; break;
-    case 1 : a+=k[0]&0xff; break;
-    case 0 : return c;              /* zero length strings require no mixing */
-    }
-
-#else /* make valgrind happy */
-
-    k8 = (const uint8_t *)k;
-    switch(length)
-    {
-    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
-    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
-    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
-    case 9 : c+=k8[8];                   /* fall through */
-    case 8 : b+=k[1]; a+=k[0]; break;
-    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
-    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
-    case 5 : b+=k8[4];                   /* fall through */
-    case 4 : a+=k[0]; break;
-    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
-    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
-    case 1 : a+=k8[0]; break;
-    case 0 : return c;
-    }
-
-#endif /* !valgrind */
-
-  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
-    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
-    const uint8_t  *k8;
-
-    /*--------------- all but last block: aligned reads and different mixing */
-    while (length > 12)
-    {
-      a += k[0] + (((uint32_t)k[1])<<16);
-      b += k[2] + (((uint32_t)k[3])<<16);
-      c += k[4] + (((uint32_t)k[5])<<16);
-      mix(a,b,c);
-      length -= 12;
-      k += 6;
-    }
-
-    /*----------------------------- handle the last (probably partial) block */
-    k8 = (const uint8_t *)k;
-    switch(length)
-    {
-    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
-             b+=k[2]+(((uint32_t)k[3])<<16);
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
-    case 10: c+=k[4];
-             b+=k[2]+(((uint32_t)k[3])<<16);
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 9 : c+=k8[8];                      /* fall through */
-    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
-    case 6 : b+=k[2];
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 5 : b+=k8[4];                      /* fall through */
-    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
-    case 2 : a+=k[0];
-             break;
-    case 1 : a+=k8[0];
-             break;
-    case 0 : return c;                     /* zero length requires no mixing */
-    }
-
-  } else {                        /* need to read the key one byte at a time */
-    const uint8_t *k = (const uint8_t *)key;
-
-    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
-    while (length > 12)
-    {
-      a += k[0];
-      a += ((uint32_t)k[1])<<8;
-      a += ((uint32_t)k[2])<<16;
-      a += ((uint32_t)k[3])<<24;
-      b += k[4];
-      b += ((uint32_t)k[5])<<8;
-      b += ((uint32_t)k[6])<<16;
-      b += ((uint32_t)k[7])<<24;
-      c += k[8];
-      c += ((uint32_t)k[9])<<8;
-      c += ((uint32_t)k[10])<<16;
-      c += ((uint32_t)k[11])<<24;
-      mix(a,b,c);
-      length -= 12;
-      k += 12;
-    }
-
-    /*-------------------------------- last block: affect all 32 bits of (c) */
-    switch(length)                   /* all the case statements fall through */
-    {
-    case 12: c+=((uint32_t)k[11])<<24;
-    case 11: c+=((uint32_t)k[10])<<16;
-    case 10: c+=((uint32_t)k[9])<<8;
-    case 9 : c+=k[8];
-    case 8 : b+=((uint32_t)k[7])<<24;
-    case 7 : b+=((uint32_t)k[6])<<16;
-    case 6 : b+=((uint32_t)k[5])<<8;
-    case 5 : b+=k[4];
-    case 4 : a+=((uint32_t)k[3])<<24;
-    case 3 : a+=((uint32_t)k[2])<<16;
-    case 2 : a+=((uint32_t)k[1])<<8;
-    case 1 : a+=k[0];
-             break;
-    case 0 : return c;
-    }
-  }
-
-  final(a,b,c);
-  return c;
-
-#else  /* PURIFY_HATES_HASHLITTLE */
-/* I don't know what it is about Jenkins' hashlittle function, but
- * it drives purify insane, even with VALGRIND defined.  It makes 
- * purify unusable!!  The code execution doesn't even make sense.
- * Below is a (probably) weaker hash function that at least allows 
- * testing with purify.
- */
-#define MAXINT_DIV_PHI  11400714819323198485U
-
-  uint32_t h, rest, *p, bytes, num_bytes;
-  char *byteptr;
-
-  num_bytes = length;
-
-  /* First hash the uint32_t-sized portions of the key */
-  h = 0;
-  for (p = (uint32_t *)key, bytes=num_bytes;
-       bytes >= (uint32_t) sizeof(uint32_t);
-       bytes-=sizeof(uint32_t), p++){
-    h = (h^(*p))*MAXINT_DIV_PHI;
-  }
-
-  /* Then take care of the remaining bytes, if any */
-  rest = 0;
-  for (byteptr = (char *)p; bytes > 0; bytes--, byteptr++){
-    rest = (rest<<8) | (*byteptr);
-  }
-
-  /* If extra bytes, merge the two parts */
-  if (rest)
-    h = (h^rest)*MAXINT_DIV_PHI;
-
-  return h;
-#endif /* PURIFY_HATES_HASHLITTLE */
-}
--- a/src/hashlittle.h
+++ b/src/hashlittle.h
@ -1,5 +0,0 @@
-// Hash function hashlittle()
-// from lookup3.c, by Bob Jenkins, May 2006, Public Domain
-// bob_jenkins@burtleburtle.net
-
-uint32_t hashlittle(const void *key, size_t length, uint32_t);
--- a/src/irregular.cpp
+++ b/src/irregular.cpp
@ -501,8 +501,7 @@ int compare_standalone(const int i, const int j, void *ptr)

 void Irregular::exchange_atom(double *sendbuf, int *sizes, double *recvbuf)
 {
-  int i,m,n,count;
-  bigint offset;
+  int i,m,n,offset,count;

  // post all receives

@ -622,7 +621,6 @@ int Irregular::create_data(int n, int *proclist, int sortflag)
  num_send = new int[nsend_proc];
  index_send = new int[n-work1[me]];
  index_self = new int[work1[me]];
-  maxindex = n;

  // proc_send = procs I send to
  // num_send = # of datums I send to each proc
@ -680,182 +678,8 @@ int Irregular::create_data(int n, int *proclist, int sortflag)

  // receive incoming messages
  // proc_recv = procs I recv from
-  // num_recv = # of datums each proc sends me
-  // nrecvdatum = total # of datums I recv
-
-  int nrecvdatum = 0;
-  for (i = 0; i < nrecv_proc; i++) {
-    MPI_Recv(&num_recv[i],1,MPI_INT,MPI_ANY_SOURCE,0,world,status);
-    proc_recv[i] = status->MPI_SOURCE;
-    nrecvdatum += num_recv[i];
-  }
-  nrecvdatum += num_self;
-
-  // sort proc_recv and num_recv by proc ID if requested
-  // useful for debugging to insure reproducible ordering of received datums
-
-  if (sortflag) {
-    int *order = new int[nrecv_proc];
-    int *proc_recv_ordered = new int[nrecv_proc];
-    int *num_recv_ordered = new int[nrecv_proc];
-
-    for (i = 0; i < nrecv_proc; i++) order[i] = i;
-
-#if defined(LMP_QSORT)
-    proc_recv_copy = proc_recv;
-    qsort(order,nrecv_proc,sizeof(int),compare_standalone);
-#else
-    merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
-#endif
-
-    int j;
-    for (i = 0; i < nrecv_proc; i++) {
-      j = order[i];
-      proc_recv_ordered[i] = proc_recv[j];
-      num_recv_ordered[i] = num_recv[j];
-    }
-
-    memcpy(proc_recv,proc_recv_ordered,nrecv_proc*sizeof(int));
-    memcpy(num_recv,num_recv_ordered,nrecv_proc*sizeof(int));
-    delete [] order;
-    delete [] proc_recv_ordered;
-    delete [] num_recv_ordered;
-  }
-
-  // barrier to insure all MPI_ANY_SOURCE messages are received
-  // else another proc could proceed to exchange_data() and send to me
-
-  MPI_Barrier(world);
-
-  // return # of datums I will receive
-
-  return nrecvdatum;
-}
-
-/* ----------------------------------------------------------------------
-   create communication plan based on list of datums of uniform size
-   n = # of datums to send
-   procs = how many datums to send to each proc, must include self
-   sort = flag for sorting order of received messages by proc ID
-   return total # of datums I will recv, including any to self
------------------------------------------------------------------------- */
-
-int Irregular::create_data_grouped(int n, int *procs, int sortflag)
-{
-  int i,j,k,m;
-
-  // setup for collective comm
-  // work1 = # of datums I send to each proc, set self to 0
-  // work2 = 1 for all procs, used for ReduceScatter
-
-  for (i = 0; i < nprocs; i++) {
-    work1[i] = procs[i];
-    work2[i] = 1;
-  }
-  work1[me] = 0;
-
-  // nrecv_proc = # of procs I receive messages from, not including self
-  // options for performing ReduceScatter operation
-  // some are more efficient on some machines at big sizes
-
-#ifdef LAMMPS_RS_ALLREDUCE_INPLACE
-  MPI_Allreduce(MPI_IN_PLACE,work1,nprocs,MPI_INT,MPI_SUM,world);
-  nrecv_proc = work1[me];
-#else 
-#ifdef LAMMPS_RS_ALLREDUCE
-  MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world);
-  nrecv_proc = work2[me];
-#else
-  MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world);
-#endif
-#endif
-
-  // allocate receive arrays
-
-  proc_recv = new int[nrecv_proc];
-  num_recv = new int[nrecv_proc];
-  request = new MPI_Request[nrecv_proc];
-  status = new MPI_Status[nrecv_proc];
-
-  // work1 = # of datums I send to each proc, including self
-  // nsend_proc = # of procs I send messages to, not including self
-
-  for (i = 0; i < nprocs; i++) work1[i] = procs[i];
-
-  nsend_proc = 0;
-  for (i = 0; i < nprocs; i++)
-    if (work1[i]) nsend_proc++;
-  if (work1[me]) nsend_proc--;
-
-  // allocate send and self arrays
-
-  proc_send = new int[nsend_proc];
-  num_send = new int[nsend_proc];
-  index_send = new int[n-work1[me]];
-  index_self = new int[work1[me]];
-  maxindex = n;
-
-  // proc_send = procs I send to
-  // num_send = # of datums I send to each proc
-  // num_self = # of datums I copy to self
-  // to balance pattern of send messages:
-  //   each proc begins with iproc > me, continues until iproc = me
-  // reset work1 to store which send message each proc corresponds to
-
-  int iproc = me;
-  int isend = 0;
-  for (i = 0; i < nprocs; i++) {
-    iproc++;
-    if (iproc == nprocs) iproc = 0;
-    if (iproc == me) {
-      num_self = work1[iproc];
-      work1[iproc] = 0;
-    } else if (work1[iproc] > 0) {
-      proc_send[isend] = iproc;
-      num_send[isend] = work1[iproc];
-      work1[iproc] = isend;
-      isend++;
-    }
-  }
-
-  // work2 = offsets into index_send for each proc I send to
-  // m = ptr into index_self
-  // index_send = list of which datums to send to each proc
-  //   1st N1 values are datum indices for 1st proc,
-  //   next N2 values are datum indices for 2nd proc, etc
-  // index_self = list of which datums to copy to self
-
-  work2[0] = 0;
-  for (i = 1; i < nsend_proc; i++) work2[i] = work2[i-1] + num_send[i-1];
-
-  m = 0;
-  i = 0;
-  for (iproc = 0; iproc < nprocs; iproc++) {
-    k = procs[iproc];
-    for (j = 0; j < k; j++) {
-      if (iproc == me) index_self[m++] = i++;
-      else {
-        isend = work1[iproc];
-        index_send[work2[isend]++] = i++;
-      }
-    }
-  }
-
-  // tell receivers how much data I send
-  // sendmax_proc = largest # of datums I send in a single message
-
-  sendmax_proc = 0;
-  for (i = 0; i < nsend_proc; i++) {
-    MPI_Request tmpReq; // Use non-blocking send to avoid possible deadlock
-    MPI_Isend(&num_send[i],1,MPI_INT,proc_send[i],0,world,&tmpReq);
-    MPI_Request_free(&tmpReq); // the MPI_Barrier below marks completion
-    sendmax_proc = MAX(sendmax_proc,num_send[i]);
-  }
-
-  // receive incoming messages
-  // proc_recv = procs I recv from
-  // num_recv = # of datums each proc sends me
-  // nrecvdatum = total # of datums I recv
+  // num_recv = total size of message each proc sends me
+  // nrecvdatum = total size of data I recv

  int nrecvdatum = 0;
  for (i = 0; i < nrecv_proc; i++) {
@ -915,13 +739,11 @@ int Irregular::create_data_grouped(int n, int *procs, int sortflag)

 void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf)
 {
-  int i,n,count;
-  bigint m;       // these 2 lines enable send/recv buf to be larger than 2 GB
-  char *dest;
+  int i,m,n,offset,count;

  // post all receives, starting after self copies

-  bigint offset = num_self*nbytes;
+  offset = num_self*nbytes;
  for (int irecv = 0; irecv < nrecv_proc; irecv++) {
    MPI_Irecv(&recvbuf[offset],num_recv[irecv]*nbytes,MPI_CHAR,
              proc_recv[irecv],0,world,&request[irecv]);
@ -943,34 +765,23 @@ void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf)
  n = 0;
  for (int isend = 0; isend < nsend_proc; isend++) {
    count = num_send[isend];
-    dest = buf;
    for (i = 0; i < count; i++) {
      m = index_send[n++];
-      memcpy(dest,&sendbuf[m*nbytes],nbytes);
-      dest += nbytes;
+      memcpy(&buf[i*nbytes],&sendbuf[m*nbytes],nbytes);
    }
    MPI_Send(buf,count*nbytes,MPI_CHAR,proc_send[isend],0,world);
  }

  // copy datums to self, put at beginning of recvbuf

-  dest = recvbuf;
  for (i = 0; i < num_self; i++) {
    m = index_self[i];
-    memcpy(dest,&sendbuf[m*nbytes],nbytes);
-    dest += nbytes;
+    memcpy(&recvbuf[i*nbytes],&sendbuf[m*nbytes],nbytes);
  }

  // wait on all incoming messages

  if (nrecv_proc) MPI_Waitall(nrecv_proc,request,status);
-
-  // approximate memory tally
-  // DEBUG lines
-
-  //bigint irregular_bytes = 2*nprocs*sizeof(int);
-  //irregular_bytes += maxindex*sizeof(int);
-  //irregular_bytes += maxbuf;
 }

 /* ----------------------------------------------------------------------
--- a/src/irregular.h
+++ b/src/irregular.h
@ -33,7 +33,6 @@ class Irregular : protected Pointers {
                     int *procassign = NULL);
  int migrate_check();
  int create_data(int, int *, int sortflag = 0);
-  int create_data_grouped(int, int *, int sortflag = 0);
  void exchange_data(char *, int, char *);
  void destroy_data();
  bigint memory_usage();
@ -49,7 +48,6 @@ class Irregular : protected Pointers {
  double *dbuf;                     // double buf for largest single atom send
  int maxbuf;                       // size of char buf in bytes
  char *buf;                        // char buf for largest single data send
-  int maxindex;                     // combined size of index_send + index_self

  int *mproclist,*msizes;           // persistent vectors in migrate_atoms
  int maxlocal;                     // allocated size of mproclist and msizes
--- a/src/read_data.cpp
+++ b/src/read_data.cpp
@ -120,9 +120,6 @@ void ReadData::command(int narg, char **arg)
 {
  if (narg < 1) error->all(FLERR,"Illegal read_data command");

-  MPI_Barrier(world);
-  double time1 = MPI_Wtime();
-
  // optional args

  addflag = NONE;
@ -909,18 +906,6 @@ void ReadData::command(int narg, char **arg)

    force->kspace = saved_kspace;
  }
-
-  // total time
-
-  MPI_Barrier(world);
-  double time2 = MPI_Wtime();
-
-  if (comm->me == 0) {
-    if (screen)
-      fprintf(screen,"  read_data CPU = %g secs\n",time2-time1);
-    if (logfile)
-      fprintf(logfile,"  read_data CPU = %g secs\n",time2-time1);
-  }
 }

 /* ----------------------------------------------------------------------
--- a/src/read_restart.cpp
+++ b/src/read_restart.cpp
@ -81,9 +81,6 @@ void ReadRestart::command(int narg, char **arg)
  if (domain->box_exist)
    error->all(FLERR,"Cannot read_restart after simulation box is defined");

-  MPI_Barrier(world);
-  double time1 = MPI_Wtime();
-
  MPI_Comm_rank(world,&me);
  MPI_Comm_size(world,&nprocs);

@ -565,18 +562,6 @@ void ReadRestart::command(int narg, char **arg)
    Special special(lmp);
    special.build();
  }
-
-  // total time
-
-  MPI_Barrier(world);
-  double time2 = MPI_Wtime();
-
-  if (comm->me == 0) {
-    if (screen)
-      fprintf(screen,"  read_restart CPU = %g secs\n",time2-time1);
-    if (logfile)
-      fprintf(logfile,"  read_restart CPU = %g secs\n",time2-time1);
-  }
 }

 /* ----------------------------------------------------------------------
--- a/src/replicate.cpp
+++ b/src/replicate.cpp
@ -76,7 +76,7 @@ void Replicate::command(int narg, char **arg)
  if (atom->nextra_grow || atom->nextra_restart || atom->nextra_store)
    error->all(FLERR,"Cannot replicate with fixes that store atom quantities");

-  // record wall time for atom replication
+  // Record wall time for atom replication

  MPI_Barrier(world);
  double time1 = MPI_Wtime();
@ -762,15 +762,15 @@ void Replicate::command(int narg, char **arg)
    special.build();
  }

-  // total time
+  // Wall time

  MPI_Barrier(world);
  double time2 = MPI_Wtime();

  if (me == 0) {
    if (screen)
-      fprintf(screen,"  replicate CPU = %g secs\n",time2-time1);
+      fprintf(screen,"  Time spent = %g secs\n",time2-time1);
    if (logfile)
-      fprintf(logfile,"  replicate CPU = %g secs\n",time2-time1);
+      fprintf(logfile,"  Time spent = %g secs\n",time2-time1);
  }
 }
--- a/src/special.cpp
+++ b/src/special.cpp
--- a/src/special.h
+++ b/src/special.h
@ -26,43 +26,29 @@ class Special : protected Pointers {

 private:
  int me,nprocs;
-  int maxall;
  tagint **onetwo,**onethree,**onefour;

-  // data used by rendezvous callback methods
+  // data used by ring callback methods

-  int nrvous;
-  tagint *atomIDs;
-  int *procowner;
-
-  struct IDRvous {
-    int me;
-    tagint atomID;
-  };
-
-  struct PairRvous {
-    tagint atomID,partnerID;
-  };
-
-  // private methods
-  
-  void atom_owners();
-  void onetwo_build_newton();
-  void onetwo_build_newton_off();
-  void onethree_build();
-  void onefour_build();
+  int *count;
+  int **dflag;

  void dedup();
  void angle_trim();
  void dihedral_trim();
  void combine();
  void fix_alteration();
-  void timer_output(double);

-  // callback functions for rendezvous communication
+  // callback functions for ring communication

-  static int rendezvous_ids(int, char *, int &, int *&, char *&, void *);
-  static int rendezvous_pairs(int, char *, int &, int *&, char *&, void *);
+  static void ring_one(int, char *, void *);
+  static void ring_two(int, char *, void *);
+  static void ring_three(int, char *, void *);
+  static void ring_four(int, char *, void *);
+  static void ring_five(int, char *, void *);
+  static void ring_six(int, char *, void *);
+  static void ring_seven(int, char *, void *);
+  static void ring_eight(int, char *, void *);
 };

 }